From 8a6186ba035dd9e4c5296547cd202adbc205ef8f Mon Sep 17 00:00:00 2001 From: leitner Date: Thu, 9 Apr 2015 19:05:32 +0000 Subject: [PATCH] add critbit --- CHANGES | 1 + GNUmakefile | 9 +- critbit.h | 23 ++++ critbit/critbit.c | 227 +++++++++++++++++++++++++++++++++ critbit/critbit0_allprefixed.3 | 33 +++++ critbit/critbit0_clear.3 | 16 +++ critbit/critbit0_contains.3 | 17 +++ critbit/critbit0_delete.3 | 18 +++ critbit/critbit0_insert.3 | 21 +++ t.c | 43 +++++++ 10 files changed, 405 insertions(+), 3 deletions(-) create mode 100644 critbit.h create mode 100644 critbit/critbit.c create mode 100644 critbit/critbit0_allprefixed.3 create mode 100644 critbit/critbit0_clear.3 create mode 100644 critbit/critbit0_contains.3 create mode 100644 critbit/critbit0_delete.3 create mode 100644 critbit/critbit0_insert.3 diff --git a/CHANGES b/CHANGES index 96c3f31..d0fe646 100644 --- a/CHANGES +++ b/CHANGES @@ -29,6 +29,7 @@ if SOCK_NONBLOCK is defined, use it instead of socket+fcntl ... but if errno==EINVAL still fall back to socket+fcntl (Robert Henney) SECURITY: fix botched integer overflow handling logic in stralloc_ready (Giorgio) + add critbit 0.29: save 8 bytes in taia.h for 64-bit systems diff --git a/GNUmakefile b/GNUmakefile index 73cd05a..9a06b04 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -11,7 +11,7 @@ MAN3DIR=${prefix}/man/man3 LIBS=byte.a fmt.a scan.a str.a uint.a open.a stralloc.a unix.a socket.a \ buffer.a mmap.a taia.a tai.a dns.a case.a mult.a array.a io.a \ -textcode.a cdb.a +textcode.a cdb.a critbit.a all: ent $(LIBS) libowfat.a libsocket t @@ -58,7 +58,7 @@ endif # to build without diet libc support, use $ make DIET= # see http://www.fefe.de/dietlibc/ for details about the diet libc -VPATH=str:byte:fmt:scan:uint:open:stralloc:unix:socket:buffer:mmap:textcode:taia:tai:dns:case:array:mult:io:cdb +VPATH=str:byte:fmt:scan:uint:open:stralloc:unix:socket:buffer:mmap:textcode:taia:tai:dns:case:array:mult:io:cdb:critbit BYTE_OBJS=$(patsubst byte/%.c,%.o,$(wildcard byte/*.c)) FMT_OBJS=$(patsubst fmt/%.c,%.o,$(wildcard fmt/*.c)) @@ -80,6 +80,7 @@ ARRAY_OBJS=$(patsubst array/%.c,%.o,$(wildcard array/*.c)) MULT_OBJS=$(patsubst mult/%.c,%.o,$(wildcard mult/*.c)) IO_OBJS=$(patsubst io/%.c,%.o,$(wildcard io/*.c)) CDB_OBJS=$(patsubst cdb/%.c,%.o,$(wildcard cdb/*.c)) +CRITBIT_OBJS=$(patsubst critbit/%.c,%.o,$(wildcard critbit/*.c)) $(BYTE_OBJS): byte.h $(FMT_OBJS): fmt.h @@ -99,6 +100,7 @@ $(ARRAY_OBJS): uint64.h array.h $(MULT_OBJS): uint64.h uint32.h uint16.h safemult.h $(IO_OBJS): uint64.h array.h io.h io_internal.h taia.h tai.h haveepoll.h havekqueue.h havesigio.h havebsdsf.h havedevpoll.h havesendfile.h $(CDB_OBJS): cdb.h uint32.h +$(CRITBIT_OBJS): critbit.h mult64.o: haveuint128.h @@ -133,12 +135,13 @@ array.a: $(ARRAY_OBJS) mult.a: $(MULT_OBJS) io.a: $(IO_OBJS) cdb.a: $(CDB_OBJS) +critbit.a: $(CRITBIT_OBJS) ALL_OBJS=$(DNS_OBJS) $(BYTE_OBJS) $(FMT_OBJS) $(SCAN_OBJS) \ $(STR_OBJS) $(UINT_OBJS) $(OPEN_OBJS) $(STRALLOC_OBJS) $(UNIX_OBJS) \ $(SOCKET_OBJS) $(BUFFER_OBJS) $(MMAP_OBJS) $(TEXTCODE_OBJS) \ $(TAIA_OBJS) $(TAI_OBJS) $(CASE_OBJS) $(ARRAY_OBJS) $(MULT_OBJS) \ -$(IO_OBJS) $(CDB_OBJS) +$(IO_OBJS) $(CDB_OBJS) $(CRITBIT_OBJS) libowfat.a: $(ALL_OBJS) $(CROSS)ar cru $@ $(ALL_OBJS) diff --git a/critbit.h b/critbit.h new file mode 100644 index 0000000..11d096b --- /dev/null +++ b/critbit.h @@ -0,0 +1,23 @@ +#ifndef CRITBIT_H_ +#define CRITBIT_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + void *root; +} critbit0_tree; + +int critbit0_contains(critbit0_tree *t, const char *u); +int critbit0_insert(critbit0_tree *t, const char *u); +int critbit0_delete(critbit0_tree *t, const char *u); +void critbit0_clear(critbit0_tree *t); +int critbit0_allprefixed(critbit0_tree *t, const char *prefix, + int (*handle) (const char *, void *), void *arg); + +#ifdef __cplusplus +}; +#endif + +#endif diff --git a/critbit/critbit.c b/critbit/critbit.c new file mode 100644 index 0000000..19442e3 --- /dev/null +++ b/critbit/critbit.c @@ -0,0 +1,227 @@ +#include /* size_t, uintptr_t */ +#include /* for uint8_t, uint32_t */ + +#include +#include + +#include "critbit.h" + +typedef struct { + void* child[2]; + uint32_t byte; + uint8_t otherbits; +} critbit0_node; + +#if 0 +typedef struct{ + void* root; +} critbit0_tree; +#endif + +int critbit0_contains(critbit0_tree* t,const char* u) { + const uint8_t* ubytes= (void*)u; + const size_t ulen= strlen(u); + uint8_t* p= t->root; + + if (!p) return 0; + + while ((uintptr_t)p & 1) { + critbit0_node* q = (void*)(p-1); + + uint8_t c = 0; + if (q->bytebyte]; + + const int direction = (1+(q->otherbits|c))>>8; + + p = q->child[direction]; + } + + return 0==strcmp(u,(const char*)p); +} + +int critbit0_insert(critbit0_tree* t,const char* u) { + const uint8_t* const ubytes = (void*)u; + const size_t ulen = strlen(u); + uint8_t* p = t->root; + + if (!p) { + char* x = malloc(ulen+1); + if (!x) return 0; + memcpy(x,u,ulen+1); + t->root= x; + return 2; + } + + while (1&(intptr_t)p) { + critbit0_node* q = (void*)(p-1); + + uint8_t c = 0; + if (q->bytebyte]; + const int direction = (1+(q->otherbits|c))>>8; + + p = q->child[direction]; + } + + uint32_t newbyte; + uint32_t newotherbits; + + for (newbyte = 0; newbyte < ulen; ++newbyte) { + if (p[newbyte] != ubytes[newbyte]) { + newotherbits = p[newbyte]^ubytes[newbyte]; + goto different_byte_found; + } + } + + if (p[newbyte]!=0) { + newotherbits = p[newbyte]; + goto different_byte_found; + } + return 1; + +different_byte_found: + + newotherbits |= newotherbits>>1; + newotherbits |= newotherbits>>2; + newotherbits |= newotherbits>>4; + newotherbits = (newotherbits&~(newotherbits>>1))^255; + uint8_t c = p[newbyte]; + int newdirection = (1+(newotherbits|c))>>8; + + critbit0_node* newnode; + if (!(newnode=malloc(sizeof(critbit0_node)))) + return 0; + + char* x; + if (!(x = malloc(ulen+1))) { + free(newnode); + return 0; + } + memcpy(x,ubytes,ulen+1); + + newnode->byte= newbyte; + newnode->otherbits= newotherbits; + newnode->child[1-newdirection]= x; + + void** wherep= &t->root; + for(;;) { + uint8_t* p = *wherep; + if (!((intptr_t)p&1)) + break; + critbit0_node* q = (void*)(p-1); + if (q->byte > newbyte)break; + if (q->byte==newbyte && q->otherbits>newotherbits)break; + uint8_t c = 0; + if (q->bytebyte]; + const int direction = (1+(q->otherbits|c))>>8; + wherep = q->child+direction; + } + + newnode->child[newdirection]= *wherep; + *wherep= (void*)(1+(char*)newnode); + + return 2; +} + +int critbit0_delete(critbit0_tree* t,const char* u) { + const uint8_t* ubytes = (void*)u; + const size_t ulen = strlen(u); + uint8_t* p = t->root; + void** wherep = &t->root; + void** whereq = 0; + critbit0_node* q = 0; + int direction = 0; + + if (!p) return 0; + + while ((intptr_t)p&1) { + whereq = wherep; + q = (void*)(p-1); + uint8_t c = 0; + if (q->bytebyte]; + direction = (1+(q->otherbits|c))>>8; + wherep = q->child+direction; + p = *wherep; + } + + if (0!=strcmp(u,(const char*)p)) + return 0; + free(p); + + if (!whereq) { + t->root = 0; + return 1; + } + + *whereq = q->child[1-direction]; + free(q); + + return 1; +} + +static void traverse(void* top) { + uint8_t* p = top; + + if ((intptr_t)p&1) { + critbit0_node* q = (void*)(p-1); + traverse(q->child[0]); + traverse(q->child[1]); + free(q); + } else { + free(p); + } +} + +void critbit0_clear(critbit0_tree* t) { + if (t->root) + traverse(t->root); + t->root = NULL; +} + +static int allprefixed_traverse(uint8_t* top,int(*handle)(const char*,void*),void* arg) { + if ((uintptr_t)top&1) { + critbit0_node* q = (void*)(top-1); + int direction; + for (direction=0; direction<2; ++direction) + switch (allprefixed_traverse(q->child[direction],handle,arg)) { + case 1: break; + case 0: return 0; + default: return-1; + } + return 1; + } + + return handle((const char*)top,arg); +} + +int critbit0_allprefixed(critbit0_tree* t,const char* prefix,int(*handle)(const char*,void*),void* arg) { + const uint8_t* ubytes = (void*)prefix; + const size_t ulen = strlen(prefix); + uint8_t* p = t->root; + uint8_t* top = p; + + if (!p) return 1; + + while ((uintptr_t)p&1) { + critbit0_node* q = (void*)(p-1); + uint8_t c = 0; + if (q->bytebyte]; + const int direction = (1+(q->otherbits|c))>>8; + p = q->child[direction]; + if (q->byte + +critbit0_tree cb = { 0 }; + +int \fBcritbit0_allprefixed\fP(critbit0_tree* \fIcb\fR,const char* \fIprefix\fR, + int (*handle)(const char*,void*),void* arg); +.SH DESCRIPTION +critbit0_allprefixed calls the \fIhandle\fR function on all members of +\fIcb\fR that start with \fIprefix\fR. It passes \fIarg\fR as second +argument to \fIhandle\fR. + +If \fIhandle\fR returns 0, the search stops and critbit0_allprefixed +returns 0. + +If \fIhandle\fR returns 1, the search continues and critbit0_allprefixed +returns 1 if all keys with the given prefix were handled. + +If \fIhandle\fR returns something else, the search stops and +critbit0_allprefixed returns -1. +.SH "RETURN VALUE" +1 if all keys were found and handled (or if there were no keys with that +prefix). + +0 if at least one key was found, \fIhandle\fR was called and returned 0. + +-1 if at least one key was found, \fIhandle\fR was called and returned +something other than 0 or 1. +.SH "SEE ALSO" +critbit0_contains(3) diff --git a/critbit/critbit0_clear.3 b/critbit/critbit0_clear.3 new file mode 100644 index 0000000..486bbb7 --- /dev/null +++ b/critbit/critbit0_clear.3 @@ -0,0 +1,16 @@ +.TH critbit0_clear 3 +.SH NAME +critbit0_clear \- free all memory associated with a critbit tree +.SH SYNTAX +.B #include + +critbit0_tree cb = { 0 }; + +void \fBcritbit0_clear\fP(critbit0_tree* \fIcb\fR); +.SH DESCRIPTION +critbit0_clear deletes all keys in \fIcb\fR and frees all memory +associated with it. +.SH "RETURN VALUE" +none. +.SH "SEE ALSO" +critbit0_delete(3) diff --git a/critbit/critbit0_contains.3 b/critbit/critbit0_contains.3 new file mode 100644 index 0000000..a02185b --- /dev/null +++ b/critbit/critbit0_contains.3 @@ -0,0 +1,17 @@ +.TH critbit0_contains 3 +.SH NAME +critbit0_contains \- check whether a string is in the critbit tree +.SH SYNTAX +.B #include + +critbit0_tree cb = { 0 }; + +int \fBcritbit0_contains\fP(critbit0_tree* \fIcb\fR,const char* \fIstr\fR); +.SH DESCRIPTION +critbit0_contains looks up the given string in the critbit0 tree. +If \fIstr\fR is in \fIcb\fR, critbit0_contains returns 1. Otherwise it +returns 0. +.SH "RETURN VALUE" +1 if the key was found, 0 otherwise. +.SH "SEE ALSO" +critbit0_insert(3) diff --git a/critbit/critbit0_delete.3 b/critbit/critbit0_delete.3 new file mode 100644 index 0000000..48508a5 --- /dev/null +++ b/critbit/critbit0_delete.3 @@ -0,0 +1,18 @@ +.TH critbit0_delete 3 +.SH NAME +critbit0_delete \- delete a string from a critbit tree +.SH SYNTAX +.B #include + +critbit0_tree cb = { 0 }; + +int \fBcritbit0_delete\fP(critbit0_tree* \fIcb\fR,const char* \fIstr\fR); +.SH DESCRIPTION +critbit0_delete attempts to delete a string from a critbit0 tree. +If \fIstr\fR is in \fIcb\fR, critbit0_delete removes it and returns 1. +If \fIstr\fR is not in \fIcb\fR, critbit0_delete leaves it alone and +returns 0. +.SH "RETURN VALUE" +1 if it was in cb and has now been removed, 0 if it was not in cb. +.SH "SEE ALSO" +critbit0_insert(3) diff --git a/critbit/critbit0_insert.3 b/critbit/critbit0_insert.3 new file mode 100644 index 0000000..13b409a --- /dev/null +++ b/critbit/critbit0_insert.3 @@ -0,0 +1,21 @@ +.TH critbit0_insert 3 +.SH NAME +critbit0_insert \- insert a string into a critbit tree +.SH SYNTAX +.B #include + +critbit0_tree cb = { 0 }; + +int \fBcritbit0_insert\fP(critbit0_tree* \fIcb\fR,const char* \fIstr\fR); +.SH DESCRIPTION +critbit0_insert attempts to insert a string into a critbit0 tree. +If \fIstr\fR is already in \fIcb\fR, critbit0_insert returns 1. +If \fIstr\fR is not in \fIcb\fR, it is inserted and critbit0_insert +returns 2. +If there is a memory allocation failure on the way, critbit0_insert +leaves \fIcb\fR alone and returns 0. +.SH "RETURN VALUE" +2 if the key was inserted, 1 if it was already in cb, 0 on memory +allocation failure. +.SH "SEE ALSO" +critbit0_contains(3) diff --git a/t.c b/t.c index ca2192e..57d7ede 100644 --- a/t.c +++ b/t.c @@ -23,6 +23,8 @@ #include "iob.h" #include "safemult.h" #include "iarray.h" +#include "critbit.h" +#include #include "CAS.h" @@ -47,7 +49,47 @@ static int64 writecb(int64 fd,const void* buf,uint64 n) { return -1; } +static int ret0(const char* s,void* foo) { + (void)foo; + assert(strcmp(s,"fnord")==0); + return 0; +} + +static int ret1(const char* s,void* foo) { + static int i; + (void)foo; + switch (i) { + case 0: assert(strcmp(s,"fnord")==0); break; + case 1: assert(strcmp(s,"fnord2")==0); break; + default: return -1; + } + ++i; + return 1; +} + int main(int argc,char* argv[]) { + static critbit0_tree t; + assert(critbit0_insert(&t,"fnord")==2); + assert(critbit0_insert(&t,"fnord2")==2); + assert(critbit0_insert(&t,"fnord2")==1); + assert(critbit0_contains(&t,"foo")==0); + assert(critbit0_contains(&t,"fnord")==1); + assert(critbit0_allprefixed(&t,"fnord",ret1,NULL)==1); + assert(critbit0_allprefixed(&t,"fnord",ret0,NULL)==0); + assert(critbit0_delete(&t,"fnord2")==1); + assert(critbit0_delete(&t,"foo")==0); +#if 0 + int s = socket_tcp6(); +#endif +#if 0 + iarray i; + iarray_init(&i,sizeof(size_t)); + printf("%p\n",iarray_get(&i,0)); + printf("%p\n",iarray_allocate(&i,0)); + printf("%p\n",iarray_allocate(&i,0)); + printf("%p\n",iarray_get(&i,0)); +#endif +#if 0 char buf[1024]; size_t l; unsigned char c; @@ -70,6 +112,7 @@ int main(int argc,char* argv[]) { f 0 9 d 8 4 9 e */ +#endif #if 0 static size_t x; x=23;