diff --git a/implementations/C/Makefile b/implementations/C/Makefile index 7e9f956..4f28bb4 100644 --- a/implementations/C/Makefile +++ b/implementations/C/Makefile @@ -45,3 +45,6 @@ clean: linenoise.o: linenoise.c linenoise.h cc -c linenoise.c -o linenoise.o + +tryrax: try_rax.c + cc try_rax.c rax.o -lm -o tryrax diff --git a/implementations/C/rax.h b/implementations/C/rax.h new file mode 100644 index 0000000..62e8df7 --- /dev/null +++ b/implementations/C/rax.h @@ -0,0 +1,216 @@ +/* Rax -- A radix tree implementation. + * + * Copyright (c) 2017-2018, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RAX_H +#define RAX_H + +#include + +/* Representation of a radix tree as implemented in this file, that contains + * the strings "foo", "foobar" and "footer" after the insertion of each + * word. When the node represents a key inside the radix tree, we write it + * between [], otherwise it is written between (). + * + * This is the vanilla representation: + * + * (f) "" + * \ + * (o) "f" + * \ + * (o) "fo" + * \ + * [t b] "foo" + * / \ + * "foot" (e) (a) "foob" + * / \ + * "foote" (r) (r) "fooba" + * / \ + * "footer" [] [] "foobar" + * + * However, this implementation implements a very common optimization where + * successive nodes having a single child are "compressed" into the node + * itself as a string of characters, each representing a next-level child, + * and only the link to the node representing the last character node is + * provided inside the representation. So the above representation is turned + * into: + * + * ["foo"] "" + * | + * [t b] "foo" + * / \ + * "foot" ("er") ("ar") "foob" + * / \ + * "footer" [] [] "foobar" + * + * However this optimization makes the implementation a bit more complex. + * For instance if a key "first" is added in the above radix tree, a + * "node splitting" operation is needed, since the "foo" prefix is no longer + * composed of nodes having a single child one after the other. This is the + * above tree and the resulting node splitting after this event happens: + * + * + * (f) "" + * / + * (i o) "f" + * / \ + * "firs" ("rst") (o) "fo" + * / \ + * "first" [] [t b] "foo" + * / \ + * "foot" ("er") ("ar") "foob" + * / \ + * "footer" [] [] "foobar" + * + * Similarly after deletion, if a new chain of nodes having a single child + * is created (the chain must also not include nodes that represent keys), + * it must be compressed back into a single node. + * + */ + +#define RAX_NODE_MAX_SIZE ((1<<29)-1) +typedef struct raxNode { + uint32_t iskey:1; /* Does this node contain a key? */ + uint32_t isnull:1; /* Associated value is NULL (don't store it). */ + uint32_t iscompr:1; /* Node is compressed. */ + uint32_t size:29; /* Number of children, or compressed string len. */ + /* Data layout is as follows: + * + * If node is not compressed we have 'size' bytes, one for each children + * character, and 'size' raxNode pointers, point to each child node. + * Note how the character is not stored in the children but in the + * edge of the parents: + * + * [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?) + * + * if node is compressed (iscompr bit is 1) the node has 1 children. + * In that case the 'size' bytes of the string stored immediately at + * the start of the data section, represent a sequence of successive + * nodes linked one after the other, for which only the last one in + * the sequence is actually represented as a node, and pointed to by + * the current compressed node. + * + * [header iscompr=1][xyz][z-ptr](value-ptr?) + * + * Both compressed and not compressed nodes can represent a key + * with associated data in the radix tree at any level (not just terminal + * nodes). + * + * If the node has an associated key (iskey=1) and is not NULL + * (isnull=0), then after the raxNode pointers poiting to the + * children, an additional value pointer is present (as you can see + * in the representation above as "value-ptr" field). + */ + unsigned char data[]; +} raxNode; + +typedef struct rax { + raxNode *head; + uint64_t numele; + uint64_t numnodes; +} rax; + +/* Stack data structure used by raxLowWalk() in order to, optionally, return + * a list of parent nodes to the caller. The nodes do not have a "parent" + * field for space concerns, so we use the auxiliary stack when needed. */ +#define RAX_STACK_STATIC_ITEMS 32 +typedef struct raxStack { + void **stack; /* Points to static_items or an heap allocated array. */ + size_t items, maxitems; /* Number of items contained and total space. */ + /* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap + * and use this static array of pointers instead. */ + void *static_items[RAX_STACK_STATIC_ITEMS]; + int oom; /* True if pushing into this stack failed for OOM at some point. */ +} raxStack; + +/* Optional callback used for iterators and be notified on each rax node, + * including nodes not representing keys. If the callback returns true + * the callback changed the node pointer in the iterator structure, and the + * iterator implementation will have to replace the pointer in the radix tree + * internals. This allows the callback to reallocate the node to perform + * very special operations, normally not needed by normal applications. + * + * This callback is used to perform very low level analysis of the radix tree + * structure, scanning each possible node (but the root node), or in order to + * reallocate the nodes to reduce the allocation fragmentation (this is the + * Redis application for this callback). + * + * This is currently only supported in forward iterations (raxNext) */ +typedef int (*raxNodeCallback)(raxNode **noderef); + +/* Radix tree iterator state is encapsulated into this data structure. */ +#define RAX_ITER_STATIC_LEN 128 +#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current + element for the first iteration and + clear the flag. */ +#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */ +#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while + iterating. But it is slower. */ +typedef struct raxIterator { + int flags; + rax *rt; /* Radix tree we are iterating. */ + unsigned char *key; /* The current string. */ + void *data; /* Data associated to this key. */ + size_t key_len; /* Current key length. */ + size_t key_max; /* Max key len the current key buffer can hold. */ + unsigned char key_static_string[RAX_ITER_STATIC_LEN]; + raxNode *node; /* Current node. Only for unsafe iteration. */ + raxStack stack; /* Stack used for unsafe iteration. */ + raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */ +} raxIterator; + +/* A special pointer returned for not found items. */ +extern void *raxNotFound; + +/* Exported API. */ +rax *raxNew(void); +int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); +int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old); +int raxRemove(rax *rax, unsigned char *s, size_t len, void **old); +void *raxFind(rax *rax, unsigned char *s, size_t len); +void raxFree(rax *rax); +void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)); +void raxStart(raxIterator *it, rax *rt); +int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len); +int raxNext(raxIterator *it); +int raxPrev(raxIterator *it); +int raxRandomWalk(raxIterator *it, size_t steps); +int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len); +void raxStop(raxIterator *it); +int raxEOF(raxIterator *it); +void raxShow(rax *rax); +uint64_t raxSize(rax *rax); +unsigned long raxTouch(raxNode *n); +void raxSetDebugMsg(int onoff); + +/* Internal API. May be used by the node callback in order to access rax nodes + * in a low level way, so this function is exported as well. */ +void raxSetData(raxNode *n, void *data); + +#endif diff --git a/implementations/C/try_rax.c b/implementations/C/try_rax.c new file mode 100644 index 0000000..a9cee11 --- /dev/null +++ b/implementations/C/try_rax.c @@ -0,0 +1,182 @@ +#include +#include +#include "rax.h" + + +#define INSERT(key) raxInsert(rt, (unsigned char*)(key), sizeof((key)), NULL, NULL); + +int +main() +{ + rax *rt = raxNew(); + INSERT("=") + INSERT(">") + INSERT("<") + INSERT("!=") + INSERT("<>") + INSERT("<=") + INSERT(">=") + INSERT("%") + INSERT("mod") + INSERT("*") + INSERT("mul") + INSERT("+") + INSERT("add") + INSERT("-") + INSERT("sub") + INSERT("/") + INSERT("div") + INSERT("lshift") + INSERT("rshift") + INSERT("bool") + INSERT("branch") + INSERT("clear") + INSERT("cmp") + INSERT("cons") + INSERT("concat") + INSERT("dip") + INSERT("dup") + INSERT("first") + INSERT("i") + INSERT("inscribe") + INSERT("loop") + INSERT("pop") + INSERT("rest") + INSERT("stack") + INSERT("swaack") + INSERT("swap") + INSERT("fn") + INSERT("eq") + INSERT("gt") + INSERT("lt") + INSERT("neq") + INSERT("le") + INSERT("ge") + INSERT("--") + INSERT("?") + INSERT("and") + INSERT("++") + INSERT("or") + INSERT("!-") + INSERT("<{}") + INSERT("<<{}") + INSERT("abs") + INSERT("anamorphism") + INSERT("app1") + INSERT("app2") + INSERT("app3") + INSERT("appN") + INSERT("at") + INSERT("average") + INSERT("b") + INSERT("binary") + INSERT("ccccons") + INSERT("ccons") + INSERT("clear") + INSERT("cleave") + INSERT("clop") + INSERT("cmp") + INSERT("codi") + INSERT("codireco") + INSERT("dinfrirst") + INSERT("dipd") + INSERT("disenstacken") + INSERT("divmod") + INSERT("down_to_zero") + INSERT("drop") + INSERT("dupd") + INSERT("dupdd") + INSERT("dupdip") + INSERT("dupdipd") + INSERT("enstacken") + INSERT("first") + INSERT("flatten") + INSERT("fork") + INSERT("fourth") + INSERT("gcd") + INSERT("genrec") + INSERT("grabN") + INSERT("grba") + INSERT("hypot") + INSERT("ifte") + INSERT("ii") + INSERT("infra") + INSERT("infrst") + INSERT("make_generator") + INSERT("mod") + INSERT("neg") + INSERT("not") + INSERT("nulco") + INSERT("null") + INSERT("nullary") + INSERT("of") + INSERT("pam") + INSERT("pm") + INSERT("popd") + INSERT("popdd") + INSERT("popop") + INSERT("popopop") + INSERT("popopd") + INSERT("popopdd") + INSERT("product") + INSERT("quoted") + INSERT("range") + INSERT("range_to_zero") + INSERT("reco") + INSERT("rest") + INSERT("reverse") + INSERT("roll>") + INSERT("roll<") + INSERT("rollup") + INSERT("rolldown") + INSERT("rrest") + INSERT("run") + INSERT("second") + INSERT("shift") + INSERT("shunt") + INSERT("size") + INSERT("small") + INSERT("spiral_next") + INSERT("split_at") + INSERT("split_list") + INSERT("sqr") + INSERT("stackd") + INSERT("step_zero") + INSERT("stuncons") + INSERT("sum") + INSERT("swapd") + INSERT("swons") + INSERT("swoncat") + INSERT("tailrec") + INSERT("take") + INSERT("ternary") + INSERT("third") + INSERT("tuck") + INSERT("unary") + INSERT("uncons") + INSERT("unit") + INSERT("unquoted") + INSERT("unstack") + INSERT("unswons") + INSERT("while") + INSERT("x") + INSERT("step") + INSERT("_step0") + INSERT("_step1") + INSERT("_stept") + INSERT("times") + INSERT("_times0") + INSERT("_times1") + INSERT("_timest") + INSERT("map") + INSERT("_map?") + INSERT("_mape") + INSERT("_map0") + INSERT("_map1") + INSERT("_map2") + INSERT("_\\/_") + INSERT("/\\") + INSERT("\\/") + raxShow(rt); +} +