Let's use rax to generate tab completions?

This commit is contained in:
Simon Forman 2023-02-20 11:19:27 -08:00
parent b32a3f2496
commit 7d93262c81
3 changed files with 401 additions and 0 deletions

View File

@ -45,3 +45,6 @@ clean:
linenoise.o: linenoise.c linenoise.h linenoise.o: linenoise.c linenoise.h
cc -c linenoise.c -o linenoise.o cc -c linenoise.c -o linenoise.o
tryrax: try_rax.c
cc try_rax.c rax.o -lm -o tryrax

216
implementations/C/rax.h Normal file
View File

@ -0,0 +1,216 @@
/* Rax -- A radix tree implementation.
*
* Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RAX_H
#define RAX_H
#include <stdint.h>
/* Representation of a radix tree as implemented in this file, that contains
* the strings "foo", "foobar" and "footer" after the insertion of each
* word. When the node represents a key inside the radix tree, we write it
* between [], otherwise it is written between ().
*
* This is the vanilla representation:
*
* (f) ""
* \
* (o) "f"
* \
* (o) "fo"
* \
* [t b] "foo"
* / \
* "foot" (e) (a) "foob"
* / \
* "foote" (r) (r) "fooba"
* / \
* "footer" [] [] "foobar"
*
* However, this implementation implements a very common optimization where
* successive nodes having a single child are "compressed" into the node
* itself as a string of characters, each representing a next-level child,
* and only the link to the node representing the last character node is
* provided inside the representation. So the above representation is turned
* into:
*
* ["foo"] ""
* |
* [t b] "foo"
* / \
* "foot" ("er") ("ar") "foob"
* / \
* "footer" [] [] "foobar"
*
* However this optimization makes the implementation a bit more complex.
* For instance if a key "first" is added in the above radix tree, a
* "node splitting" operation is needed, since the "foo" prefix is no longer
* composed of nodes having a single child one after the other. This is the
* above tree and the resulting node splitting after this event happens:
*
*
* (f) ""
* /
* (i o) "f"
* / \
* "firs" ("rst") (o) "fo"
* / \
* "first" [] [t b] "foo"
* / \
* "foot" ("er") ("ar") "foob"
* / \
* "footer" [] [] "foobar"
*
* Similarly after deletion, if a new chain of nodes having a single child
* is created (the chain must also not include nodes that represent keys),
* it must be compressed back into a single node.
*
*/
#define RAX_NODE_MAX_SIZE ((1<<29)-1)
typedef struct raxNode {
uint32_t iskey:1; /* Does this node contain a key? */
uint32_t isnull:1; /* Associated value is NULL (don't store it). */
uint32_t iscompr:1; /* Node is compressed. */
uint32_t size:29; /* Number of children, or compressed string len. */
/* Data layout is as follows:
*
* If node is not compressed we have 'size' bytes, one for each children
* character, and 'size' raxNode pointers, point to each child node.
* Note how the character is not stored in the children but in the
* edge of the parents:
*
* [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
*
* if node is compressed (iscompr bit is 1) the node has 1 children.
* In that case the 'size' bytes of the string stored immediately at
* the start of the data section, represent a sequence of successive
* nodes linked one after the other, for which only the last one in
* the sequence is actually represented as a node, and pointed to by
* the current compressed node.
*
* [header iscompr=1][xyz][z-ptr](value-ptr?)
*
* Both compressed and not compressed nodes can represent a key
* with associated data in the radix tree at any level (not just terminal
* nodes).
*
* If the node has an associated key (iskey=1) and is not NULL
* (isnull=0), then after the raxNode pointers poiting to the
* children, an additional value pointer is present (as you can see
* in the representation above as "value-ptr" field).
*/
unsigned char data[];
} raxNode;
typedef struct rax {
raxNode *head;
uint64_t numele;
uint64_t numnodes;
} rax;
/* Stack data structure used by raxLowWalk() in order to, optionally, return
* a list of parent nodes to the caller. The nodes do not have a "parent"
* field for space concerns, so we use the auxiliary stack when needed. */
#define RAX_STACK_STATIC_ITEMS 32
typedef struct raxStack {
void **stack; /* Points to static_items or an heap allocated array. */
size_t items, maxitems; /* Number of items contained and total space. */
/* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap
* and use this static array of pointers instead. */
void *static_items[RAX_STACK_STATIC_ITEMS];
int oom; /* True if pushing into this stack failed for OOM at some point. */
} raxStack;
/* Optional callback used for iterators and be notified on each rax node,
* including nodes not representing keys. If the callback returns true
* the callback changed the node pointer in the iterator structure, and the
* iterator implementation will have to replace the pointer in the radix tree
* internals. This allows the callback to reallocate the node to perform
* very special operations, normally not needed by normal applications.
*
* This callback is used to perform very low level analysis of the radix tree
* structure, scanning each possible node (but the root node), or in order to
* reallocate the nodes to reduce the allocation fragmentation (this is the
* Redis application for this callback).
*
* This is currently only supported in forward iterations (raxNext) */
typedef int (*raxNodeCallback)(raxNode **noderef);
/* Radix tree iterator state is encapsulated into this data structure. */
#define RAX_ITER_STATIC_LEN 128
#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
element for the first iteration and
clear the flag. */
#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */
#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while
iterating. But it is slower. */
typedef struct raxIterator {
int flags;
rax *rt; /* Radix tree we are iterating. */
unsigned char *key; /* The current string. */
void *data; /* Data associated to this key. */
size_t key_len; /* Current key length. */
size_t key_max; /* Max key len the current key buffer can hold. */
unsigned char key_static_string[RAX_ITER_STATIC_LEN];
raxNode *node; /* Current node. Only for unsafe iteration. */
raxStack stack; /* Stack used for unsafe iteration. */
raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
} raxIterator;
/* A special pointer returned for not found items. */
extern void *raxNotFound;
/* Exported API. */
rax *raxNew(void);
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
void *raxFind(rax *rax, unsigned char *s, size_t len);
void raxFree(rax *rax);
void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
void raxStart(raxIterator *it, rax *rt);
int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
int raxNext(raxIterator *it);
int raxPrev(raxIterator *it);
int raxRandomWalk(raxIterator *it, size_t steps);
int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
void raxStop(raxIterator *it);
int raxEOF(raxIterator *it);
void raxShow(rax *rax);
uint64_t raxSize(rax *rax);
unsigned long raxTouch(raxNode *n);
void raxSetDebugMsg(int onoff);
/* Internal API. May be used by the node callback in order to access rax nodes
* in a low level way, so this function is exported as well. */
void raxSetData(raxNode *n, void *data);
#endif

182
implementations/C/try_rax.c Normal file
View File

@ -0,0 +1,182 @@
#include <stddef.h>
#include <math.h>
#include "rax.h"
#define INSERT(key) raxInsert(rt, (unsigned char*)(key), sizeof((key)), NULL, NULL);
int
main()
{
rax *rt = raxNew();
INSERT("=")
INSERT(">")
INSERT("<")
INSERT("!=")
INSERT("<>")
INSERT("<=")
INSERT(">=")
INSERT("%")
INSERT("mod")
INSERT("*")
INSERT("mul")
INSERT("+")
INSERT("add")
INSERT("-")
INSERT("sub")
INSERT("/")
INSERT("div")
INSERT("lshift")
INSERT("rshift")
INSERT("bool")
INSERT("branch")
INSERT("clear")
INSERT("cmp")
INSERT("cons")
INSERT("concat")
INSERT("dip")
INSERT("dup")
INSERT("first")
INSERT("i")
INSERT("inscribe")
INSERT("loop")
INSERT("pop")
INSERT("rest")
INSERT("stack")
INSERT("swaack")
INSERT("swap")
INSERT("fn")
INSERT("eq")
INSERT("gt")
INSERT("lt")
INSERT("neq")
INSERT("le")
INSERT("ge")
INSERT("--")
INSERT("?")
INSERT("and")
INSERT("++")
INSERT("or")
INSERT("!-")
INSERT("<{}")
INSERT("<<{}")
INSERT("abs")
INSERT("anamorphism")
INSERT("app1")
INSERT("app2")
INSERT("app3")
INSERT("appN")
INSERT("at")
INSERT("average")
INSERT("b")
INSERT("binary")
INSERT("ccccons")
INSERT("ccons")
INSERT("clear")
INSERT("cleave")
INSERT("clop")
INSERT("cmp")
INSERT("codi")
INSERT("codireco")
INSERT("dinfrirst")
INSERT("dipd")
INSERT("disenstacken")
INSERT("divmod")
INSERT("down_to_zero")
INSERT("drop")
INSERT("dupd")
INSERT("dupdd")
INSERT("dupdip")
INSERT("dupdipd")
INSERT("enstacken")
INSERT("first")
INSERT("flatten")
INSERT("fork")
INSERT("fourth")
INSERT("gcd")
INSERT("genrec")
INSERT("grabN")
INSERT("grba")
INSERT("hypot")
INSERT("ifte")
INSERT("ii")
INSERT("infra")
INSERT("infrst")
INSERT("make_generator")
INSERT("mod")
INSERT("neg")
INSERT("not")
INSERT("nulco")
INSERT("null")
INSERT("nullary")
INSERT("of")
INSERT("pam")
INSERT("pm")
INSERT("popd")
INSERT("popdd")
INSERT("popop")
INSERT("popopop")
INSERT("popopd")
INSERT("popopdd")
INSERT("product")
INSERT("quoted")
INSERT("range")
INSERT("range_to_zero")
INSERT("reco")
INSERT("rest")
INSERT("reverse")
INSERT("roll>")
INSERT("roll<")
INSERT("rollup")
INSERT("rolldown")
INSERT("rrest")
INSERT("run")
INSERT("second")
INSERT("shift")
INSERT("shunt")
INSERT("size")
INSERT("small")
INSERT("spiral_next")
INSERT("split_at")
INSERT("split_list")
INSERT("sqr")
INSERT("stackd")
INSERT("step_zero")
INSERT("stuncons")
INSERT("sum")
INSERT("swapd")
INSERT("swons")
INSERT("swoncat")
INSERT("tailrec")
INSERT("take")
INSERT("ternary")
INSERT("third")
INSERT("tuck")
INSERT("unary")
INSERT("uncons")
INSERT("unit")
INSERT("unquoted")
INSERT("unstack")
INSERT("unswons")
INSERT("while")
INSERT("x")
INSERT("step")
INSERT("_step0")
INSERT("_step1")
INSERT("_stept")
INSERT("times")
INSERT("_times0")
INSERT("_times1")
INSERT("_timest")
INSERT("map")
INSERT("_map?")
INSERT("_mape")
INSERT("_map0")
INSERT("_map1")
INSERT("_map2")
INSERT("_\\/_")
INSERT("/\\")
INSERT("\\/")
raxShow(rt);
}