#include #include // // Simple cons list. // #define HEAP_SIZE 1024 u32 heads[HEAP_SIZE]; u32 tails[HEAP_SIZE]; u32 free = 1; // cell 0 is reserved so that 0 can be the empty list. #define TYPE_OF(pointer) (pointer >> 30) #define VALUE_OF(pointer) (pointer & 0x3fffffff) #define JOY_VALUE(type, value) ((type & 3) << 30) | (value & 0x3fffffff) u32 empty_list = 0; u8 joyList = 0; u8 joyInt = 1; u8 joySymbol = 2; u8 joyBool = 3; u32 cons(u32 head, u32 tail) { if (free >= HEAP_SIZE) return -1; heads[free] = head; tails[free] = tail; u32 cell = JOY_VALUE(joyList, free); ++free; return cell; } u32 head(u32 list) { return heads[VALUE_OF(list)]; } u32 tail(u32 list) { return tails[VALUE_OF(list)]; } /*u32 cons(u32 head, u32 tail) { return tail; }*/ // And now for a hash table. // https://benhoyt.com/writings/hash-table-in-c/#hash-tables // https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function #define FNV_OFFSET 0xcbf29ce484222325 #define FNV_PRIME 0x100000001b3 u64 hash_key(char* key) { u64 hash = FNV_OFFSET; for (char* p = key; *p; ++p) { hash = hash ^ (u64)(unsigned char)(*p); hash = hash * FNV_PRIME; } return hash; } // Capacity is a power of two (10 for now.) #define EXPONENT 10 #define CAPACITY 1024 #define HASH_MASK 1023 char* hash_table[CAPACITY]; u32 ht_insert(char *symbol) { u64 hash = hash_key(symbol); u32 index = hash % CAPACITY; char *candidate = hash_table[index]; if (!candidate) { //print_str("interning ");print_str(symbol);print_endl(); hash_table[index] = symbol; return JOY_VALUE(joySymbol, VALUE_OF(hash)); } // https://en.wikipedia.org/wiki/Double_hashing // Rather than use another hash function I'm going to try // using the extra bits of the same hash. u32 increment = ((VALUE_OF(hash) >> EXPONENT) | 1) % CAPACITY; // If I understand correctly, making the increment odd // means it will traverse the whole (even-sized) table. while (candidate) { // Compare pointers then hashes (since we already have // one hash I'm guessing that that's cheaper or at least // no more expensive than string comparision.) if (candidate == symbol || hash == hash_key(candidate)) break; index = (index + increment) % CAPACITY; candidate = hash_table[index]; } if (!candidate) { //print_str("interning ");print_str(symbol);print_endl(); hash_table[index] = symbol; } return JOY_VALUE(joySymbol, VALUE_OF(hash)); } char* ht_lookup(u32 hash) { // Note that hash will be truncated to N (N=30 as it happens) bits // by VALUE_OF(). u32 index = hash % CAPACITY; char *candidate = hash_table[index]; u32 increment = ((hash >> EXPONENT) | 1) % CAPACITY; while (candidate) { if (hash == VALUE_OF(hash_key(candidate))) return candidate; index = (index + increment) % CAPACITY; candidate = hash_table[index]; } /*error = UNKNOWN_WORD_ERROR;*/ return 0; } // // Simple string storage heap. // #define STRING_HEAP_SIZE 100000 char string_heap[STRING_HEAP_SIZE]; u32 string_heap_top = 0; char* allocate_string(char *buffer, u32 offset, u32 length) { u64 end = string_heap_top + length + 1; if (end >= STRING_HEAP_SIZE) return 0; memcpy(string_heap + string_heap_top, buffer + offset, length); string_heap[end] = '\0'; u32 new_string = string_heap_top; string_heap_top = (u32)end + 1; //print_str("allocating ");print_str(string_heap + new_string);print_endl(); return string_heap + new_string; } u32 push_symbol(char *symbol, u32 stack) { return cons(JOY_VALUE(joySymbol, ht_insert(symbol)), stack); } char* LEFT_BRACKET_symbol = "["; char* RIGHT_BRACKET_symbol = "]"; u32 LEFT_BRACKET; u32 RIGHT_BRACKET; bool is_integer(char *str, u32 index, u32 length) { for (;length; --length) { char ch = *(str + index + length - 1); if (!(ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4' || ch == '5' || ch == '6' || ch == '7' || ch == '8' || ch == '9')) { return 0; } } return 1; } u32 convert_integer(char *str, u32 index, u32 length) { u32 result = 0; length = length + index; for (; index < length; ++index) { char ch = *(str + index); u8 digit = (u8)ch - (u8)'0'; result = result * 10 + digit; } //print_str("converted integer ");print_i64(result);print_endl(); return JOY_VALUE(joyInt, result); } u32 tokenate(char *str, u32 index, u32 length) { if (4 == length && *(str + index) == 't' && *(str + index + 1) == 'r' && *(str + index + 2) == 'u' && *(str + index + 3) == 'e' ) { //print_str("tokenate true");print_endl(); return JOY_VALUE(joyBool, 1); } if (5 == length && *(str + index) == 'f' && *(str + index + 1) == 'a' && *(str + index + 2) == 'l' && *(str + index + 3) == 's' && *(str + index + 4) == 'e' ) { //print_str("tokenate false");print_endl(); return JOY_VALUE(joyBool, 0); } if (is_integer(str, index, length)) { //print_str("tokenate integer");print_endl(); return convert_integer(str, index, length); } // TODO: Convert bools and ints here? // Use ht_insert to avoid multiple allocations of the same string! char *token = allocate_string(str, index, length); if (!token) return 0; // OOM return JOY_VALUE(joySymbol, ht_insert(token)); } u32 tokenize0(char *str, u32 str_length, u32 index, u32 acc) { if (index >= str_length) { //print_i64(index);print_str(" : ");print_str("END tokenize");print_endl(); //print_i64(acc);print_str("<");print_endl(); return acc; } //print_i64(index);print_str(" : ");print_str(str + index);print_endl(); char ch = str[index]; if ('[' == ch) { acc = cons(LEFT_BRACKET, tokenize0(str, str_length, index + 1, acc)); //print_i64(acc);print_str("<[");print_endl(); return acc; } if (']' == ch) { acc = cons(RIGHT_BRACKET, tokenize0(str, str_length, index + 1, acc)); //print_i64(acc);print_str("<]");print_endl(); return acc; } if (' ' == ch) { return tokenize0(str, str_length, index + 1, acc); } u32 i = index + 1; for (; i < str_length; ++i) { if (str[i] == '[' || str[i] == ']' || str[i] == ' ') { break; } } // i == str_length OR str[i] is a delimiter char. return cons(tokenate(str, index, i - index), tokenize0(str, str_length, i, acc)); } u32 tokenize(char *str) { return tokenize0(str, strlen(str), 0, empty_list); } u32 rev(u32 el, u32 end) { u32 t = tail(el); tails[el] = end; if (t) { return rev(t, el); } return el; } u32 reverse_list_in_place(u32 el) { if (!el) { // Empty list. return el; } u32 t = tail(el); if (!t) { // Length-one list. return el; } // Re-write tails[el] to point to null tails[el] = empty_list; // reverse the tail of this list and prepend it to [el]. return rev(t, el); } u32 stack[1000]; u32 stack_top = 0; u32 text_to_expression(char *str) { u32 frame = empty_list; u32 tokens = tokenize(str); //print_str("tokens: "); print_joy_list(tokens); print_endl(); //return tokens; while (tokens) { u32 tok = head(tokens); tokens = tail(tokens); if (LEFT_BRACKET == tok) { //print_str("left bracket");print_endl(); stack[stack_top] = frame; ++stack_top; frame = empty_list; continue; } if (RIGHT_BRACKET == tok) { //print_str("right bracket");print_endl(); tok = reverse_list_in_place(frame); //print_str("new list: "); print_joy_list(tok); print_endl(); --stack_top; // This first! THEN get the old frame! D'oh! frame = stack[stack_top]; } frame = cons(tok, frame); //print_str("t2e frame: "); print_joy_list(frame); print_endl(); } return reverse_list_in_place(frame); } void print_joy_value(u32 jv) { u8 type = TYPE_OF(jv); if (type == joyInt) { print_i64(VALUE_OF(jv)); } else if (type == joyBool) { print_str(VALUE_OF(jv) ? "true" : "false"); } else if (type == joyList) { print_str("["); print_joy_list(jv); print_str("]"); } else if (type == joySymbol) { char *str = ht_lookup(VALUE_OF(jv)); /*if (error != NO_ERROR)*/ /* return;*/ print_str(str); } } void print_joy_list(u32 list) { while (list) { print_joy_value(head(list)); list = tail(list); if (list) { print_str(" "); } } } void main() { memset(string_heap, 0, sizeof(string_heap)); LEFT_BRACKET = JOY_VALUE(joySymbol, ht_insert(LEFT_BRACKET_symbol)); RIGHT_BRACKET = JOY_VALUE(joySymbol, ht_insert(RIGHT_BRACKET_symbol)); //char *buffer = " 1[2[ 3 cats ]4] cats dogs bunnies"; char *buffer = " 1[2]3 bunnies"; /*print_str(allocate_string(buffer, 4, 4)); print_endl();*/ /*print_str(allocate_string(buffer, 2, 4)); print_endl();*/ /*print_str(allocate_string(buffer, 7, 5)); print_endl();*/ u32 jv = text_to_expression(" 1[2[true 3][[]]bob]false[]bob 3[4]5"); //jv = reverse_list_in_place(jv); if (LEFT_BRACKET == jv) { print_str("boo"); } else { //jv = JOY_VALUE(joyList, jv); print_joy_list(jv); } print_endl(); }