diff --git a/implementations/uvm-ncc/joy.c b/implementations/uvm-ncc/joy.c new file mode 100644 index 0000000..e08b6ef --- /dev/null +++ b/implementations/uvm-ncc/joy.c @@ -0,0 +1,581 @@ +// Copyright © 2023 Simon Forman +// +// This file is part of Thun +// +// Thun is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Thun is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Thun. If not see . +// +#include +#include +/* +███████╗██████╗ ██████╗ ██████╗ ██████╗ +██╔════╝██╔══██╗██╔══██╗██╔═══██╗██╔══██╗ +█████╗ ██████╔╝██████╔╝██║ ██║██████╔╝ +██╔══╝ ██╔══██╗██╔══██╗██║ ██║██╔══██╗ +███████╗██║ ██║██║ ██║╚██████╔╝██║ ██║ +╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝ + +██╗ ██╗ █████╗ ███╗ ██╗██████╗ ██╗ ██╗███╗ ██╗ ██████╗ +██║ ██║██╔══██╗████╗ ██║██╔══██╗██║ ██║████╗ ██║██╔════╝ +███████║███████║██╔██╗ ██║██║ ██║██║ ██║██╔██╗ ██║██║ ███╗ +██╔══██║██╔══██║██║╚██╗██║██║ ██║██║ ██║██║╚██╗██║██║ ██║ +██║ ██║██║ ██║██║ ╚████║██████╔╝███████╗██║██║ ╚████║╚██████╔╝ +╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═══╝╚═════╝ ╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +No setjmp/longjmp, so let's have a global error value and check it after ops. +*/ + +u64 error = 0; + +#define NO_ERROR 0 +#define UNKNOWN_WORD_ERROR 1 +#define MISSING_CLOSING_BRACKET 2 +#define EXTRA_CLOSING_BRACKET 3 +#define CONS_HEAP_OOM 4 +#define STRING_HEAP_OOM 5 + +/* +char *error_messages[3] = { + "", + "Unknown word", + "Missing closing bracket" +}; +*/ + +/* + ██████╗ ██████╗ ███╗ ██╗███████╗ ██╗ ██╗███████╗ █████╗ ██████╗ +██╔════╝██╔═══██╗████╗ ██║██╔════╝ ██║ ██║██╔════╝██╔══██╗██╔══██╗ +██║ ██║ ██║██╔██╗ ██║███████╗ ███████║█████╗ ███████║██████╔╝ +██║ ██║ ██║██║╚██╗██║╚════██║ ██╔══██║██╔══╝ ██╔══██║██╔═══╝ +╚██████╗╚██████╔╝██║ ╚████║███████║ ██║ ██║███████╗██║ ██║██║ + ╚═════╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ +Cons Heap + +We don't have Unions, Enums, or Typedefs. So how do we represent Joy types? +In SICP they use a pair of arrays of pointers, one for heads and one +for tails. + +> A pointer to a pair is an index into the two vectors. + +*/ + +#define HEAP_SIZE 1024 + +u32 heads[HEAP_SIZE]; +u32 tails[HEAP_SIZE]; + + +// cell 0 is reserved so that 0 can be the empty list. +u32 free = 1; + +// > We also need a representation for objects other than pairs (such as +// > numbers and symbols) and a way to distinguish one kind of data from +// > another. There are many methods of accomplishing this, but they all +// > reduce to using typed pointers, that is, to extending the notion of +// > ``pointer'' to include information on data type. + +// Let's use u32 with the two MSB's for the type tag. + +#define TYPE_OF(pointer) (pointer >> 30) +#define VALUE_OF(pointer) (pointer & 0x3fffffff) +#define JOY_VALUE(type, value) ((type & 3) << 30) | (value & 0x3fffffff) + +/* +This means that our ints are restricted to 30 bits for now, until +I implement bignums. + + +In the Thun dialect of Joy we have four types of values: + +Integers, Booleans, Symbols, and Lists. +*/ +u8 joyList = 0; +u8 joyInt = 1; +u8 joySymbol = 2; +u8 joyBool = 3; + +// Because the type tag for lists is 0 the empty list is just 0; +u32 empty_list = 0; + +u32 +cons(u32 head, u32 tail) +{ + if (free >= HEAP_SIZE) { + error = CONS_HEAP_OOM; + return -1; + } + heads[free] = head; + tails[free] = tail; + u32 cell = JOY_VALUE(joyList, free); + ++free; + return cell; +} + +u32 head(u32 list) { return heads[VALUE_OF(list)]; } +u32 tail(u32 list) { return tails[VALUE_OF(list)]; } + + + +/* +███████╗████████╗██████╗ ██╗███╗ ██╗ ██████╗ +██╔════╝╚══██╔══╝██╔══██╗██║████╗ ██║██╔════╝ +███████╗ ██║ ██████╔╝██║██╔██╗ ██║██║ ███╗ +╚════██║ ██║ ██╔══██╗██║██║╚██╗██║██║ ██║ +███████║ ██║ ██║ ██║██║██║ ╚████║╚██████╔╝ +╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +██╗ ██╗███████╗ █████╗ ██████╗ +██║ ██║██╔════╝██╔══██╗██╔══██╗ +███████║█████╗ ███████║██████╔╝ +██╔══██║██╔══╝ ██╔══██║██╔═══╝ +██║ ██║███████╗██║ ██║██║ +╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ +Simple string storage heap. + +We need a place to keep symbol strings. + +*/ + +#define STRING_HEAP_SIZE 100000 + +char string_heap[STRING_HEAP_SIZE]; +u32 string_heap_top = 0; + +char* +allocate_string(char *buffer, u32 offset, u32 length) +{ + u64 end = string_heap_top + length + 1; + if (end >= STRING_HEAP_SIZE) { + error = STRING_HEAP_OOM; + return 0; + } + memcpy(string_heap + string_heap_top, buffer + offset, length); + string_heap[end] = '\0'; + u32 new_string = string_heap_top; + string_heap_top = (u32)end + 1; + //print_str("allocating ");print_str(string_heap + new_string);print_endl(); + return string_heap + new_string; +} + + +/* +██████╗ ██████╗ ██╗███╗ ██╗████████╗███████╗██████╗ +██╔══██╗██╔══██╗██║████╗ ██║╚══██╔══╝██╔════╝██╔══██╗ +██████╔╝██████╔╝██║██╔██╗ ██║ ██║ █████╗ ██████╔╝ +██╔═══╝ ██╔══██╗██║██║╚██╗██║ ██║ ██╔══╝ ██╔══██╗ +██║ ██║ ██║██║██║ ╚████║ ██║ ███████╗██║ ██║ +╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ +Printer +*/ + +void +print_joy_value(u32 jv) +{ + u8 type = TYPE_OF(jv); + if (type == joyInt) { + print_i64(VALUE_OF(jv)); + } else if (type == joyBool) { + print_str(VALUE_OF(jv) ? "true" : "false"); + } else if (type == joyList) { + print_str("["); + print_joy_list(jv); + print_str("]"); + } else if (type == joySymbol) { + char *str = ht_lookup(VALUE_OF(jv)); + if (error != NO_ERROR) + return; + print_str(str); + } +} + +void +print_joy_list(u32 list) +{ + while (list) { + print_joy_value(head(list)); + if (error != NO_ERROR) + return; + list = tail(list); + if (list) { + print_str(" "); + } + } +} + + +/* +██╗ ██╗ █████╗ ███████╗██╗ ██╗ +██║ ██║██╔══██╗██╔════╝██║ ██║ +███████║███████║███████╗███████║ +██╔══██║██╔══██║╚════██║██╔══██║ +██║ ██║██║ ██║███████║██║ ██║ +╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝ + +████████╗ █████╗ ██████╗ ██╗ ███████╗ +╚══██╔══╝██╔══██╗██╔══██╗██║ ██╔════╝ + ██║ ███████║██████╔╝██║ █████╗ + ██║ ██╔══██║██╔══██╗██║ ██╔══╝ + ██║ ██║ ██║██████╔╝███████╗███████╗ + ╚═╝ ╚═╝ ╚═╝╚═════╝ ╚══════╝╚══════╝ +And now for a hash table. + +This table maps between hashes of symbol strings which are used in the +tagged pointers in Joy values and strings which are stored in the string +heap. + +TODO: bool ht_has(char *str, u32 index, u32 length) to see if a fragment + of a string buffer is a symbol in the hash table. + +FNV hash function. + +https://benhoyt.com/writings/hash-table-in-c/#hash-tables +https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function + +*/ + +#define FNV_OFFSET 0xcbf29ce484222325 +#define FNV_PRIME 0x100000001b3 + +u64 +hash_key(char* key) +{ + u64 hash = FNV_OFFSET; + for (char* p = key; *p; ++p) { + hash = hash ^ (u64)(unsigned char)(*p); + hash = hash * FNV_PRIME; + } + return hash; +} + +u64 +hash_fragment(char *str, u32 index, u32 length) +{ + u64 hash = FNV_OFFSET; + for (char* p = (str + index); length; --length, ++p) { + hash = hash ^ (u64)(unsigned char)(*p); + hash = hash * FNV_PRIME; + } + return hash; +} + +// Capacity is a power of two (10 for now.) +#define EXPONENT 10 +#define CAPACITY 1024 +#define HASH_MASK 1023 + +// Note that there's no checking for filling the table and expanding. +// For now, I'm just going to use a "large enough" table and hope +// for the best. (We have thirty bits to work with so the obvious +// thing to do is make the exponent fifteen, half for the hash key +// and half for the increment.) + +char* hash_table[CAPACITY]; + +u32 +ht_insert(char *symbol) +{ + u64 hash = hash_key(symbol); + u32 index = hash % CAPACITY; + + char *candidate = hash_table[index]; + if (!candidate) { + hash_table[index] = symbol; + return JOY_VALUE(joySymbol, VALUE_OF(hash)); + } + + // https://en.wikipedia.org/wiki/Double_hashing + // Rather than use another hash function I'm going to try + // using the extra bits of the same hash. + u32 increment = ((VALUE_OF(hash) >> EXPONENT) | 1) % CAPACITY; + // If I understand correctly, making the increment odd + // means it will traverse the whole (even-sized) table. + while (candidate) { + // Compare pointers then hashes (since we already have + // one hash I'm guessing that that's cheaper or at least + // no more expensive than string comparision.) + if (candidate == symbol || hash == hash_key(candidate)) + break; + index = (index + increment) % CAPACITY; + candidate = hash_table[index]; + } + if (!candidate) { + hash_table[index] = symbol; + } + return JOY_VALUE(joySymbol, VALUE_OF(hash)); +} + +char* +ht_lookup(u32 hash) +{ + // Note that hash must be truncated to N (N=30 as it happens) bits + // by VALUE_OF(). + u32 index = hash % CAPACITY; + char *candidate = hash_table[index]; + u32 increment = ((hash >> EXPONENT) | 1) % CAPACITY; + while (candidate) { + if (hash == VALUE_OF(hash_key(candidate))) + return candidate; + index = (index + increment) % CAPACITY; + candidate = hash_table[index]; + } + error = UNKNOWN_WORD_ERROR; + return 0; +} + + +u32 +ht_has(char *str, u32 index, u32 length) +{ + u32 hash = VALUE_OF(hash_fragment(str, index, length)); + ht_lookup(hash); + if (UNKNOWN_WORD_ERROR == error) { + error = NO_ERROR; + return 0; + } + return hash; +} + + + + + +/******************************************************************************/ + +u32 +push_symbol(char *symbol, u32 stack) +{ + return cons(JOY_VALUE(joySymbol, ht_insert(symbol)), stack); +} +u32 +push_int(u32 n, u32 stack) +{ + return cons(JOY_VALUE(joyInt, n), stack); +} + +/******************************************************************************/ + +bool +is_integer(char *str, u32 index, u32 length) +{ + for (;length; --length) { + char ch = *(str + index + length - 1); + if (!(ch == '0' + || ch == '1' + || ch == '2' + || ch == '3' + || ch == '4' + || ch == '5' + || ch == '6' + || ch == '7' + || ch == '8' + || ch == '9')) + { + return 0; + } + } + return 1; +} + +u32 +convert_integer(char *str, u32 index, u32 length) +{ + u32 result = 0; + length = length + index; + for (; index < length; ++index) { + char ch = *(str + index); + u8 digit = (u8)ch - (u8)'0'; + result = result * 10 + digit; + } + //print_str("converted integer ");print_i64(result);print_endl(); + return JOY_VALUE(joyInt, result); +} + +/******************************************************************************/ + + +/* +██████╗ █████╗ ██████╗ ███████╗███████╗██████╗ +██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝██╔══██╗ +██████╔╝███████║██████╔╝███████╗█████╗ ██████╔╝ +██╔═══╝ ██╔══██║██╔══██╗╚════██║██╔══╝ ██╔══██╗ +██║ ██║ ██║██║ ██║███████║███████╗██║ ██║ +╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═╝ +Parser + +*/ + + +u32 +intern(char *str, u32 index, u32 length) +{ + u32 symbol_hash = ht_has(str, index, length); + if (!symbol_hash) { + char *token = allocate_string(str, index, length); + if (error != NO_ERROR) { + //print_str("a. Error code: ");print_i64(error);print_endl(); + return 0; + } + symbol_hash = ht_insert(token); + } + return JOY_VALUE(joySymbol, symbol_hash); +} + + +u32 +tokenate(char *str, u32 index, u32 length) +{ + if (4 == length + && *(str + index) == 't' + && *(str + index + 1) == 'r' + && *(str + index + 2) == 'u' + && *(str + index + 3) == 'e' + ) { + //print_str("tokenate true");print_endl(); + return JOY_VALUE(joyBool, 1); + } + if (5 == length + && *(str + index) == 'f' + && *(str + index + 1) == 'a' + && *(str + index + 2) == 'l' + && *(str + index + 3) == 's' + && *(str + index + 4) == 'e' + ) { + //print_str("tokenate false");print_endl(); + return JOY_VALUE(joyBool, 0); + } + if (is_integer(str, index, length)) { + //print_str("tokenate integer");print_endl(); + return convert_integer(str, index, length); + } + return intern(str, index, length); +} + + +int +is_delimiter(char ch) +{ + return ch == '[' || ch == ']' || ch == ' '; +} + +// Store in-progress lists. Here as in the hash table I'm not checking +// for capacity overload or anything like that. If you think you're going +// to parse more than a five hundred '[' chars then increase the size of +// this array. +u32 t2e_stack[1000]; +u32 t2e_stack_top = 0; + +#define T2E_PUSH(thing) t2e_stack[t2e_stack_top] = (thing); ++t2e_stack_top; (thing) = empty_list; +#define T2E_POP(thing) if (!t2e_stack_top) { error = EXTRA_CLOSING_BRACKET; return 0; }; --t2e_stack_top; (thing) = t2e_stack[t2e_stack_top]; + +u32 +text_to_expression(char *str) +{ + u32 index = 0; + u32 end = empty_list; + u32 top = empty_list; + u32 tok = empty_list; + u64 str_length = strlen(str); // TODO: rewrite so we don't iterate through the string twice. + while (index < str_length) { + char ch = str[index]; + if (' ' == ch) { + ++index; + continue; + } + if ('[' == ch) { // start new list + ++index; + T2E_PUSH(end) + T2E_PUSH(top) + continue; + } + if (']' == ch) { // finish last new list + ++index; + tok = top; + T2E_POP(top) + T2E_POP(end) + } else { + u32 i = index + 1; + for (; i < str_length && !is_delimiter(str[i]); ++i) {} + // i == str_length OR str[i] is a delimiter char. + tok = tokenate(str, index, i - index); + index = i; + } + u32 cell = cons(tok, empty_list); + if (end) tails[end] = cell; + if (!top) top = cell; + end = cell; + } + if (t2e_stack_top) { + error = MISSING_CLOSING_BRACKET; + return empty_list; + } + return top; +} + + +u32 +joy(u32 stack, u32 expression) +{ + u32 term; + while (expression) { + term = head(expression); + expression = tail(expression); + //print_joy_value(term);print_endl(); + //print_i64(expression);print_endl(); + if (TYPE_OF(term) == joySymbol) { + char *str = ht_lookup(VALUE_OF(term)); + if (error != NO_ERROR) + return 0; + print_str(str);print_endl(); + } else { // type == joyInt || type == joyBool || type == joyList + stack = cons(term, stack); + } + } + return stack; +} + + +void +main() +{ + memset(hash_table, 0, sizeof(hash_table)); + memset(string_heap, 0, sizeof(string_heap)); + memset(t2e_stack, 0, sizeof(t2e_stack)); + error = NO_ERROR; + + // TODO: these should be global. + u32 joy_true = JOY_VALUE(joyBool, 1); + u32 joy_false = JOY_VALUE(joyBool, 0); + + /* + u32 stack = empty_list; + stack = push_int(23, stack); + stack = cons(joy_true, stack); + stack = push_int(42, stack); + stack = push_symbol("cats", stack); + u32 el = empty_list; + el = push_int(48, el); + el = cons(el, el); + stack = cons(el, stack); + stack = cons(joy_false, stack); + stack = push_int(273, stack); + print_joy_list(stack); + print_endl(); + */ + + u32 expression = text_to_expression(" 1[2[true 3][aa[aa bb] aa bb cc]bob]false[]bob 3[4] ga[]ry"); + print_joy_list(expression); + print_endl(); + u32 stack = joy(empty_list, expression); + print_joy_list(stack); + print_endl(); +}