diff --git a/implementations/uvm-ncc/joy_types.c b/implementations/uvm-ncc/joy_types.c index 0720794..5297e6b 100644 --- a/implementations/uvm-ncc/joy_types.c +++ b/implementations/uvm-ncc/joy_types.c @@ -1,37 +1,76 @@ +// Copyright © 2023 Simon Forman +// +// This file is part of Thun +// +// Thun is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// Thun is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Thun. If not see . +// #include -// In the Thun dialect of Joy we have four types of values: -// Integers, Booleans, Symbols, and Lists. -// We don't have Unions, Enums, or Typedefs. -// -// So how do we represent Joy types? -// -// In SICP they use a pair of arrays of pointers, one for heads and one -// for tails. +#include + +/* + ██████╗ ██████╗ ███╗ ██╗███████╗ ██╗ ██╗███████╗ █████╗ ██████╗ +██╔════╝██╔═══██╗████╗ ██║██╔════╝ ██║ ██║██╔════╝██╔══██╗██╔══██╗ +██║ ██║ ██║██╔██╗ ██║███████╗ ███████║█████╗ ███████║██████╔╝ +██║ ██║ ██║██║╚██╗██║╚════██║ ██╔══██║██╔══╝ ██╔══██║██╔═══╝ +╚██████╗╚██████╔╝██║ ╚████║███████║ ██║ ██║███████╗██║ ██║██║ + ╚═════╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ +Cons Heap + +We don't have Unions, Enums, or Typedefs. So how do we represent Joy types? +In SICP they use a pair of arrays of pointers, one for heads and one +for tails. + +> A pointer to a pair is an index into the two vectors. + +*/ #define HEAP_SIZE 1024 u32 heads[HEAP_SIZE]; u32 tails[HEAP_SIZE]; -// > A pointer to a pair is an index into the two vectors. -u32 free = 0; +// cell 0 is reserved so that 0 can be the empty list. +u32 free = 1; // > We also need a representation for objects other than pairs (such as -// numbers and symbols) and a way to distinguish one kind of data from -// another. There are many methods of accomplishing this, but they all -// reduce to using typed pointers, that is, to extending the notion of -// ``pointer'' to include information on data type. +// > numbers and symbols) and a way to distinguish one kind of data from +// > another. There are many methods of accomplishing this, but they all +// > reduce to using typed pointers, that is, to extending the notion of +// > ``pointer'' to include information on data type. + +// Let's use u32 with the two MSB's for the type tag. #define TYPE_OF(pointer) (pointer >> 30) #define VALUE_OF(pointer) (pointer & 0x3fffffff) #define JOY_VALUE(type, value) ((type & 3) << 30) | (value & 0x3fffffff) -u8 joyInt = 0; -u8 joyList = 1; +/* +This means that our ints are restricted to 30 bits for now, until +I implement bignums. + + +In the Thun dialect of Joy we have four types of values: + +Integers, Booleans, Symbols, and Lists. +*/ +u8 joyList = 0; +u8 joyInt = 1; u8 joySymbol = 2; u8 joyBool = 3; +// Because the type tag for lists is 0 the empty list is just 0; u32 empty_list = 0; u32 @@ -51,6 +90,49 @@ u32 head(u32 list) { return heads[VALUE_OF(list)]; } u32 tail(u32 list) { return tails[VALUE_OF(list)]; } + +/* +███████╗████████╗██████╗ ██╗███╗ ██╗ ██████╗ +██╔════╝╚══██╔══╝██╔══██╗██║████╗ ██║██╔════╝ +███████╗ ██║ ██████╔╝██║██╔██╗ ██║██║ ███╗ +╚════██║ ██║ ██╔══██╗██║██║╚██╗██║██║ ██║ +███████║ ██║ ██║ ██║██║██║ ╚████║╚██████╔╝ +╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +██╗ ██╗███████╗ █████╗ ██████╗ +██║ ██║██╔════╝██╔══██╗██╔══██╗ +███████║█████╗ ███████║██████╔╝ +██╔══██║██╔══╝ ██╔══██║██╔═══╝ +██║ ██║███████╗██║ ██║██║ +╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ +Simple string storage heap. + +We need a place to keep symbol strings. + +*/ + +#define STRING_HEAP_SIZE 100000 + +char string_heap[STRING_HEAP_SIZE]; +u32 string_heap_top = 0; + +char* +allocate_string(char *buffer, u32 offset, u32 length) +{ + u64 end = string_heap_top + length + 1; + if (end >= STRING_HEAP_SIZE) + return 0; + memcpy(string_heap + string_heap_top, buffer + offset, length); + string_heap[end] = '\0'; + u32 new_string = string_heap_top; + string_heap_top = (u32)end + 1; + //print_str("allocating ");print_str(string_heap + new_string);print_endl(); + return string_heap + new_string; +} + + +/******************************************************************************/ + // No setjmp/longjmp, so let's have a global error value and check it after ops. u64 error; @@ -58,6 +140,19 @@ u64 error; #define UNKNOWN_WORD_ERROR 1 #define MISSING_CLOSING_BRACKET 2 +/******************************************************************************/ + + +/* +██████╗ ██████╗ ██╗███╗ ██╗████████╗███████╗██████╗ +██╔══██╗██╔══██╗██║████╗ ██║╚══██╔══╝██╔════╝██╔══██╗ +██████╔╝██████╔╝██║██╔██╗ ██║ ██║ █████╗ ██████╔╝ +██╔═══╝ ██╔══██╗██║██║╚██╗██║ ██║ ██╔══╝ ██╔══██╗ +██║ ██║ ██║██║██║ ╚████║ ██║ ███████╗██║ ██║ +╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ +Printer +*/ + void print_joy_value(u32 jv) { @@ -93,9 +188,32 @@ print_joy_list(u32 list) } -// And now for a hash table. -// https://benhoyt.com/writings/hash-table-in-c/#hash-tables -// https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function +/* +██╗ ██╗ █████╗ ███████╗██╗ ██╗ +██║ ██║██╔══██╗██╔════╝██║ ██║ +███████║███████║███████╗███████║ +██╔══██║██╔══██║╚════██║██╔══██║ +██║ ██║██║ ██║███████║██║ ██║ +╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝ + +████████╗ █████╗ ██████╗ ██╗ ███████╗ +╚══██╔══╝██╔══██╗██╔══██╗██║ ██╔════╝ + ██║ ███████║██████╔╝██║ █████╗ + ██║ ██╔══██║██╔══██╗██║ ██╔══╝ + ██║ ██║ ██║██████╔╝███████╗███████╗ + ╚═╝ ╚═╝ ╚═╝╚═════╝ ╚══════╝╚══════╝ +And now for a hash table. + +This table maps between hashes of symbol strings which are used in the tagged pointers in Joy values +and strings which are stored in the string heap. + + +FNV hash function. + +https://benhoyt.com/writings/hash-table-in-c/#hash-tables +https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function + +*/ #define FNV_OFFSET 0xcbf29ce484222325 #define FNV_PRIME 0x100000001b3 @@ -154,10 +272,10 @@ ht_insert(char *symbol) char* ht_lookup(u32 hash) { - // Note that hash will be truncated to N (N=30 as it happens) bits - // by VALUE_OF(). u32 index = hash % CAPACITY; char *candidate = hash_table[index]; + // Note that hash will be truncated to N (N=30 as it happens) bits + // by VALUE_OF(). u32 increment = ((hash >> EXPONENT) | 1) % CAPACITY; while (candidate) { if (hash == VALUE_OF(hash_key(candidate))) @@ -169,42 +287,245 @@ ht_lookup(u32 hash) return 0; } +/******************************************************************************/ + u32 push_symbol(char *symbol, u32 stack) { return cons(JOY_VALUE(joySymbol, ht_insert(symbol)), stack); } +u32 +push_int(u32 n, u32 stack) +{ + return cons(JOY_VALUE(joyInt, n), stack); +} + +/******************************************************************************/ + +bool +is_integer(char *str, u32 index, u32 length) +{ + for (;length; --length) { + char ch = *(str + index + length - 1); + if (!(ch == '0' + || ch == '1' + || ch == '2' + || ch == '3' + || ch == '4' + || ch == '5' + || ch == '6' + || ch == '7' + || ch == '8' + || ch == '9')) + { + return 0; + } + } + return 1; +} + +u32 +convert_integer(char *str, u32 index, u32 length) +{ + u32 result = 0; + length = length + index; + for (; index < length; ++index) { + char ch = *(str + index); + u8 digit = (u8)ch - (u8)'0'; + result = result * 10 + digit; + } + //print_str("converted integer ");print_i64(result);print_endl(); + return JOY_VALUE(joyInt, result); +} + +/******************************************************************************/ + +/* +████████╗ ██████╗ ██╗ ██╗███████╗███╗ ██╗██╗███████╗███████╗██████╗ +╚══██╔══╝██╔═══██╗██║ ██╔╝██╔════╝████╗ ██║██║╚══███╔╝██╔════╝██╔══██╗ + ██║ ██║ ██║█████╔╝ █████╗ ██╔██╗ ██║██║ ███╔╝ █████╗ ██████╔╝ + ██║ ██║ ██║██╔═██╗ ██╔══╝ ██║╚██╗██║██║ ███╔╝ ██╔══╝ ██╔══██╗ + ██║ ╚██████╔╝██║ ██╗███████╗██║ ╚████║██║███████╗███████╗██║ ██║ + ╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝╚═╝╚══════╝╚══════╝╚═╝ ╚═╝ +Tokenizer + +*/ + +char* LEFT_BRACKET_symbol = "["; +char* RIGHT_BRACKET_symbol = "]"; +// Filled in in main(). +u32 LEFT_BRACKET; +u32 RIGHT_BRACKET; + + +u32 +tokenate(char *str, u32 index, u32 length) +{ + if (4 == length + && *(str + index) == 't' + && *(str + index + 1) == 'r' + && *(str + index + 2) == 'u' + && *(str + index + 3) == 'e' + ) { + //print_str("tokenate true");print_endl(); + return JOY_VALUE(joyBool, 1); + } + if (5 == length + && *(str + index) == 'f' + && *(str + index + 1) == 'a' + && *(str + index + 2) == 'l' + && *(str + index + 3) == 's' + && *(str + index + 4) == 'e' + ) { + //print_str("tokenate false");print_endl(); + return JOY_VALUE(joyBool, 0); + } + if (is_integer(str, index, length)) { + //print_str("tokenate integer");print_endl(); + return convert_integer(str, index, length); + } + // TODO: Use ht_insert to avoid multiple allocations of the same string! + char *token = allocate_string(str, index, length); + if (!token) + return 0; // OOM + return JOY_VALUE(joySymbol, ht_insert(token)); +} + + +u32 +tokenize0(char *str, u32 str_length, u32 index, u32 acc) +{ + if (index >= str_length) { + //print_i64(index);print_str(" : ");print_str("END tokenize");print_endl(); + //print_i64(acc);print_str("<");print_endl(); + return acc; + } + //print_i64(index);print_str(" : ");print_str(str + index);print_endl(); + char ch = str[index]; + if ('[' == ch) { + acc = cons(LEFT_BRACKET, tokenize0(str, str_length, index + 1, acc)); + //print_i64(acc);print_str("<[");print_endl(); + return acc; + } + if (']' == ch) { + acc = cons(RIGHT_BRACKET, tokenize0(str, str_length, index + 1, acc)); + //print_i64(acc);print_str("<]");print_endl(); + return acc; + } + if (' ' == ch) { + return tokenize0(str, str_length, index + 1, acc); + } + u32 i = index + 1; + for (; i < str_length; ++i) { + if (str[i] == '[' || str[i] == ']' || str[i] == ' ') { + break; + } + } + // i == str_length OR str[i] is a delimiter char. + return cons(tokenate(str, index, i - index), tokenize0(str, str_length, i, acc)); + +} + + +u32 +tokenize(char *str) +{ + return tokenize0(str, strlen(str), 0, empty_list); +} + + + +/* +██████╗ █████╗ ██████╗ ███████╗███████╗██████╗ +██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝██╔══██╗ +██████╔╝███████║██████╔╝███████╗█████╗ ██████╔╝ +██╔═══╝ ██╔══██║██╔══██╗╚════██║██╔══╝ ██╔══██╗ +██║ ██║ ██║██║ ██║███████║███████╗██║ ██║ +╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═╝ +Parser + +*/ + +u32 +_reverse_list_in_place(u32 el, u32 end) +{ + u32 t = tail(el); + tails[el] = end; + return t ? _reverse_list_in_place(t, el) : el; +} + +u32 +reverse_list_in_place(u32 el) +{ + return el ? _reverse_list_in_place(el, empty_list) : el; +} + +u32 t2e_stack[1000]; +u32 t2e_stack_top = 0; + +u32 +text_to_expression(char *str) +{ + u32 frame = empty_list; + u32 tokens = tokenize(str); + //print_str("tokens: "); print_joy_list(tokens); print_endl(); + //return tokens; + while (tokens) { + u32 tok = head(tokens); + tokens = tail(tokens); + if (LEFT_BRACKET == tok) { + //print_str("left bracket");print_endl(); + t2e_stack[t2e_stack_top] = frame; + ++t2e_stack_top; + frame = empty_list; + continue; + } + if (RIGHT_BRACKET == tok) { + //print_str("right bracket");print_endl(); + tok = reverse_list_in_place(frame); + //print_str("new list: "); print_joy_list(tok); print_endl(); + --t2e_stack_top; + frame = t2e_stack[t2e_stack_top]; + } + frame = cons(tok, frame); + //print_str("t2e frame: "); print_joy_list(frame); print_endl(); + } + return reverse_list_in_place(frame); +} void main() { + LEFT_BRACKET = JOY_VALUE(joySymbol, ht_insert(LEFT_BRACKET_symbol)); + RIGHT_BRACKET = JOY_VALUE(joySymbol, ht_insert(RIGHT_BRACKET_symbol)); + // TODO: these should be global. u32 joy_true = JOY_VALUE(joyBool, 1); u32 joy_false = JOY_VALUE(joyBool, 0); memset(hash_table, 0, sizeof(hash_table)); + memset(string_heap, 0, sizeof(string_heap)); + memset(t2e_stack, 0, sizeof(t2e_stack)); error = NO_ERROR; - + /* u32 stack = empty_list; - - stack = cons(23, stack); + stack = push_int(23, stack); stack = cons(joy_true, stack); - stack = cons(42, stack); - + stack = push_int(42, stack); stack = push_symbol("cats", stack); - u32 el = empty_list; - - el = cons(48, el); + el = push_int(48, el); el = cons(el, el); stack = cons(el, stack); - stack = cons(joy_false, stack); - stack = cons(273, stack); - + stack = push_int(273, stack); print_joy_list(stack); print_endl(); + */ + + print_joy_list(text_to_expression(" 1[2[true 3][[]]bob]false[]bob 3[4]5")); + print_endl(); }