diff --git a/implementations/uvm-ncc/joy_types.c b/implementations/uvm-ncc/joy_types.c
index 0720794..5297e6b 100644
--- a/implementations/uvm-ncc/joy_types.c
+++ b/implementations/uvm-ncc/joy_types.c
@@ -1,37 +1,76 @@
+// Copyright © 2023 Simon Forman
+//
+// This file is part of Thun
+//
+// Thun is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Thun is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Thun. If not see <http://www.gnu.org/licenses/>.
+//
#include
-// In the Thun dialect of Joy we have four types of values:
-// Integers, Booleans, Symbols, and Lists.
-// We don't have Unions, Enums, or Typedefs.
-//
-// So how do we represent Joy types?
-//
-// In SICP they use a pair of arrays of pointers, one for heads and one
-// for tails.
+#include
+
+/*
+ ██████╗ ██████╗ ███╗ ██╗███████╗ ██╗ ██╗███████╗ █████╗ ██████╗
+██╔════╝██╔═══██╗████╗ ██║██╔════╝ ██║ ██║██╔════╝██╔══██╗██╔══██╗
+██║ ██║ ██║██╔██╗ ██║███████╗ ███████║█████╗ ███████║██████╔╝
+██║ ██║ ██║██║╚██╗██║╚════██║ ██╔══██║██╔══╝ ██╔══██║██╔═══╝
+╚██████╗╚██████╔╝██║ ╚████║███████║ ██║ ██║███████╗██║ ██║██║
+ ╚═════╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝
+Cons Heap
+
+We don't have Unions, Enums, or Typedefs. So how do we represent Joy types?
+In SICP they use a pair of arrays of pointers, one for heads and one
+for tails.
+
+> A pointer to a pair is an index into the two vectors.
+
+*/
+// Number of cells in the cons heap.
#define HEAP_SIZE 1024
+// Parallel arrays: the pair at index i keeps its head in heads[i] and its
+// tail in tails[i] (this is how head() and tail() read them).
u32 heads[HEAP_SIZE];
u32 tails[HEAP_SIZE];
-// > A pointer to a pair is an index into the two vectors.
-u32 free = 0;
+// cell 0 is reserved so that 0 can be the empty list.
+// NOTE(review): presumably the index of the next unused cell -- confirm
+// against cons(), which is not visible in this hunk.
+u32 free = 1;
// > We also need a representation for objects other than pairs (such as
-// numbers and symbols) and a way to distinguish one kind of data from
-// another. There are many methods of accomplishing this, but they all
-// reduce to using typed pointers, that is, to extending the notion of
-// ``pointer'' to include information on data type.
+// > numbers and symbols) and a way to distinguish one kind of data from
+// > another. There are many methods of accomplishing this, but they all
+// > reduce to using typed pointers, that is, to extending the notion of
+// > ``pointer'' to include information on data type.
+
+// Let's use u32 with the two MSB's for the type tag.
-#define TYPE_OF(pointer) (pointer >> 30)
-#define VALUE_OF(pointer) (pointer & 0x3fffffff)
-#define JOY_VALUE(type, value) ((type & 3) << 30) | (value & 0x3fffffff)
+// Tagged-value helpers. A Joy value is a u32: bits 31..30 hold the type tag
+// and bits 29..0 hold the payload.
+//
+// Every argument and every expansion is fully parenthesized so the macros
+// can be used inside larger expressions (e.g. JOY_VALUE(t, v) == x, or
+// TYPE_OF(a | b)) without operator-precedence surprises. The casts make the
+// shift happen in u32: the tags are u8 variables, which would otherwise be
+// promoted to (signed) int before being shifted into the sign bit.
+#define TYPE_OF(pointer) ((pointer) >> 30)
+#define VALUE_OF(pointer) ((pointer) & 0x3fffffff)
+#define JOY_VALUE(type, value) ((((u32)(type) & 3) << 30) | ((u32)(value) & 0x3fffffff))
-u8 joyInt = 0;
-u8 joyList = 1;
+/*
+This means that our ints are restricted to 30 bits for now, until
+I implement bignums.
+
+
+In the Thun dialect of Joy we have four types of values:
+
+Integers, Booleans, Symbols, and Lists.
+*/
+// Type tags: the values JOY_VALUE() stores in the top two bits of a Joy
+// value and TYPE_OF() reads back.
+u8 joyList = 0;
+u8 joyInt = 1;
u8 joySymbol = 2;
u8 joyBool = 3;
+// Because the type tag for lists is 0 the empty list is just 0;
u32 empty_list = 0;
u32
@@ -51,6 +90,49 @@ u32 head(u32 list) { return heads[VALUE_OF(list)]; }
+// Return the tail of a list: the tails[] entry at the list's cell index
+// (the type tag is stripped with VALUE_OF() first).
u32 tail(u32 list) { return tails[VALUE_OF(list)]; }
+
+/*
+███████╗████████╗██████╗ ██╗███╗ ██╗ ██████╗
+██╔════╝╚══██╔══╝██╔══██╗██║████╗ ██║██╔════╝
+███████╗ ██║ ██████╔╝██║██╔██╗ ██║██║ ███╗
+╚════██║ ██║ ██╔══██╗██║██║╚██╗██║██║ ██║
+███████║ ██║ ██║ ██║██║██║ ╚████║╚██████╔╝
+╚══════╝ ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═════╝
+
+██╗ ██╗███████╗ █████╗ ██████╗
+██║ ██║██╔════╝██╔══██╗██╔══██╗
+███████║█████╗ ███████║██████╔╝
+██╔══██║██╔══╝ ██╔══██║██╔═══╝
+██║ ██║███████╗██║ ██║██║
+╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝
+Simple string storage heap.
+
+We need a place to keep symbol strings.
+
+*/
+
+// Capacity of the string heap, in bytes.
+#define STRING_HEAP_SIZE 100000
+
+// Backing store that allocate_string() copies strings into.
+char string_heap[STRING_HEAP_SIZE];
+// Offset into string_heap at which the next string will be stored.
+u32 string_heap_top = 0;
+
+// Copy `length` bytes starting at buffer[offset] into the string heap and
+// NUL-terminate the copy.
+//
+// Returns a pointer to the new string inside string_heap, or 0 when the
+// string plus its terminator does not fit in the remaining space. Strings
+// are never freed; each allocation advances string_heap_top by length + 1.
+char*
+allocate_string(char *buffer, u32 offset, u32 length)
+{
+    // Index of the terminating NUL. Widen before adding so that a huge
+    // length cannot wrap around in 32 bits and slip past the bounds check.
+    u64 end = (u64)string_heap_top + (u64)length;
+    if (end >= STRING_HEAP_SIZE)
+        return 0;
+    char *new_string = string_heap + string_heap_top;
+    memcpy(new_string, buffer + offset, length);
+    // Terminate directly after the last copied byte; this does not rely on
+    // the heap having been zeroed beforehand.
+    string_heap[end] = '\0';
+    string_heap_top = (u32)end + 1;
+    return new_string;
+}
+
+
+/******************************************************************************/
+
// No setjmp/longjmp, so let's have a global error value and check it after ops.
+// main() sets it to NO_ERROR before doing any work.
u64 error;
@@ -58,6 +140,19 @@ u64 error;
+// Error codes (presumably values for the global `error` -- the code that
+// sets them is not visible in this hunk).
#define UNKNOWN_WORD_ERROR 1
#define MISSING_CLOSING_BRACKET 2
+/******************************************************************************/
+
+
+/*
+██████╗ ██████╗ ██╗███╗ ██╗████████╗███████╗██████╗
+██╔══██╗██╔══██╗██║████╗ ██║╚══██╔══╝██╔════╝██╔══██╗
+██████╔╝██████╔╝██║██╔██╗ ██║ ██║ █████╗ ██████╔╝
+██╔═══╝ ██╔══██╗██║██║╚██╗██║ ██║ ██╔══╝ ██╔══██╗
+██║ ██║ ██║██║██║ ╚████║ ██║ ███████╗██║ ██║
+╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝╚═╝ ╚═╝
+Printer
+*/
+
void
print_joy_value(u32 jv)
{
@@ -93,9 +188,32 @@ print_joy_list(u32 list)
}
-// And now for a hash table.
-// https://benhoyt.com/writings/hash-table-in-c/#hash-tables
-// https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
+/*
+██╗ ██╗ █████╗ ███████╗██╗ ██╗
+██║ ██║██╔══██╗██╔════╝██║ ██║
+███████║███████║███████╗███████║
+██╔══██║██╔══██║╚════██║██╔══██║
+██║ ██║██║ ██║███████║██║ ██║
+╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝
+
+████████╗ █████╗ ██████╗ ██╗ ███████╗
+╚══██╔══╝██╔══██╗██╔══██╗██║ ██╔════╝
+ ██║ ███████║██████╔╝██║ █████╗
+ ██║ ██╔══██║██╔══██╗██║ ██╔══╝
+ ██║ ██║ ██║██████╔╝███████╗███████╗
+ ╚═╝ ╚═╝ ╚═╝╚═════╝ ╚══════╝╚══════╝
+And now for a hash table.
+
+This table maps the hashes of symbol strings (the value bits of tagged
+joySymbol values) back to the strings themselves, which are stored in the
+string heap.
+
+
+FNV hash function.
+
+https://benhoyt.com/writings/hash-table-in-c/#hash-tables
+https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
+
+*/
+// 64-bit FNV offset basis and 64-bit FNV prime.
#define FNV_OFFSET 0xcbf29ce484222325
#define FNV_PRIME 0x100000001b3
@@ -154,10 +272,10 @@ ht_insert(char *symbol)
char*
ht_lookup(u32 hash)
{
- // Note that hash will be truncated to N (N=30 as it happens) bits
- // by VALUE_OF().
u32 index = hash % CAPACITY;
char *candidate = hash_table[index];
+ // Note that hash will be truncated to N (N=30 as it happens) bits
+ // by VALUE_OF().
u32 increment = ((hash >> EXPONENT) | 1) % CAPACITY;
while (candidate) {
if (hash == VALUE_OF(hash_key(candidate)))
@@ -169,42 +287,245 @@ ht_lookup(u32 hash)
return 0;
}
+/******************************************************************************/
+
+// Push a symbol onto a stack: pass the string to ht_insert(), tag the value
+// it returns as joySymbol, and cons that onto `stack`.
+// Returns whatever cons() returns (callers assign it back to their stack).
u32
push_symbol(char *symbol, u32 stack)
{
return cons(JOY_VALUE(joySymbol, ht_insert(symbol)), stack);
}
+// Tag `n` as a Joy integer (only the low 30 bits survive the tagging) and
+// cons it onto the front of `stack`. Returns the result of cons().
+u32
+push_int(u32 n, u32 stack)
+{
+    u32 boxed = JOY_VALUE(joyInt, n);
+    return cons(boxed, stack);
+}
+
+/******************************************************************************/
+
+// Report whether the `length` characters starting at str[index] are all
+// decimal digits. Returns 1 if so, 0 as soon as a non-digit is found.
+// A zero-length span has no offending character and therefore yields 1.
+bool
+is_integer(char *str, u32 index, u32 length)
+{
+    // Walk the span from its last character back to its first.
+    u32 remaining = length;
+    while (remaining) {
+        char ch = str[index + remaining - 1];
+        if (ch < '0' || ch > '9')
+            return 0;
+        --remaining;
+    }
+    return 1;
+}
+
+// Convert the `length` decimal digits starting at str[index] into a tagged
+// Joy integer. The characters are not validated here (tokenate() checks
+// them with is_integer() first), and the result is truncated to the 30
+// payload bits by JOY_VALUE().
+u32
+convert_integer(char *str, u32 index, u32 length)
+{
+    u32 value = 0;
+    u32 stop = index + length;
+    u32 pos = index;
+    while (pos < stop) {
+        u8 digit = (u8)str[pos] - (u8)'0';
+        value = value * 10 + digit;
+        ++pos;
+    }
+    return JOY_VALUE(joyInt, value);
+}
+
+/******************************************************************************/
+
+/*
+████████╗ ██████╗ ██╗ ██╗███████╗███╗ ██╗██╗███████╗███████╗██████╗
+╚══██╔══╝██╔═══██╗██║ ██╔╝██╔════╝████╗ ██║██║╚══███╔╝██╔════╝██╔══██╗
+ ██║ ██║ ██║█████╔╝ █████╗ ██╔██╗ ██║██║ ███╔╝ █████╗ ██████╔╝
+ ██║ ██║ ██║██╔═██╗ ██╔══╝ ██║╚██╗██║██║ ███╔╝ ██╔══╝ ██╔══██╗
+ ██║ ╚██████╔╝██║ ██╗███████╗██║ ╚████║██║███████╗███████╗██║ ██║
+ ╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝╚═╝╚══════╝╚══════╝╚═╝ ╚═╝
+Tokenizer
+
+*/
+
+// Spellings of the two bracket tokens, passed to ht_insert() in main().
+char* LEFT_BRACKET_symbol = "[";
+char* RIGHT_BRACKET_symbol = "]";
+// Filled in by main(): the tagged joySymbol values for the brackets.
+// tokenize0() emits them as tokens and text_to_expression() compares each
+// token against them.
+u32 LEFT_BRACKET;
+u32 RIGHT_BRACKET;
+
+
+// Turn one token -- the `length` characters starting at str[index] -- into
+// a Joy value:
+//   "true" / "false"  -> joyBool 1 / 0
+//   all digits        -> joyInt (via convert_integer())
+//   anything else     -> joySymbol; the text is copied into the string heap
+//                        and handed to ht_insert()
+// Returns 0 when the string heap is full; note that 0 is also the value of
+// empty_list.
+u32
+tokenate(char *str, u32 index, u32 length)
+{
+    char *text = str + index;
+
+    if (4 == length
+        && text[0] == 't' && text[1] == 'r'
+        && text[2] == 'u' && text[3] == 'e')
+        return JOY_VALUE(joyBool, 1);
+
+    if (5 == length
+        && text[0] == 'f' && text[1] == 'a' && text[2] == 'l'
+        && text[3] == 's' && text[4] == 'e')
+        return JOY_VALUE(joyBool, 0);
+
+    if (is_integer(str, index, length))
+        return convert_integer(str, index, length);
+
+    // TODO: Use ht_insert to avoid multiple allocations of the same string!
+    char *token = allocate_string(str, index, length);
+    if (!token)
+        return 0;  // OOM
+    return JOY_VALUE(joySymbol, ht_insert(token));
+}
+
+
+// Recursive worker for tokenize(). Scans str from `index` up to
+// `str_length` and returns a list of tokens, in source order, ending in
+// `acc`.
+//
+// '[' and ']' become the LEFT_BRACKET / RIGHT_BRACKET values, spaces are
+// skipped, and every other run of characters up to the next bracket, space
+// or end of input is converted with tokenate().
+u32
+tokenize0(char *str, u32 str_length, u32 index, u32 acc)
+{
+    if (index >= str_length)
+        return acc;
+
+    char ch = str[index];
+    if (' ' == ch)
+        return tokenize0(str, str_length, index + 1, acc);
+    if ('[' == ch)
+        return cons(LEFT_BRACKET, tokenize0(str, str_length, index + 1, acc));
+    if (']' == ch)
+        return cons(RIGHT_BRACKET, tokenize0(str, str_length, index + 1, acc));
+
+    // Find the end of this token: stop == str_length or str[stop] is a
+    // delimiter character.
+    u32 stop = index + 1;
+    while (stop < str_length
+           && str[stop] != '[' && str[stop] != ']' && str[stop] != ' ')
+        ++stop;
+    return cons(tokenate(str, index, stop - index), tokenize0(str, str_length, stop, acc));
+}
+
+
+// Tokenize a NUL-terminated string: returns the list of tokens produced by
+// tokenize0() over the whole string, terminated by the empty list.
+u32
+tokenize(char *str)
+{
+    u32 str_length = strlen(str);
+    return tokenize0(str, str_length, 0, empty_list);
+}
+
+
+
+/*
+██████╗ █████╗ ██████╗ ███████╗███████╗██████╗
+██╔══██╗██╔══██╗██╔══██╗██╔════╝██╔════╝██╔══██╗
+██████╔╝███████║██████╔╝███████╗█████╗ ██████╔╝
+██╔═══╝ ██╔══██║██╔══██╗╚════██║██╔══╝ ██╔══██╗
+██║ ██║ ██║██║ ██║███████║███████╗██║ ██║
+╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═╝
+Parser
+
+*/
+
+// Worker for reverse_list_in_place(). Walks the non-empty list `el`,
+// re-pointing each cell's tail at the cell visited before it (`end` for the
+// first cell). The cells are modified in place; returns the last cell of
+// the original list, which is the head of the reversed one.
+u32
+_reverse_list_in_place(u32 el, u32 end)
+{
+    for (;;) {
+        u32 next = tail(el);
+        tails[el] = end;
+        if (!next)
+            return el;
+        end = el;
+        el = next;
+    }
+}
+
+// Destructively reverse a list and return its new head. The empty list is
+// returned unchanged.
+u32
+reverse_list_in_place(u32 el)
+{
+    if (!el)
+        return el;
+    return _reverse_list_in_place(el, empty_list);
+}
+
+// Maximum bracket nesting depth text_to_expression() can handle.
+#define T2E_STACK_SIZE 1000
+// Error codes reported by text_to_expression() for malformed input.
+// NOTE(review): assumes 3 and 4 are not already used by error codes defined
+// elsewhere in this file -- confirm.
+#define EXTRA_CLOSING_BRACKET 3
+#define NESTING_TOO_DEEP 4
+
+// Stack of partially built enclosing lists, one entry per open '['.
+u32 t2e_stack[T2E_STACK_SIZE];
+u32 t2e_stack_top = 0;
+
+// Parse a string into a Joy expression (a list of values, with bracketed
+// sub-lists nested as list values).
+//
+// Items are consed onto `frame` in reverse and each frame is reversed in
+// place when it is closed. On '[' the current frame is pushed on t2e_stack
+// and a fresh one is started; on ']' the finished frame becomes a single
+// item of the frame popped from the stack.
+//
+// On malformed input the global `error` is set and empty_list is returned:
+//   EXTRA_CLOSING_BRACKET    a ']' with no matching '['
+//   MISSING_CLOSING_BRACKET  input ended with a '[' still open
+//   NESTING_TOO_DEEP         more than T2E_STACK_SIZE nested '['
+// `error` is left untouched on success.
+u32
+text_to_expression(char *str)
+{
+    u32 frame = empty_list;
+    u32 tokens = tokenize(str);
+    // Always start from an empty stack, whatever state a previous call
+    // left behind.
+    t2e_stack_top = 0;
+    while (tokens) {
+        u32 tok = head(tokens);
+        tokens = tail(tokens);
+        if (LEFT_BRACKET == tok) {
+            if (t2e_stack_top >= T2E_STACK_SIZE) {
+                // Pushing would write past the end of t2e_stack.
+                error = NESTING_TOO_DEEP;
+                t2e_stack_top = 0;
+                return empty_list;
+            }
+            t2e_stack[t2e_stack_top] = frame;
+            ++t2e_stack_top;
+            frame = empty_list;
+            continue;
+        }
+        if (RIGHT_BRACKET == tok) {
+            if (!t2e_stack_top) {
+                // Popping would wrap the unsigned index around and read
+                // far outside the array.
+                error = EXTRA_CLOSING_BRACKET;
+                return empty_list;
+            }
+            // The finished sub-list becomes an ordinary item of the
+            // enclosing frame.
+            tok = reverse_list_in_place(frame);
+            --t2e_stack_top;
+            frame = t2e_stack[t2e_stack_top];
+        }
+        frame = cons(tok, frame);
+    }
+    if (t2e_stack_top) {
+        // Enclosing frames are still waiting on the stack; returning
+        // `frame` would silently drop everything outside the open bracket.
+        error = MISSING_CLOSING_BRACKET;
+        t2e_stack_top = 0;
+        return empty_list;
+    }
+    return reverse_list_in_place(frame);
+}
+// Entry point: reset the interpreter's global tables, register the bracket
+// symbols, then parse a sample expression and print the result.
void
main()
{
+ // TODO: these should be global.
u32 joy_true = JOY_VALUE(joyBool, 1);
u32 joy_false = JOY_VALUE(joyBool, 0);
memset(hash_table, 0, sizeof(hash_table));
+ memset(string_heap, 0, sizeof(string_heap));
+ memset(t2e_stack, 0, sizeof(t2e_stack));
error = NO_ERROR;
-
+ // Register the bracket symbols only after hash_table has been cleared;
+ // doing it before the memset() above would wipe their entries again.
+ LEFT_BRACKET = JOY_VALUE(joySymbol, ht_insert(LEFT_BRACKET_symbol));
+ RIGHT_BRACKET = JOY_VALUE(joySymbol, ht_insert(RIGHT_BRACKET_symbol));
+ /*
u32 stack = empty_list;
-
- stack = cons(23, stack);
+ stack = push_int(23, stack);
stack = cons(joy_true, stack);
- stack = cons(42, stack);
-
+ stack = push_int(42, stack);
stack = push_symbol("cats", stack);
-
u32 el = empty_list;
-
- el = cons(48, el);
+ el = push_int(48, el);
el = cons(el, el);
stack = cons(el, stack);
-
stack = cons(joy_false, stack);
- stack = cons(273, stack);
-
+ stack = push_int(273, stack);
print_joy_list(stack);
print_endl();
+ */
+
+ print_joy_list(text_to_expression(" 1[2[true 3][[]]bob]false[]bob 3[4]5"));
+ print_endl();
}