Parsing isn't this hard?

This commit is contained in:
Simon Forman 2023-02-02 08:32:45 -08:00
parent 9356e5394b
commit ebae69c391
2 changed files with 323 additions and 95 deletions

View File

@ -1,4 +1,5 @@
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@ -7,7 +8,9 @@
const char *BLANKS = " \t";
const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd] [] 23 ";
/*const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd [] 23 ";*/
/*const char *TEXT = " 23 33 [] ";*/
const char *TEXT = "";
enum JoyTypeType {
@ -69,6 +72,52 @@ push_integer_from_str(char *str, struct list_node* tail)
}
/* Pre-declare so we can use it in print_node(). */
void
print_list(struct list_node* el);
/*
Print one Joy value to stdout, choosing the format from j.kind:
integers go through gmp_printf, symbols are printed as strings,
the two booleans as the words "true" / "false", and lists as their
contents (via print_list) wrapped in square brackets.  Any other
kind prints "wtf".
*/
void
print_node(struct JoyType j)
{
	if (joyInt == j.kind) {
		gmp_printf("%Zd", j.value.i);
	} else if (joySymbol == j.kind) {
		printf("%s", j.value.symbol);
	} else if (joyTrue == j.kind) {
		printf("true");
	} else if (joyFalse == j.kind) {
		printf("false");
	} else if (joyList == j.kind) {
		/* Nested list: brackets around the recursively printed items. */
		printf("[");
		print_list(j.value.el);
		printf("]");
	} else {
		/* Unknown kind tag. */
		printf("wtf");
	}
}
/*
Print every item of the list starting at el, in order, with a single
space between consecutive items (none before the first or after the
last).  A NULL list prints nothing.
*/
void
print_list(struct list_node* el)
{
	struct list_node *cursor;

	for (cursor = el; NULL != cursor; cursor = cursor->tail) {
		print_node(cursor->head);
		/* Separator only when another item follows. */
		if (NULL != cursor->tail) {
			printf(" ");
		}
	}
}
char *
trim_leading_blanks(char *str)
{
@ -91,118 +140,141 @@ make_symbol_node(char *text, size_t size)
}
/* Create a new list_node with a joyList head. */
struct list_node*
text_to_expression(char *text)
make_list_node(struct list_node *el)
{
struct list_node *node;
node = GC_malloc(sizeof(struct list_node));
node->head.kind = joyList;
node->head.value.el = el;
return node;
}
/*
The empty list: a null list_node pointer.  The expansion is fully
parenthesized so it always acts as a single operand (e.g. after
sizeof or next to other operators).
*/
#define EMPTY_LIST ((struct list_node*)NULL)
/* Report an unterminated list and stop the program. */
static void
missing_close_bracket(void)
{
	printf("Missing ']' bracket.");
	exit(1);
}


/*
Extract terms from the text until a closing bracket is found.

On entry *text points just past an opening '['.  On return *text
points just past the matching ']' and the result is the chain of
list_nodes for the terms that were found (NULL for an empty list).
Nested '[' ... ']' groups are parsed recursively into joyList nodes.

If the input ends before the closing bracket is seen this prints
"Missing ']' bracket." and exits with status 1.
*/
struct list_node*
parse_list(char **text)
{
	char *rest;
	ptrdiff_t diff;
	struct list_node *result;

	/* NULL string input? */
	if (NULL == text || NULL == *text) {
		missing_close_bracket();
	}

	*text = trim_leading_blanks(*text);

	/* Nothing but blanks left, so the list was never closed. */
	if (NULL == *text) {
		missing_close_bracket();
	}

	/* Look for blanks or brackets. */
	rest = strpbrk(*text, " []");
	/*
	rest now points to a space or '[' or ']' after a term,
	-or- it is NULL if the rest of the string is a single term
	with no spaces nor brackets.  If that's the case then we're
	missing a closing bracket!
	*/
	if (NULL == rest) {
		missing_close_bracket();
	}

	/* How many chars have we got before the delimiter? */
	diff = rest - *text;

	if (diff) {
		/* A plain term; leave the delimiter for the next call. */
		result = make_symbol_node(*text, (size_t)diff);
		*text = rest;
	} else if ('[' == rest[0]) {
		/*
		Step past the '[' BEFORE recursing.  (Storing the pointer
		and incrementing afterwards would leave *text on the
		bracket and recurse forever.)
		*/
		*text = rest + 1;
		result = make_list_node(parse_list(text));
	} else if (']' == rest[0]) {
		/* End of this list: consume the ']' and stop. */
		*text = rest + 1;
		return NULL;
	} else {
		/*
		A blank right at *text.  Not expected if
		trim_leading_blanks() stripped it (TODO confirm), but
		skip it rather than fall through with no node.
		*/
		*text = rest + 1;
		return parse_list(text);
	}
	/* Parse the remainder of this list and hang it off the new node. */
	result->tail = parse_list(text);
	return result;
}
/* Pre-declare so we can use it in print_node(). */
void
print_list(struct list_node* el);
void
print_node(struct JoyType j)
/*
Get the next node from the text, updating *text to point to the
rest of the text.

Returns EMPTY_LIST (NULL) when there is nothing left to parse.  Once
the final term has been consumed *text is set to NULL, so a following
call also returns EMPTY_LIST instead of yielding the same term again.

A '[' starts a sub-list, which is parsed by parse_list() and wrapped
in a joyList node.  A ']' seen here has no matching '[': this prints
"Extra ']' bracket." and exits with status 1.
*/
struct list_node*
parse_node(char **text)
{
	char *rest;
	ptrdiff_t diff;
	struct list_node *thing;

	/* NULL string input? */
	if (NULL == text || NULL == *text) return EMPTY_LIST;

	*text = trim_leading_blanks(*text);

	/* All blanks? */
	if (NULL == *text) return EMPTY_LIST;

	/*
	Guard for an empty remainder, in case trim_leading_blanks()
	hands back a pointer to the terminator rather than NULL.
	*/
	if ('\0' == **text) {
		*text = NULL;
		return EMPTY_LIST;
	}

	/* Look for blanks or brackets. */
	rest = strpbrk(*text, " []");
	/*
	rest now points to a space or '[' or ']' after a term,
	-or- it is NULL if the rest of the string is a single term
	with no spaces nor brackets.  If that's the case then this is
	the last term: return it and mark the input as used up.
	*/
	if (NULL == rest) {
		thing = make_symbol_node(*text, strlen(*text));
		*text = NULL;
		return thing;
	}

	/* How many chars have we got before the delimiter? */
	diff = rest - *text;

	if (diff) {
		/* A plain term; leave the delimiter for the next call. */
		thing = make_symbol_node(*text, (size_t)diff);
		*text = rest;
		return thing;
	}
	if ('[' == rest[0]) {
		/* Step past the '[' before handing over to parse_list(). */
		*text = rest + 1;
		return make_list_node(parse_list(text));
	}
	if (']' == rest[0]) {
		printf("Extra ']' bracket.");
		exit(1);
	}
	printf("Should be unreachable.");
	exit(1);
}
void
print_list(struct list_node* el)
/*
Parse a whole expression: call parse_node() repeatedly and link the
nodes it returns, in order, into one list.

Returns the first node of that list, or NULL if the text yields no
nodes at all.  Nothing is printed here; callers that want to see the
result can pass it to print_list().
*/
struct list_node*
text_to_expression(char *text)
{
	struct list_node *result;
	struct list_node *last;
	struct list_node *next;

	result = parse_node(&text);
	if (NULL == result) {
		/* No terms: empty expression. */
		return NULL;
	}

	/* Append each further node after the most recently added one. */
	last = result;
	next = parse_node(&text);
	while (NULL != next) {
		last->tail = next;
		last = next;
		next = parse_node(&text);
	}
	return result;
}
@ -211,7 +283,7 @@ main(void)
{
mpz_t pi;
struct list_node* el;
char *text = (char *)TEXT;
char *text = (char *)TEXT;
mp_set_memory_functions(
&GC_malloc,
@ -223,8 +295,9 @@ main(void)
GC_register_finalizer(pi, my_callback, NULL, NULL, NULL);
el = push_integer_from_str("3141592653589793238462643383279502884", 0);
printf("BEGIN\n");
el->tail = text_to_expression(text);
print_list(el);
printf("\n");
print_list(el);
printf("\n");
return 0;
}

View File

@ -47,3 +47,158 @@ typedef struct list_node* JoyList;
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
/*mpz_mul(pi, pi, pi);*/
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
'
/*=======================================================*/
/*
Work-in-progress draft of text_to_expression() that uses a local
`stack` array to set aside the list built so far when a '[' is met,
and to pick it up again at the matching ']'.

NOTE(review): this draft does not compile as written.  `head` and
`current_list_node` are used in the second half but are not declared
in this function, and the braces do not balance: the function body is
closed before the final `if (text)` block, which leaves that block,
the `return result;` and the last `}` outside any function.
*/
struct list_node*
text_to_expression(char *text)
{
char *rest;
ptrdiff_t diff;
struct list_node *thing;
struct list_node *result;
struct list_node *tail = NULL;
/* Saved tail/result pointers.  NOTE(review): the pushes below are not
bounds-checked against the 128 slots. */
struct list_node *stack[128];
int s = 0; /* stack pointer */
/* NULL string input? */
if (NULL == text) {
return EMPTY_LIST;
}
text = trim_leading_blanks(text);
/* All blanks? */
if (NULL == text) {
return EMPTY_LIST;
}
/* Look for blanks or brackets. */
rest = strpbrk(text, " []");
/*
rest now points to a space or '[' or ']' after a term,
-or- it is NULL if the rest of the string is a single term
with no spaces nor brackets. If that's the case then we're
done, and we can just return a list with one symbol in it.
*/
if (NULL == rest) {
return make_symbol_node(text, strlen(text));
}
/* How many chars have we got? */
diff = rest - text;
if (diff) {
thing = make_symbol_node(text, diff);
if (tail) {
tail->tail = thing;
} else {
/* There is no head now, so this must be the first
result, the head that we will eventually return. */
result = thing;
}
tail = thing;
} else if ('[' == rest[0]) {
rest++;
/* We need to parse a sub-list. */
if (tail) {
/* We have already parsed some terms, set them aside. */
/* Push order is tail then result; the ']' branch pops result then tail. */
stack[s++] = tail;
stack[s++] = result;
tail = (struct list_node *)NULL;
} else {
/*
This is going to be our first term when we're done parsing it.
Do nothing, empty stack will signal it.
*/
}
} else if (']' == rest[0]) {
rest++;
if (s) {
/* We have a tail on the stack. */
thing = result;
result = stack[--s];
tail = stack[--s];
tail->tail = thing;
} else {
/* This list must have been the first thing in the expr. */
printf("wtf");
exit(1);
}
} else {
}
/* NOTE(review): this unconditionally overwrites the `result` that the
if/else chain above just assembled. */
result = make_symbol_node(text, diff);
/*
This is the head of the list we are eventually going to return,
so we assign it to the result
We are going to be adding things to the tail of this list, so
we'll need to keep track of that
*/
tail = result;
/*
Now we want to
*/
/* NOTE(review): from here on the code uses `head` and
`current_list_node`, neither of which is declared above. */
while (NULL != rest) {
if (head) {
head->tail = current_list_node;
} else {
/* There is no head now, so this must be the first
result, the head that we will eventually return. */
result = current_list_node;
}
head = current_list_node;
}
/* The next char is a space or '[' or ']'. */
if ('[' == rest[0]) {
if (head) {
stack[s++] = head;
head = (struct list_node *)NULL;
}
/*printf("%c\n", rest[0]);*/
}
if (']' == rest[0]) {
/* NOTE(review): stack[s--] reads slot s and then decrements, while the
pushes use stack[s++]; this reads the slot above the last pushed entry.
Looks like it should be stack[--s] -- confirm. */
current_list_node = make_list_node(stack[s--]);
if (head) {
head->tail = current_list_node;
} else {
/* There is no head now, so this must be the first
result, the head that we will eventually return. */
result = current_list_node;
}
head = current_list_node;
/*printf("%c\n", rest[0]);*/
}
text = trim_leading_blanks(++rest);
/* calling strpbrk on NULL caused segfault! */
rest = (NULL != text) ? strpbrk(text, " []") : text;
}
/* NOTE(review): the brace above closes the function body, so the rest
of this draft sits at file scope. */
if (text) {
current_list_node = make_symbol_node(text, strlen(text));
if (head) {
head->tail = current_list_node;
} else {
result = current_list_node;
}
}
return result;
}
'