Parsing isn't this hard?

This commit is contained in:
Simon Forman 2023-02-02 08:32:45 -08:00
parent 9356e5394b
commit ebae69c391
2 changed files with 323 additions and 95 deletions

View File

@ -1,4 +1,5 @@
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@ -7,7 +8,9 @@
/* Characters treated as blanks; trim_leading_blanks() passes this set to strspn(). */
const char *BLANKS = " \t";
/*
Sample input text, with alternative samples kept in comments.
NOTE(review): TEXT is defined twice in this view (here and a few lines
below); presumably one is the pre-change line and one the post-change
line of the diff -- confirm which definition the real file keeps.
*/
const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd] [] 23 ";
/*const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd [] 23 ";*/
/*const char *TEXT = " 23 33 [] ";*/
const char *TEXT = "";
enum JoyTypeType {
@ -69,97 +72,6 @@ push_integer_from_str(char *str, struct list_node* tail)
}
/*
 * Skip over any leading characters of str that appear in BLANKS.
 *
 * Returns a pointer to the first non-blank character of str, or NULL
 * when str is empty or consists only of blanks.  str must not be NULL.
 */
char *
trim_leading_blanks(char *str)
{
	char *first = str + strspn(str, BLANKS);

	/* Landing on the terminator means nothing but blanks was found. */
	if ('\0' == *first) {
		return NULL;
	}
	return first;
}
/* Create a new list_node with a joySymbol head. */
struct list_node*
make_symbol_node(char *text, size_t size)
{
struct list_node *node;
node = GC_malloc(sizeof(struct list_node));
node->head.kind = joySymbol;
node->head.value.symbol = (char *)GC_malloc(size + 1);
strncat(node->head.value.symbol, text, size);
/*printf("%s\n", node->head.value.symbol);*/
return node;
}
/*
 * Split text into a flat linked list of symbol nodes.
 *
 * Terms are separated by spaces and by the characters '[' and ']'.
 * Brackets are not turned into nodes; each one met is printed on its
 * own line to stdout.
 *
 * Returns the first node of the list, or NULL when text is NULL, is
 * all blanks, or contains no symbols at all (for example "[]").
 */
struct list_node*
text_to_expression(char *text)
{
	char *rest;
	ptrdiff_t diff;
	struct list_node *current_list_node;
	/* Must start as NULL: it is returned untouched when no symbol is found. */
	struct list_node *result = NULL;
	struct list_node *head = NULL;

	if (NULL == text) {
		/* NULL string input. */
		return (struct list_node*)NULL;
	}
	text = trim_leading_blanks(text);
	if (NULL == text) {
		/* All blanks. */
		return (struct list_node*)NULL;
	}

	rest = strpbrk(text, " []");
	/*
	rest now points to a space or '[' or ']' after a term,
	-or- it is NULL if the rest of the string is a single term
	with no spaces nor brackets.
	*/
	while (NULL != rest) {
		/* How many chars have we got? */
		diff = rest - text;
		/*
		diff is zero when text itself starts with a bracket, i.e.
		there is no symbol in front of the delimiter.
		*/
		if (diff) {
			/* Allocate space and copy out the substring. */
			current_list_node = make_symbol_node(text, diff);
			if (head) {
				head->tail = current_list_node;
			} else {
				/* No nodes yet, so this is the first one: the
				   head of the list we will eventually return. */
				result = current_list_node;
			}
			head = current_list_node;
		}

		/* The next char is a space or '[' or ']'. */
		if ('[' == rest[0] || ']' == rest[0]) {
			printf("%c\n", rest[0]);
		}

		text = trim_leading_blanks(++rest);
		/* strpbrk() must not be called with a NULL string. */
		rest = (NULL != text) ? strpbrk(text, " []") : text;
	}

	/* Whatever is left is one final term with no delimiter after it. */
	if (text) {
		current_list_node = make_symbol_node(text, strlen(text));
		if (head) {
			head->tail = current_list_node;
		} else {
			result = current_list_node;
		}
	}
	return result;
}
/* Pre-declare so we can use it in print_node(). */
void
@ -206,6 +118,166 @@ print_list(struct list_node* el)
}
/*
 * Skip over any leading characters of str that appear in BLANKS.
 *
 * Returns a pointer to the first non-blank character of str, or NULL
 * when str is empty or consists only of blanks.  str must not be NULL.
 */
char *
trim_leading_blanks(char *str)
{
	char *first = str + strspn(str, BLANKS);

	/* Landing on the terminator means nothing but blanks was found. */
	if ('\0' == *first) {
		return NULL;
	}
	return first;
}
/* Create a new list_node with a joySymbol head. */
struct list_node*
make_symbol_node(char *text, size_t size)
{
struct list_node *node;
node = GC_malloc(sizeof(struct list_node));
node->head.kind = joySymbol;
node->head.value.symbol = (char *)GC_malloc(size + 1);
strncat(node->head.value.symbol, text, size);
/*printf("%s\n", node->head.value.symbol);*/
return node;
}
/*
 * Create a new list_node with a joyList head.
 *
 * el: first node of the list this node should hold; may be NULL for
 *     an empty list.
 *
 * Returns the new node with its tail set to NULL.  Exits the program
 * if memory cannot be allocated.
 */
struct list_node*
make_list_node(struct list_node *el)
{
	struct list_node *node = GC_malloc(sizeof *node);

	if (NULL == node) {
		fprintf(stderr, "Out of memory.\n");
		exit(1);
	}
	node->head.kind = joyList;
	node->head.value.el = el;
	node->tail = NULL;
	return node;
}
/* The empty list is represented by a NULL list_node pointer.
   Parenthesized so the cast stays intact in any surrounding expression. */
#define EMPTY_LIST ((struct list_node*)NULL)
/* Report an unterminated list and stop the program. */
static void
missing_close_bracket(void)
{
	printf("Missing ']' bracket.");
	exit(1);
}

/*
Extract terms from the text until a closing bracket is found.

text: address of the parse cursor.  On entry *text points just past
      the '[' that opened this list; on return it points just past
      the matching ']'.

Returns the first node of the parsed list, or NULL for an empty list.
Nested '[' ... ']' become joyList nodes via a recursive call.
Prints "Missing ']' bracket." and exits if the text ends before the
closing bracket is found.
*/
struct list_node*
parse_list(char **text)
{
	char *rest;
	ptrdiff_t diff;
	struct list_node *node;
	struct list_node *result = NULL;  /* first node of this list */
	struct list_node *last = NULL;    /* most recently appended node */

	for (;;) {
		/* NULL string input? */
		if (NULL == *text) {
			missing_close_bracket();
		}
		*text = trim_leading_blanks(*text);
		if (NULL == *text) {
			missing_close_bracket();
		}

		/* Look for blanks or brackets. */
		rest = strpbrk(*text, " []");
		/*
		rest now points to a space or '[' or ']' after a term,
		-or- it is NULL if the rest of the string is a single term
		with no spaces nor brackets.  If that's the case then we're
		missing a closing bracket!
		*/
		if (NULL == rest) {
			missing_close_bracket();
		}

		/* How many chars have we got? */
		diff = rest - *text;
		if (diff) {
			node = make_symbol_node(*text, diff);
			/* Leave the delimiter itself for the next iteration. */
			*text = rest;
		} else if ('[' == rest[0]) {
			/* Step past the '[' before descending; otherwise the
			   nested call would see the same bracket again. */
			*text = rest + 1;
			node = make_list_node(parse_list(text));
		} else {
			/*
			diff is zero and leading blanks were trimmed, so the
			only delimiter left is ']'.  Consume it and finish.
			*/
			*text = rest + 1;
			return result;
		}

		/* Append the new node to the list being built. */
		if (last) {
			last->tail = node;
		} else {
			result = node;
		}
		last = node;
	}
}
/*
Get the next node from the text, updating text to point to the
remainder of the input.

text: address of the parse cursor.  After a successful call *text
      points just past the term that was consumed.

Returns a single new node (a joySymbol node, or a joyList node for a
bracketed list), or EMPTY_LIST when *text is NULL or holds only
blanks.  Prints "Extra ']' bracket." and exits when a ']' appears
with no list open.
*/
struct list_node*
parse_node(char **text)
{
	char *rest;
	ptrdiff_t diff;
	size_t length;
	struct list_node *thing;

	/* NULL string input? */
	if (NULL == *text) return EMPTY_LIST;
	*text = trim_leading_blanks(*text);
	/* All blanks? */
	if (NULL == *text) return EMPTY_LIST;

	/* Look for blanks or brackets. */
	rest = strpbrk(*text, " []");
	/*
	rest now points to a space or '[' or ']' after a term,
	-or- it is NULL if the rest of the string is a single term
	with no spaces nor brackets.
	*/
	if (NULL == rest) {
		length = strlen(*text);
		thing = make_symbol_node(*text, length);
		/*
		Move the cursor to the end of the string so the next call
		returns EMPTY_LIST instead of parsing this term again.
		*/
		*text += length;
		return thing;
	}

	/* How many chars have we got? */
	diff = rest - *text;
	if (diff) {
		thing = make_symbol_node(*text, diff);
		/* Leave the delimiter itself for the next call. */
		*text = rest;
		return thing;
	}
	if ('[' == rest[0]) {
		/* Step past the '[' so parse_list() starts inside the list. */
		*text = rest + 1;
		return make_list_node(parse_list(text));
	}
	if (']' == rest[0]) {
		printf("Extra ']' bracket.");
		exit(1);
	}
	/* Leading blanks were trimmed, so a zero-length term can only be
	   followed by a bracket. */
	printf("Should be unreachable.");
	exit(1);
}
/*
 * Parse text into a linked list by calling parse_node() until it
 * returns NULL, chaining each returned node onto the previous one.
 *
 * Debug trace lines are written to stdout along the way.
 *
 * Returns the first node parsed, or NULL if parse_node() produced
 * nothing on the first call.
 */
struct list_node*
text_to_expression(char *text)
{
	struct list_node *first;
	struct list_node *last;
	struct list_node *next;

	printf("1\n");
	first = parse_node(&text);
	printf("2\n");
	print_list(first);
	printf(" <- eh?\n");

	last = first;
	for (next = parse_node(&text); next != NULL; next = parse_node(&text)) {
		print_list(next);
		printf("<- ooh?\n");
		/* Link the new node after the current end of the list. */
		last->tail = next;
		last = next;
	}
	return first;
}
int
main(void)
{
@ -223,6 +295,7 @@ main(void)
GC_register_finalizer(pi, my_callback, NULL, NULL, NULL);
el = push_integer_from_str("3141592653589793238462643383279502884", 0);
printf("BEGIN\n");
el->tail = text_to_expression(text);
print_list(el);
printf("\n");

View File

@ -47,3 +47,158 @@ typedef struct list_node* JoyList;
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
/*mpz_mul(pi, pi, pi);*/
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
'
/*=======================================================*/
/*
Draft of an iterative text_to_expression() that uses an explicit stack
of list_node pointers to handle nested brackets.

NOTE(review): this block does not compile as written:
 - `head` and `current_list_node` are used further down but are not
   among the variables declared at the top of the function;
 - the braces are unbalanced: the function body closes before the
   final `if (text)` block, leaving that block, `return result;` and
   a closing brace outside any function;
 - the `while (NULL != rest)` loop never changes `rest`.
It looks like an abandoned work-in-progress set aside between the
quote marks around it -- confirm before reusing any of it.
*/
struct list_node*
text_to_expression(char *text)
{
char *rest;
ptrdiff_t diff;
struct list_node *thing;
struct list_node *result;
struct list_node *tail = NULL;
/* Fixed-size stack of saved nodes; pushes below do no bounds check. */
struct list_node *stack[128];
int s = 0; /* stack pointer */
/* NULL string input? */
if (NULL == text) {
return EMPTY_LIST;
}
text = trim_leading_blanks(text);
/* All blanks? */
if (NULL == text) {
return EMPTY_LIST;
}
/* Look for blanks or brackets. */
rest = strpbrk(text, " []");
/*
rest now points to a space or '[' or ']' after a term,
-or- it is NULL if the rest of the string is a single term
with no spaces nor brackets. If that's the case then we're
done, and we can just return a list with one symbol in it.
*/
if (NULL == rest) {
return make_symbol_node(text, strlen(text));
}
/* How many chars have we got? */
diff = rest - text;
if (diff) {
thing = make_symbol_node(text, diff);
if (tail) {
tail->tail = thing;
} else {
/* There is no head now, so this must be the first
result, the head that we will eventually return. */
result = thing;
}
tail = thing;
} else if ('[' == rest[0]) {
rest++;
/* We need to parse a sub-list. */
if (tail) {
/* We have already parsed some terms, set them aside. */
/* Pushed in the order tail, result; the ']' branch pops in reverse. */
stack[s++] = tail;
stack[s++] = result;
tail = (struct list_node *)NULL;
} else {
/*
This is going to be our first term when we're done parsing it.
Do nothing, empty stack will signal it.
*/
}
} else if (']' == rest[0]) {
rest++;
if (s) {
/* We have a tail on the stack. */
thing = result;
result = stack[--s];
tail = stack[--s];
/* NOTE(review): links the sub-list's nodes directly after the saved
tail without wrapping them in make_list_node() -- confirm intent. */
tail->tail = thing;
} else {
/* This list must have been the first thing in the expr. */
printf("wtf");
exit(1);
}
} else {
}
/* NOTE(review): unconditionally overwrites the result built above. */
result = make_symbol_node(text, diff);
/*
This is the head of the list we are eventually going to return,
so we assign it to the result
We are going to be adding things to the tail of this list, so
we'll need to keep track of that
*/
tail = result;
/*
Now we want to
*/
/* NOTE(review): `rest` is not modified inside this loop. */
while (NULL != rest) {
if (head) {
head->tail = current_list_node;
} else {
/* There is no head now, so this must be the first
result, the head that we will eventually return. */
result = current_list_node;
}
head = current_list_node;
}
/* The next char is a space or '[' or ']'. */
if ('[' == rest[0]) {
if (head) {
stack[s++] = head;
head = (struct list_node *)NULL;
}
/*printf("%c\n", rest[0]);*/
}
if (']' == rest[0]) {
/* NOTE(review): pushes use stack[s++], so the top entry is at s - 1;
stack[s--] reads one slot above it -- looks like an off-by-one. */
current_list_node = make_list_node(stack[s--]);
if (head) {
head->tail = current_list_node;
} else {
/* There is no head now, so this must be the first
result, the head that we will eventually return. */
result = current_list_node;
}
head = current_list_node;
/*printf("%c\n", rest[0]);*/
}
text = trim_leading_blanks(++rest);
/* calling strpbrk on NULL caused segfault! */
rest = (NULL != text) ? strpbrk(text, " []") : text;
}
/* NOTE(review): the brace above closes the function body; everything
from here to the end of the block sits outside any function. */
if (text) {
current_list_node = make_symbol_node(text, strlen(text));
if (head) {
head->tail = current_list_node;
} else {
result = current_list_node;
}
}
return result;
}
'