Parsing isn't this hard?
This commit is contained in:
parent
9356e5394b
commit
ebae69c391
|
|
@ -1,4 +1,5 @@
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
|
@ -7,7 +8,9 @@
|
||||||
|
|
||||||
|
|
||||||
const char *BLANKS = " \t";
|
const char *BLANKS = " \t";
|
||||||
const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd] [] 23 ";
|
/*const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd [] 23 ";*/
|
||||||
|
/*const char *TEXT = " 23 33 [] ";*/
|
||||||
|
const char *TEXT = "";
|
||||||
|
|
||||||
|
|
||||||
enum JoyTypeType {
|
enum JoyTypeType {
|
||||||
|
|
@ -69,6 +72,52 @@ push_integer_from_str(char *str, struct list_node* tail)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Pre-declare so we can use it in print_node(). */
|
||||||
|
void
|
||||||
|
print_list(struct list_node* el);
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
print_node(struct JoyType j)
|
||||||
|
{
|
||||||
|
switch (j.kind) {
|
||||||
|
case joyInt:
|
||||||
|
gmp_printf("%Zd", j.value.i);
|
||||||
|
break;
|
||||||
|
case joySymbol:
|
||||||
|
printf("%s", j.value.symbol);
|
||||||
|
break;
|
||||||
|
case joyTrue:
|
||||||
|
printf("true");
|
||||||
|
break;
|
||||||
|
case joyFalse:
|
||||||
|
printf("false");
|
||||||
|
break;
|
||||||
|
case joyList:
|
||||||
|
printf("[");
|
||||||
|
print_list(j.value.el);
|
||||||
|
printf("]");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
printf("wtf");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
print_list(struct list_node* el)
|
||||||
|
{
|
||||||
|
while (NULL != el) {
|
||||||
|
print_node(el->head);
|
||||||
|
el = el->tail;
|
||||||
|
if (NULL != el) {
|
||||||
|
printf(" ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
char *
|
char *
|
||||||
trim_leading_blanks(char *str)
|
trim_leading_blanks(char *str)
|
||||||
{
|
{
|
||||||
|
|
@ -91,118 +140,141 @@ make_symbol_node(char *text, size_t size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Create a new list_node with a joyList head. */
|
||||||
struct list_node*
|
struct list_node*
|
||||||
text_to_expression(char *text)
|
make_list_node(struct list_node *el)
|
||||||
|
{
|
||||||
|
struct list_node *node;
|
||||||
|
node = GC_malloc(sizeof(struct list_node));
|
||||||
|
node->head.kind = joyList;
|
||||||
|
node->head.value.el = el;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define EMPTY_LIST (struct list_node*)NULL
|
||||||
|
|
||||||
|
/*
|
||||||
|
Extract terms from the text until a closing bracket is found.
|
||||||
|
*/
|
||||||
|
struct list_node*
|
||||||
|
parse_list(char **text)
|
||||||
{
|
{
|
||||||
char *rest;
|
char *rest;
|
||||||
ptrdiff_t diff;
|
ptrdiff_t diff;
|
||||||
struct list_node *current_list_node;
|
struct list_node *result = NULL;
|
||||||
struct list_node *result;
|
/* NULL string input? */
|
||||||
struct list_node *head = NULL;
|
|
||||||
|
|
||||||
if (NULL == text) {
|
if (NULL == *text) {
|
||||||
/* NULL string input. */
|
printf("Missing ']' bracket.");
|
||||||
return (struct list_node*)NULL;
|
exit(1);
|
||||||
}
|
};
|
||||||
text = trim_leading_blanks(text);
|
|
||||||
if (NULL == text) {
|
|
||||||
/* All blanks. */
|
|
||||||
return (struct list_node*)NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
rest = strpbrk(text, " []");
|
*text = trim_leading_blanks(*text);
|
||||||
|
|
||||||
|
if (NULL == *text) {
|
||||||
|
printf("Missing ']' bracket.");
|
||||||
|
exit(1);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Look for blanks or brackets. */
|
||||||
|
rest = strpbrk(*text, " []");
|
||||||
/*
|
/*
|
||||||
rest now points to a space or '[' or ']' after a term,
|
rest now points to a space or '[' or ']' after a term,
|
||||||
-or- it is NULL if the rest of the string is a single term
|
-or- it is NULL if the rest of the string is a single term
|
||||||
with no spaces nor brackets.
|
with no spaces nor brackets. If that's the case then we're
|
||||||
|
missing a closing bracket!
|
||||||
*/
|
*/
|
||||||
|
if (NULL == rest) {
|
||||||
|
printf("Missing ']' bracket.");
|
||||||
|
exit(1);
|
||||||
|
};
|
||||||
|
|
||||||
while (NULL != rest) {
|
/* How many chars have we got? */
|
||||||
|
diff = rest - *text;
|
||||||
|
|
||||||
/* How many chars have we got? */
|
if (diff) {
|
||||||
diff = rest - text;
|
result = make_symbol_node(*text, diff);
|
||||||
/*
|
*text = rest;
|
||||||
diff can be zero when there is more than one space in
|
} else if ('[' == rest[0]) {
|
||||||
a sequence in the input string. This won't happen on
|
*text = rest++;
|
||||||
the first iteration but it can on later iterations.
|
result = make_list_node(parse_list(text));
|
||||||
*/
|
} else if (']' == rest[0]) {
|
||||||
|
*text = rest++;
|
||||||
if (diff) {
|
return result;
|
||||||
/* Allocate space and copy out the substring. */
|
|
||||||
current_list_node = make_symbol_node(text, diff);
|
|
||||||
if (head) {
|
|
||||||
head->tail = current_list_node;
|
|
||||||
} else {
|
|
||||||
/* There is no head now, so this must be the first
|
|
||||||
result, the head that we will eventually return. */
|
|
||||||
result = current_list_node;
|
|
||||||
}
|
|
||||||
head = current_list_node;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The next char is a space or '[' or ']'. */
|
|
||||||
if ('[' == rest[0] || ']' == rest[0]) {
|
|
||||||
printf("%c\n", rest[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
text = trim_leading_blanks(++rest);
|
|
||||||
|
|
||||||
/* calling strpbrk on NULL caused segfault! */
|
|
||||||
rest = (NULL != text) ? strpbrk(text, " []") : text;
|
|
||||||
}
|
|
||||||
if (text) {
|
|
||||||
current_list_node = make_symbol_node(text, strlen(text));
|
|
||||||
if (head) {
|
|
||||||
head->tail = current_list_node;
|
|
||||||
} else {
|
|
||||||
result = current_list_node;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
result->tail = parse_list(text);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Pre-declare so we can use it in print_node(). */
|
/*
|
||||||
void
|
Get the next node from the text, updating text
|
||||||
print_list(struct list_node* el);
|
to point to the rest of the, uh, text.
|
||||||
|
*/
|
||||||
|
struct list_node*
|
||||||
void
|
parse_node(char **text)
|
||||||
print_node(struct JoyType j)
|
|
||||||
{
|
{
|
||||||
switch (j.kind) {
|
char *rest;
|
||||||
case joyInt:
|
ptrdiff_t diff;
|
||||||
gmp_printf("%Zd", j.value.i);
|
struct list_node *thing;
|
||||||
break;
|
|
||||||
case joySymbol:
|
/* NULL string input? */
|
||||||
printf("%s", j.value.symbol);
|
if (NULL == *text) return EMPTY_LIST;
|
||||||
break;
|
|
||||||
case joyTrue:
|
*text = trim_leading_blanks(*text);
|
||||||
printf("true");
|
|
||||||
break;
|
/* All blanks? */
|
||||||
case joyFalse:
|
if (NULL == *text) return EMPTY_LIST;
|
||||||
printf("false");
|
|
||||||
break;
|
/* Look for blanks or brackets. */
|
||||||
case joyList:
|
rest = strpbrk(*text, " []");
|
||||||
printf("[");
|
/*
|
||||||
print_list(j.value.el);
|
rest now points to a space or '[' or ']' after a term,
|
||||||
printf("]");
|
-or- it is NULL if the rest of the string is a single term
|
||||||
break;
|
with no spaces nor brackets. If that's the case then we're
|
||||||
default:
|
done, and we can just return a list with one symbol in it.
|
||||||
printf("wtf");
|
*/
|
||||||
}
|
if (NULL == rest) return make_symbol_node(*text, strlen(*text));
|
||||||
|
|
||||||
|
/* How many chars have we got? */
|
||||||
|
diff = rest - *text;
|
||||||
|
|
||||||
|
if (diff) {
|
||||||
|
thing = make_symbol_node(*text, diff);
|
||||||
|
*text = rest;
|
||||||
|
return thing;
|
||||||
|
}
|
||||||
|
if ('[' == rest[0]) {
|
||||||
|
*text = rest++;
|
||||||
|
return make_list_node(parse_list(text));
|
||||||
|
}
|
||||||
|
if (']' == rest[0]) {
|
||||||
|
printf("Extra ']' bracket.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
printf("Should be unreachable.");
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
struct list_node*
|
||||||
print_list(struct list_node* el)
|
text_to_expression(char *text)
|
||||||
{
|
{
|
||||||
while (NULL != el) {
|
struct list_node *result, *head, *tail;
|
||||||
print_node(el->head);
|
printf("1\n");
|
||||||
el = el->tail;
|
result = parse_node(&text);
|
||||||
if (NULL != el) {
|
printf("2\n");
|
||||||
printf(" ");
|
print_list(result);
|
||||||
}
|
printf(" <- eh?\n");
|
||||||
}
|
head = result;
|
||||||
|
tail = parse_node(&text);
|
||||||
|
while (NULL != tail) {
|
||||||
|
print_list(tail);
|
||||||
|
printf("<- ooh?\n");
|
||||||
|
head->tail = tail;
|
||||||
|
head = tail;
|
||||||
|
tail = parse_node(&text);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -211,7 +283,7 @@ main(void)
|
||||||
{
|
{
|
||||||
mpz_t pi;
|
mpz_t pi;
|
||||||
struct list_node* el;
|
struct list_node* el;
|
||||||
char *text = (char *)TEXT;
|
char *text = (char *)TEXT;
|
||||||
|
|
||||||
mp_set_memory_functions(
|
mp_set_memory_functions(
|
||||||
&GC_malloc,
|
&GC_malloc,
|
||||||
|
|
@ -223,8 +295,9 @@ main(void)
|
||||||
GC_register_finalizer(pi, my_callback, NULL, NULL, NULL);
|
GC_register_finalizer(pi, my_callback, NULL, NULL, NULL);
|
||||||
|
|
||||||
el = push_integer_from_str("3141592653589793238462643383279502884", 0);
|
el = push_integer_from_str("3141592653589793238462643383279502884", 0);
|
||||||
|
printf("BEGIN\n");
|
||||||
el->tail = text_to_expression(text);
|
el->tail = text_to_expression(text);
|
||||||
print_list(el);
|
print_list(el);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,3 +47,158 @@ typedef struct list_node* JoyList;
|
||||||
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
|
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
|
||||||
/*mpz_mul(pi, pi, pi);*/
|
/*mpz_mul(pi, pi, pi);*/
|
||||||
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
|
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
|
||||||
|
'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*=======================================================*/
|
||||||
|
struct list_node*
|
||||||
|
text_to_expression(char *text)
|
||||||
|
{
|
||||||
|
char *rest;
|
||||||
|
ptrdiff_t diff;
|
||||||
|
struct list_node *thing;
|
||||||
|
struct list_node *result;
|
||||||
|
struct list_node *tail = NULL;
|
||||||
|
struct list_node *stack[128];
|
||||||
|
int s = 0; /* stack pointer */
|
||||||
|
|
||||||
|
/* NULL string input? */
|
||||||
|
if (NULL == text) {
|
||||||
|
return EMPTY_LIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
text = trim_leading_blanks(text);
|
||||||
|
|
||||||
|
/* All blanks? */
|
||||||
|
if (NULL == text) {
|
||||||
|
return EMPTY_LIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Look for blanks or brackets. */
|
||||||
|
rest = strpbrk(text, " []");
|
||||||
|
/*
|
||||||
|
rest now points to a space or '[' or ']' after a term,
|
||||||
|
-or- it is NULL if the rest of the string is a single term
|
||||||
|
with no spaces nor brackets. If that's the case then we're
|
||||||
|
done, and we can just return a list with one symbol in it.
|
||||||
|
*/
|
||||||
|
if (NULL == rest) {
|
||||||
|
return make_symbol_node(text, strlen(text));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* How many chars have we got? */
|
||||||
|
diff = rest - text;
|
||||||
|
|
||||||
|
if (diff) {
|
||||||
|
thing = make_symbol_node(text, diff);
|
||||||
|
if (tail) {
|
||||||
|
tail->tail = thing;
|
||||||
|
} else {
|
||||||
|
/* There is no head now, so this must be the first
|
||||||
|
result, the head that we will eventually return. */
|
||||||
|
result = thing;
|
||||||
|
}
|
||||||
|
tail = thing;
|
||||||
|
|
||||||
|
} else if ('[' == rest[0]) {
|
||||||
|
rest++;
|
||||||
|
/* We need to parse a sub-list. */
|
||||||
|
if (tail) {
|
||||||
|
/* We have already parsed some terms, set them aside. */
|
||||||
|
stack[s++] = tail;
|
||||||
|
stack[s++] = result;
|
||||||
|
tail = (struct list_node *)NULL;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
This is going to be our first term when we're done parsing it.
|
||||||
|
Do nothing, empty stack will signal it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
}
|
||||||
|
} else if (']' == rest[0]) {
|
||||||
|
rest++;
|
||||||
|
if (s) {
|
||||||
|
/* We have a tail on the stack. */
|
||||||
|
thing = result;
|
||||||
|
result = stack[--s];
|
||||||
|
tail = stack[--s];
|
||||||
|
tail->tail = thing;
|
||||||
|
} else {
|
||||||
|
/* This list must have been the first thing in the expr. */
|
||||||
|
printf("wtf");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
result = make_symbol_node(text, diff);
|
||||||
|
|
||||||
|
/*
|
||||||
|
This is the head of the list we are eventually going to return,
|
||||||
|
so we assign it to the result
|
||||||
|
We are going to be adding things to the tail of this list, so
|
||||||
|
we'll need to keep track of that
|
||||||
|
*/
|
||||||
|
tail = result;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Now we want to
|
||||||
|
*/
|
||||||
|
while (NULL != rest) {
|
||||||
|
|
||||||
|
|
||||||
|
if (head) {
|
||||||
|
head->tail = current_list_node;
|
||||||
|
} else {
|
||||||
|
/* There is no head now, so this must be the first
|
||||||
|
result, the head that we will eventually return. */
|
||||||
|
result = current_list_node;
|
||||||
|
}
|
||||||
|
head = current_list_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The next char is a space or '[' or ']'. */
|
||||||
|
if ('[' == rest[0]) {
|
||||||
|
if (head) {
|
||||||
|
stack[s++] = head;
|
||||||
|
head = (struct list_node *)NULL;
|
||||||
|
}
|
||||||
|
/*printf("%c\n", rest[0]);*/
|
||||||
|
}
|
||||||
|
|
||||||
|
if (']' == rest[0]) {
|
||||||
|
current_list_node = make_list_node(stack[s--]);
|
||||||
|
if (head) {
|
||||||
|
head->tail = current_list_node;
|
||||||
|
} else {
|
||||||
|
/* There is no head now, so this must be the first
|
||||||
|
result, the head that we will eventually return. */
|
||||||
|
result = current_list_node;
|
||||||
|
}
|
||||||
|
head = current_list_node;
|
||||||
|
/*printf("%c\n", rest[0]);*/
|
||||||
|
}
|
||||||
|
|
||||||
|
text = trim_leading_blanks(++rest);
|
||||||
|
|
||||||
|
/* calling strpbrk on NULL caused segfault! */
|
||||||
|
rest = (NULL != text) ? strpbrk(text, " []") : text;
|
||||||
|
}
|
||||||
|
if (text) {
|
||||||
|
current_list_node = make_symbol_node(text, strlen(text));
|
||||||
|
if (head) {
|
||||||
|
head->tail = current_list_node;
|
||||||
|
} else {
|
||||||
|
result = current_list_node;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
'
|
||||||
Loading…
Reference in New Issue