Parsing isn't this hard?
This commit is contained in:
parent
9356e5394b
commit
ebae69c391
|
|
@ -1,4 +1,5 @@
|
|||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
|
@ -7,7 +8,9 @@
|
|||
|
||||
|
||||
const char *BLANKS = " \t";
|
||||
const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd] [] 23 ";
|
||||
/*const char *TEXT = " 23 [dup *] i hi there fr [[] ie]nd [] 23 ";*/
|
||||
/*const char *TEXT = " 23 33 [] ";*/
|
||||
const char *TEXT = "";
|
||||
|
||||
|
||||
enum JoyTypeType {
|
||||
|
|
@ -69,6 +72,52 @@ push_integer_from_str(char *str, struct list_node* tail)
|
|||
}
|
||||
|
||||
|
||||
|
||||
/* Pre-declare so we can use it in print_node(). */
|
||||
void
|
||||
print_list(struct list_node* el);
|
||||
|
||||
|
||||
void
|
||||
print_node(struct JoyType j)
|
||||
{
|
||||
switch (j.kind) {
|
||||
case joyInt:
|
||||
gmp_printf("%Zd", j.value.i);
|
||||
break;
|
||||
case joySymbol:
|
||||
printf("%s", j.value.symbol);
|
||||
break;
|
||||
case joyTrue:
|
||||
printf("true");
|
||||
break;
|
||||
case joyFalse:
|
||||
printf("false");
|
||||
break;
|
||||
case joyList:
|
||||
printf("[");
|
||||
print_list(j.value.el);
|
||||
printf("]");
|
||||
break;
|
||||
default:
|
||||
printf("wtf");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
print_list(struct list_node* el)
|
||||
{
|
||||
while (NULL != el) {
|
||||
print_node(el->head);
|
||||
el = el->tail;
|
||||
if (NULL != el) {
|
||||
printf(" ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
char *
|
||||
trim_leading_blanks(char *str)
|
||||
{
|
||||
|
|
@ -91,118 +140,141 @@ make_symbol_node(char *text, size_t size)
|
|||
}
|
||||
|
||||
|
||||
/* Create a new list_node with a joyList head. */
|
||||
struct list_node*
|
||||
text_to_expression(char *text)
|
||||
make_list_node(struct list_node *el)
|
||||
{
|
||||
struct list_node *node;
|
||||
node = GC_malloc(sizeof(struct list_node));
|
||||
node->head.kind = joyList;
|
||||
node->head.value.el = el;
|
||||
return node;
|
||||
}
|
||||
|
||||
/* The empty list: a null list_node pointer.  The expansion is fully
   parenthesized so it behaves as one operand in any expression
   (e.g. under sizeof or next to a postfix operator). */
#define EMPTY_LIST ((struct list_node *)NULL)
|
||||
|
||||
/*
|
||||
Extract terms from the text until a closing bracket is found.
|
||||
*/
|
||||
struct list_node*
|
||||
parse_list(char **text)
|
||||
{
|
||||
char *rest;
|
||||
ptrdiff_t diff;
|
||||
struct list_node *current_list_node;
|
||||
struct list_node *result;
|
||||
struct list_node *head = NULL;
|
||||
struct list_node *result = NULL;
|
||||
/* NULL string input? */
|
||||
|
||||
if (NULL == text) {
|
||||
/* NULL string input. */
|
||||
return (struct list_node*)NULL;
|
||||
}
|
||||
text = trim_leading_blanks(text);
|
||||
if (NULL == text) {
|
||||
/* All blanks. */
|
||||
return (struct list_node*)NULL;
|
||||
}
|
||||
if (NULL == *text) {
|
||||
printf("Missing ']' bracket.");
|
||||
exit(1);
|
||||
};
|
||||
|
||||
rest = strpbrk(text, " []");
|
||||
*text = trim_leading_blanks(*text);
|
||||
|
||||
if (NULL == *text) {
|
||||
printf("Missing ']' bracket.");
|
||||
exit(1);
|
||||
};
|
||||
|
||||
/* Look for blanks or brackets. */
|
||||
rest = strpbrk(*text, " []");
|
||||
/*
|
||||
rest now points to a space or '[' or ']' after a term,
|
||||
-or- it is NULL if the rest of the string is a single term
|
||||
with no spaces nor brackets.
|
||||
with no spaces nor brackets. If that's the case then we're
|
||||
missing a closing bracket!
|
||||
*/
|
||||
if (NULL == rest) {
|
||||
printf("Missing ']' bracket.");
|
||||
exit(1);
|
||||
};
|
||||
|
||||
while (NULL != rest) {
|
||||
/* How many chars have we got? */
|
||||
diff = rest - *text;
|
||||
|
||||
/* How many chars have we got? */
|
||||
diff = rest - text;
|
||||
/*
|
||||
diff can be zero when there is more than one space in
|
||||
a sequence in the input string. This won't happen on
|
||||
the first iteration but it can on later iterations.
|
||||
*/
|
||||
|
||||
if (diff) {
|
||||
/* Allocate space and copy out the substring. */
|
||||
current_list_node = make_symbol_node(text, diff);
|
||||
if (head) {
|
||||
head->tail = current_list_node;
|
||||
} else {
|
||||
/* There is no head now, so this must be the first
|
||||
result, the head that we will eventually return. */
|
||||
result = current_list_node;
|
||||
}
|
||||
head = current_list_node;
|
||||
}
|
||||
|
||||
/* The next char is a space or '[' or ']'. */
|
||||
if ('[' == rest[0] || ']' == rest[0]) {
|
||||
printf("%c\n", rest[0]);
|
||||
}
|
||||
|
||||
text = trim_leading_blanks(++rest);
|
||||
|
||||
/* calling strpbrk on NULL caused segfault! */
|
||||
rest = (NULL != text) ? strpbrk(text, " []") : text;
|
||||
}
|
||||
if (text) {
|
||||
current_list_node = make_symbol_node(text, strlen(text));
|
||||
if (head) {
|
||||
head->tail = current_list_node;
|
||||
} else {
|
||||
result = current_list_node;
|
||||
}
|
||||
if (diff) {
|
||||
result = make_symbol_node(*text, diff);
|
||||
*text = rest;
|
||||
} else if ('[' == rest[0]) {
|
||||
*text = rest++;
|
||||
result = make_list_node(parse_list(text));
|
||||
} else if (']' == rest[0]) {
|
||||
*text = rest++;
|
||||
return result;
|
||||
}
|
||||
result->tail = parse_list(text);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Pre-declare so we can use it in print_node(). */
|
||||
void
|
||||
print_list(struct list_node* el);
|
||||
|
||||
|
||||
void
|
||||
print_node(struct JoyType j)
|
||||
/*
|
||||
Get the next node from the text, updating text
|
||||
to point to the rest of the, uh, text.
|
||||
*/
|
||||
struct list_node*
|
||||
parse_node(char **text)
|
||||
{
|
||||
switch (j.kind) {
|
||||
case joyInt:
|
||||
gmp_printf("%Zd", j.value.i);
|
||||
break;
|
||||
case joySymbol:
|
||||
printf("%s", j.value.symbol);
|
||||
break;
|
||||
case joyTrue:
|
||||
printf("true");
|
||||
break;
|
||||
case joyFalse:
|
||||
printf("false");
|
||||
break;
|
||||
case joyList:
|
||||
printf("[");
|
||||
print_list(j.value.el);
|
||||
printf("]");
|
||||
break;
|
||||
default:
|
||||
printf("wtf");
|
||||
}
|
||||
char *rest;
|
||||
ptrdiff_t diff;
|
||||
struct list_node *thing;
|
||||
|
||||
/* NULL string input? */
|
||||
if (NULL == *text) return EMPTY_LIST;
|
||||
|
||||
*text = trim_leading_blanks(*text);
|
||||
|
||||
/* All blanks? */
|
||||
if (NULL == *text) return EMPTY_LIST;
|
||||
|
||||
/* Look for blanks or brackets. */
|
||||
rest = strpbrk(*text, " []");
|
||||
/*
|
||||
rest now points to a space or '[' or ']' after a term,
|
||||
-or- it is NULL if the rest of the string is a single term
|
||||
with no spaces nor brackets. If that's the case then we're
|
||||
done, and we can just return a list with one symbol in it.
|
||||
*/
|
||||
if (NULL == rest) return make_symbol_node(*text, strlen(*text));
|
||||
|
||||
/* How many chars have we got? */
|
||||
diff = rest - *text;
|
||||
|
||||
if (diff) {
|
||||
thing = make_symbol_node(*text, diff);
|
||||
*text = rest;
|
||||
return thing;
|
||||
}
|
||||
if ('[' == rest[0]) {
|
||||
*text = rest++;
|
||||
return make_list_node(parse_list(text));
|
||||
}
|
||||
if (']' == rest[0]) {
|
||||
printf("Extra ']' bracket.");
|
||||
exit(1);
|
||||
}
|
||||
printf("Should be unreachable.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
print_list(struct list_node* el)
|
||||
struct list_node*
|
||||
text_to_expression(char *text)
|
||||
{
|
||||
while (NULL != el) {
|
||||
print_node(el->head);
|
||||
el = el->tail;
|
||||
if (NULL != el) {
|
||||
printf(" ");
|
||||
}
|
||||
}
|
||||
struct list_node *result, *head, *tail;
|
||||
printf("1\n");
|
||||
result = parse_node(&text);
|
||||
printf("2\n");
|
||||
print_list(result);
|
||||
printf(" <- eh?\n");
|
||||
head = result;
|
||||
tail = parse_node(&text);
|
||||
while (NULL != tail) {
|
||||
print_list(tail);
|
||||
printf("<- ooh?\n");
|
||||
head->tail = tail;
|
||||
head = tail;
|
||||
tail = parse_node(&text);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -211,7 +283,7 @@ main(void)
|
|||
{
|
||||
mpz_t pi;
|
||||
struct list_node* el;
|
||||
char *text = (char *)TEXT;
|
||||
char *text = (char *)TEXT;
|
||||
|
||||
mp_set_memory_functions(
|
||||
&GC_malloc,
|
||||
|
|
@ -223,8 +295,9 @@ main(void)
|
|||
GC_register_finalizer(pi, my_callback, NULL, NULL, NULL);
|
||||
|
||||
el = push_integer_from_str("3141592653589793238462643383279502884", 0);
|
||||
printf("BEGIN\n");
|
||||
el->tail = text_to_expression(text);
|
||||
print_list(el);
|
||||
printf("\n");
|
||||
print_list(el);
|
||||
printf("\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,3 +47,158 @@ typedef struct list_node* JoyList;
|
|||
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
|
||||
/*mpz_mul(pi, pi, pi);*/
|
||||
/*gmp_printf("%Zd = %Zx\n", pi, pi);*/
|
||||
'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*=======================================================*/
|
||||
struct list_node*
|
||||
text_to_expression(char *text)
|
||||
{
|
||||
char *rest;
|
||||
ptrdiff_t diff;
|
||||
struct list_node *thing;
|
||||
struct list_node *result;
|
||||
struct list_node *tail = NULL;
|
||||
struct list_node *stack[128];
|
||||
int s = 0; /* stack pointer */
|
||||
|
||||
/* NULL string input? */
|
||||
if (NULL == text) {
|
||||
return EMPTY_LIST;
|
||||
}
|
||||
|
||||
text = trim_leading_blanks(text);
|
||||
|
||||
/* All blanks? */
|
||||
if (NULL == text) {
|
||||
return EMPTY_LIST;
|
||||
}
|
||||
|
||||
/* Look for blanks or brackets. */
|
||||
rest = strpbrk(text, " []");
|
||||
/*
|
||||
rest now points to a space or '[' or ']' after a term,
|
||||
-or- it is NULL if the rest of the string is a single term
|
||||
with no spaces nor brackets. If that's the case then we're
|
||||
done, and we can just return a list with one symbol in it.
|
||||
*/
|
||||
if (NULL == rest) {
|
||||
return make_symbol_node(text, strlen(text));
|
||||
}
|
||||
|
||||
/* How many chars have we got? */
|
||||
diff = rest - text;
|
||||
|
||||
if (diff) {
|
||||
thing = make_symbol_node(text, diff);
|
||||
if (tail) {
|
||||
tail->tail = thing;
|
||||
} else {
|
||||
/* There is no head now, so this must be the first
|
||||
result, the head that we will eventually return. */
|
||||
result = thing;
|
||||
}
|
||||
tail = thing;
|
||||
|
||||
} else if ('[' == rest[0]) {
|
||||
rest++;
|
||||
/* We need to parse a sub-list. */
|
||||
if (tail) {
|
||||
/* We have already parsed some terms, set them aside. */
|
||||
stack[s++] = tail;
|
||||
stack[s++] = result;
|
||||
tail = (struct list_node *)NULL;
|
||||
} else {
|
||||
/*
|
||||
This is going to be our first term when we're done parsing it.
|
||||
Do nothing, empty stack will signal it.
|
||||
*/
|
||||
|
||||
}
|
||||
} else if (']' == rest[0]) {
|
||||
rest++;
|
||||
if (s) {
|
||||
/* We have a tail on the stack. */
|
||||
thing = result;
|
||||
result = stack[--s];
|
||||
tail = stack[--s];
|
||||
tail->tail = thing;
|
||||
} else {
|
||||
/* This list must have been the first thing in the expr. */
|
||||
printf("wtf");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
result = make_symbol_node(text, diff);
|
||||
|
||||
/*
|
||||
This is the head of the list we are eventually going to return,
|
||||
so we assign it to the result
|
||||
We are going to be adding things to the tail of this list, so
|
||||
we'll need to keep track of that
|
||||
*/
|
||||
tail = result;
|
||||
|
||||
/*
|
||||
Now we want to
|
||||
*/
|
||||
while (NULL != rest) {
|
||||
|
||||
|
||||
if (head) {
|
||||
head->tail = current_list_node;
|
||||
} else {
|
||||
/* There is no head now, so this must be the first
|
||||
result, the head that we will eventually return. */
|
||||
result = current_list_node;
|
||||
}
|
||||
head = current_list_node;
|
||||
}
|
||||
|
||||
/* The next char is a space or '[' or ']'. */
|
||||
if ('[' == rest[0]) {
|
||||
if (head) {
|
||||
stack[s++] = head;
|
||||
head = (struct list_node *)NULL;
|
||||
}
|
||||
/*printf("%c\n", rest[0]);*/
|
||||
}
|
||||
|
||||
if (']' == rest[0]) {
|
||||
current_list_node = make_list_node(stack[s--]);
|
||||
if (head) {
|
||||
head->tail = current_list_node;
|
||||
} else {
|
||||
/* There is no head now, so this must be the first
|
||||
result, the head that we will eventually return. */
|
||||
result = current_list_node;
|
||||
}
|
||||
head = current_list_node;
|
||||
/*printf("%c\n", rest[0]);*/
|
||||
}
|
||||
|
||||
text = trim_leading_blanks(++rest);
|
||||
|
||||
/* calling strpbrk on NULL caused segfault! */
|
||||
rest = (NULL != text) ? strpbrk(text, " []") : text;
|
||||
}
|
||||
if (text) {
|
||||
current_list_node = make_symbol_node(text, strlen(text));
|
||||
if (head) {
|
||||
head->tail = current_list_node;
|
||||
} else {
|
||||
result = current_list_node;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
'
|
||||
Loading…
Reference in New Issue