Simpler tokenizer.
Once the syntax was simple enough, the re.Scanner machinery became overkill. The replacement trick comes from http://norvig.com/lispy.html, by way of https://github.com/ckkashyap/s2c.
parent 2ad303c247
commit 4fbe2ed4a0
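The entire tokenizer now reduces to the pad-and-split trick: surround each bracket with spaces, then let str.split do the work. A minimal sketch (the standalone tokenize name here is mine for illustration; the diff below inlines the expression directly into text_to_expression):

    def tokenize(text):
        # Pad the brackets with spaces so that str.split() yields them
        # as standalone tokens alongside the words.
        return text.replace('[', ' [ ').replace(']', ' ] ').split()

    tokenize('[dup *] i')  # -> ['[', 'dup', '*', ']', 'i']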
@@ -56,7 +56,6 @@ Ulam Spiral).
 '''
 from functools import wraps
 from inspect import getdoc
-from re import Scanner
 from traceback import print_exc
 import operator

@@ -335,27 +334,6 @@ around square brackets.
 JOY_BOOL_LITERALS = _F, _T = 'false', 'true'


-BRACKETS = r'\[|\]'  # Left or right square bracket.
-BLANKS = r'\s+'  # One-or-more blankspace.
-WORDS = (
-    '['    # Character class
-    '^'    # not a
-    '['    # left square bracket nor a
-    r'\]'  # right square bracket (escaped so it doesn't close the character class)
-    r'\s'  # nor blankspace
-    ']+'   # end character class, one-or-more.
-)
-
-
-token_scanner = Scanner(
-    [
-        (BRACKETS, lambda _, token: token),
-        (BLANKS, None),
-        (WORDS, lambda _, token: token),
-    ]
-)
-
-
 class ParseError(ValueError):
     '''
     Raised when there is a error while parsing text.
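For reference, the deleted scanner can be run on its own. This sketch reassembles the removed definitions (re.Scanner is a real but undocumented member of the standard library's re module; the example input is mine):

    from re import Scanner

    BRACKETS = r'\[|\]'   # Left or right square bracket.
    BLANKS = r'\s+'       # One-or-more blankspace.
    WORDS = r'[^[\]\s]+'  # Neither a bracket nor blankspace, one-or-more.

    token_scanner = Scanner([
        (BRACKETS, lambda _, token: token),
        (BLANKS, None),  # A None action means: match and discard.
        (WORDS, lambda _, token: token),
    ])

    tokens, rest = token_scanner.scan('[dup *] i')
    # tokens == ['[', 'dup', '*', ']', 'i']; rest == '' on a clean scan.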
@@ -382,23 +360,7 @@ def text_to_expression(text):
     :rtype: stack
     :raises ParseError: if the parse fails.
     '''
-    return _parse(_tokenize(text))
+    return _parse(text.replace('[', ' [ ').replace(']', ' ] ').split())


-def _tokenize(text):
-    '''Convert a text into a stream of tokens.
-
-    Converts function names to Symbols.
-
-    Raise ParseError (with some of the failing text) if the scan fails.
-    '''
-    tokens, rest = token_scanner.scan(text)
-    if rest:
-        raise ParseError(
-            'Scan failed at position %i, %r'
-            % (len(text) - len(rest), rest[:10])
-        )
-    return tokens
-
-
 def _parse(tokens):
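The token list then goes to _parse, whose body lies outside this hunk. Purely as an illustration (this is not the project's _parse, which builds Joy's own expression type), a bracket-matching pass over such a token stream looks like:

    def parse(tokens):
        # Sketch: nest bracketed runs of tokens into Python lists.
        frame, stack = [], []
        for token in tokens:
            if token == '[':
                stack.append(frame)  # open a new nesting level
                frame = []
            elif token == ']':
                if not stack:
                    raise ValueError('extra closing bracket')
                inner, frame = frame, stack.pop()
                frame.append(inner)  # attach the finished level to its parent
            else:
                frame.append(token)
        if stack:
            raise ValueError('unclosed bracket')
        return frame

    parse(['[', 'dup', '*', ']', 'i'])  # -> [['dup', '*'], 'i']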