Simpler tokenizer.
Once the syntax was simple enough, the re.Scanner machinery became overkill. The replacement trick comes from http://norvig.com/lispy.html, by way of https://github.com/ckkashyap/s2c.
parent 2ad303c247
commit 4fbe2ed4a0
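The entire tokenizer now reduces to the pad-and-split trick: surround each bracket with spaces, then let str.split do the work. A minimal sketch (the standalone tokenize name here is mine for illustration; the diff below inlines the expression directly into text_to_expression):

    def tokenize(text):
        # Pad the brackets with spaces so that str.split() yields them
        # as standalone tokens alongside the words.
        return text.replace('[', ' [ ').replace(']', ' ] ').split()

    tokenize('[dup *] i')  # -> ['[', 'dup', '*', ']', 'i']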
@@ -56,7 +56,6 @@ Ulam Spiral).
 '''
 from functools import wraps
 from inspect import getdoc
-from re import Scanner
 from traceback import print_exc
 import operator

@@ -335,27 +334,6 @@ around square brackets.
 JOY_BOOL_LITERALS = _F, _T = 'false', 'true'


-BRACKETS = r'\[|\]'  # Left or right square bracket.
-BLANKS = r'\s+'  # One-or-more blankspace.
-WORDS = (
-    '['    # Character class
-    '^'    # not a
-    '['    # left square bracket nor a
-    r'\]'  # right square bracket (escaped so it doesn't close the character class)
-    r'\s'  # nor blankspace
-    ']+'   # end character class, one-or-more.
-)
-
-
-token_scanner = Scanner(
-    [
-        (BRACKETS, lambda _, token: token),
-        (BLANKS, None),
-        (WORDS, lambda _, token: token),
-    ]
-)
-
-
 class ParseError(ValueError):
     '''
     Raised when there is a error while parsing text.
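For reference, the deleted scanner can be run on its own. This sketch reassembles the removed definitions (re.Scanner is a real but undocumented member of the standard library's re module; the example input is mine):

    from re import Scanner

    BRACKETS = r'\[|\]'   # Left or right square bracket.
    BLANKS = r'\s+'       # One-or-more blankspace.
    WORDS = r'[^[\]\s]+'  # Neither a bracket nor blankspace, one-or-more.

    token_scanner = Scanner([
        (BRACKETS, lambda _, token: token),
        (BLANKS, None),  # A None action means: match and discard.
        (WORDS, lambda _, token: token),
    ])

    tokens, rest = token_scanner.scan('[dup *] i')
    # tokens == ['[', 'dup', '*', ']', 'i']; rest == '' on a clean scan.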
@@ -382,23 +360,7 @@ def text_to_expression(text):
     :rtype: stack
     :raises ParseError: if the parse fails.
     '''
-    return _parse(_tokenize(text))
+    return _parse(text.replace('[', ' [ ').replace(']', ' ] ').split())


-def _tokenize(text):
-    '''Convert a text into a stream of tokens.
-
-    Converts function names to Symbols.
-
-    Raise ParseError (with some of the failing text) if the scan fails.
-    '''
-    tokens, rest = token_scanner.scan(text)
-    if rest:
-        raise ParseError(
-            'Scan failed at position %i, %r'
-            % (len(text) - len(rest), rest[:10])
-        )
-    return tokens
-
-
 def _parse(tokens):
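The token list then goes to _parse, whose body lies outside this hunk. Purely as an illustration (this is not the project's _parse, which builds Joy's own expression type), a bracket-matching pass over such a token stream looks like:

    def parse(tokens):
        # Sketch: nest bracketed runs of tokens into Python lists.
        frame, stack = [], []
        for token in tokens:
            if token == '[':
                stack.append(frame)  # open a new nesting level
                frame = []
            elif token == ']':
                if not stack:
                    raise ValueError('extra closing bracket')
                inner, frame = frame, stack.pop()
                frame.append(inner)  # attach the finished level to its parent
            else:
                frame.append(token)
        if stack:
            raise ValueError('unclosed bracket')
        return frame

    parse(['[', 'dup', '*', ']', 'i'])  # -> [['dup', '*'], 'i']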