finish usage of ast

hstoebel · Feb 20, 2020 · 5f1242d · 5f1242d
1 parent 92699e2
commit 5f1242d
Show file tree

Hide file tree

Showing 6 changed files with 75 additions and 46 deletions.
diff --git a/interpreter/app/ast/ast.py b/interpreter/app/ast/ast.py
@@ -58,7 +58,6 @@ def _feed_operator(self, token):
                             2<--+
 
         """
-        # import pdb; pdb.set_trace()
         number_to_replace = self.current_operator.right_value
         token.feed(number_to_replace)
         self.current_operator.feed(token)

diff --git a/interpreter/app/interpreter/interpreter.py b/interpreter/app/interpreter/interpreter.py
@@ -11,20 +11,24 @@ def __init__(self, text):
         # client string input, e.g. "3+5"
         self.text = self.strip_text(text)
         # self.pos is an index into self.text
-        self.pos = 0
+        self.pos = -1
         # current token instance
         self.current_token = None
+        self.advance_token()
         self.prev_token = None
 
     def error(self, msg='Error parsing input'):
         raise InterpreterParseError(msg)
 
-    def next_token(self):
+
+    def get_token(self, offset = 0):
         """Lexical analyzer (also known as scanner or tokenizer)
         This method is responsible for breaking a sentence
         apart into tokens. One token at a time.
 
         Gets the next token but doesn't update state.
+
+        offset: a number representing the number of characters to offset self.pos.
         """
         text = self.text
 
@@ -36,12 +40,13 @@ def next_token(self):
 
         # get a character at the position self.pos and decide
         # what token to create based on the single character
-        current_char = text[self.pos]
+        current_char = text[self.pos + offset]
 
         # if the character is a digit then convert it to
         # integer, create an INTEGER token, increment self.pos
         # index to point to the next character after the digit,
         # and return the INTEGER token
+
         if current_char.isdigit():
             return token.IntToken(current_char)
         elif current_char == '+':
@@ -52,13 +57,24 @@ def next_token(self):
             return token.MultiplyToken()
         elif current_char == '/':
             return token.DivideToken()
+        elif current_char == ' ':
+            return token.SpaceToken()
         else:
             self.error()
 
-    def get_next_token(self):
-        token = self.next_token()
+    def advance_token(self):
+        """
+        advances to the next character in the text
+        updates state for
+            - pos
+            - prev_token
+            - current_token
+        """
+
         self.pos += 1
-        return token
+        next_token = self.get_token()
+        self.prev_token = self.current_token
+        self.current_token = next_token
 
     def done(self):
         return isinstance(self.current_token, token.EOFToken)
@@ -72,15 +88,14 @@ def eat(self, *token_types):
         """
 
         if self.current_token.type in token_types:
-            self.prev_token = self.current_token
-            self.current_token = self.get_next_token()
+            self.advance_token()
         else:
             self.error()
 
     def eat_integers(self) -> token.IntWrapper:
         """
         eats integers tokens until a non integer is found. 
-        Return 
+        Returns IntWrapper
         """
         tokens = []
         while True:
@@ -90,38 +105,33 @@ def eat_integers(self) -> token.IntWrapper:
                 self.eat(token.INTEGER)
                 tokens.append(curr_token)
             except InterpreterParseError as e:
+                # the token isn't an integer. If it's a space and the next character is an integer too, that's a corner case we need to account for
+                next_token = self.get_token(1)
+                if curr_token.type == 'SPACE' and next_token.type == 'INTEGER':
+                    self.error('illegal space detected')
                 return token.IntWrapper(tokens)
+
+    def eat_operator(self) -> token.OperatorToken:
+        """
+        eats the next character expecting it to be an operator
+        """
+        self.eat(token.PLUS, token.MINUS, token.MULTIPLY, token.DIVIDE)
+        return self.prev_token
 
     def expr(self):
         """expr -> INTEGER PLUS INTEGER"""
-        # set current token to the first token taken from the input
-        # self.current_token = self.get_next_token()
-
-        # left = self.eat_integers()
-
-        # # we expect the current token to be a '+' or '-' token
-        # operator = self.current_token
-        # self.eat(token.PLUS, token.MINUS, token.MULTIPLY, token.DIVIDE)
+        ast = AST()
 
-        # # we expect the current token to be a single-digit integer
-        # right = self.eat_integers()
+        while not self.done():
+            if isinstance(self.current_token, token.OperatorToken):
+                ast.feed(self.eat_operator())
+            else:
+                ast.feed(self.eat_integers())
 
-        # operator.left_value = left
-        # operator.right_value = right
-        # # after the above call the self.current_token is set to
-        # # EOF token
-        # return operator.value
+        # number, operator, number, operator...number
 
-        self.current_token = self.get_next_token()
-        ast = AST()
+            # at this point, current token should be EOF
 
-        while not self.done():
-            ast.feed(self.eat_integers())
-            operator = self.current_token
-            self.eat(token.PLUS, token.MINUS, token.MULTIPLY, token.DIVIDE)
-            ast.feed(operator)
-            ast.feed(self.eat_integers())
-
         return ast.value
 
     def strip_text(self, text):
@@ -132,7 +142,7 @@ def strip_text(self, text):
         """
 
         try:
-            operator = re.search(r'[\+-\.\*\/]', text).group(0)
+            operator = re.search(r'[\+\-\*\/]', text).group(0)
         except(AttributeError):
             raise self.error('expression does not contain an operator')
 

diff --git a/interpreter/app/token/__init__.py b/interpreter/app/token/__init__.py
@@ -2,11 +2,13 @@
 from .utils import *
 
 __all__ = [
+    'SPACE', 'SpaceToken',
     'INTEGER', 'IntToken',
     'INT_WRAPPER', 'IntWrapper',
     'PLUS', 'AddToken',
     'MINUS', 'SubtractToken',
     'MULTIPLY', 'MultiplyToken',
+    'DIVIDE', 'DivideToken',
     'EOF', 'EOFToken',
     'int_wrapper_factory'
 ]
diff --git a/interpreter/app/token/token.py b/interpreter/app/token/token.py
@@ -23,6 +23,11 @@ def __str__(self):
     def __repr__(self):
         return self.__str__()
 
+class SpaceToken(object):
+    def __init__(self) -> None:
+        self.type = SPACE
+        self.value = None
+
 class OperatorToken(Token):
     def __init__(self) -> None:
         __slots__ = 'left_value', 'right_value'

diff --git a/interpreter/app/token/utils.py b/interpreter/app/token/utils.py
@@ -1,5 +1,20 @@
-from app.token import IntWrapper, IntToken
+# from app.token import Token, IntWrapper, IntToken
+import app.token as token
+
+def token_factory(char: str) -> token.Token:
+    if char.isdigit():
+        return token.IntToken(char)
+    elif char == '+':
+        return token.AddToken()
+    elif char == '-':
+        return token.SubtractToken()
+    elif char == '*':
+        return token.MultiplyToken()
+    elif char == '/':
+        return token.DivideToken()
+    elif char == ' ':
+        return token.SpaceToken()
 
 def int_wrapper_factory(int_str: str):
-    int_tokens = [IntToken(char) for char in int_str]
-    return IntWrapper(int_tokens)
+    int_tokens = [token.IntToken(char) for char in int_str]
+    return token.IntWrapper(int_tokens)
diff --git a/interpreter/tests/test_interpreter.py b/interpreter/tests/test_interpreter.py
@@ -1,8 +1,6 @@
 import pytest
 from app.interpreter import *
 
-# import pdb; pdb.set_trace()
-
 def test_single_digit():
     # test digit plus digit
     assert calc('1+2') == 3
@@ -40,14 +38,14 @@ def test_arbitrary_add_or_subtract():
     ('', InterpreterParseError),
     ('1', InterpreterParseError),
 
-    ('1+', ValueError),
-    ('+1', ValueError),
-    ('+', ValueError),
+    ('1+', IndexError),
+    ('+1', AttributeError),
+    ('+', AttributeError),
     ('1 0 + 3', InterpreterParseError),
 
-    ('1-', ValueError),
-    ('-1', ValueError),
-    ('-', ValueError),
+    ('1-', IndexError),
+    ('-1', AttributeError),
+    ('-', AttributeError),
     ('1 0 - 3', InterpreterParseError),
 ]