start of interpreter

hstoebel · Dec 21, 2019 · 29f37ab · 29f37ab
1 parent a9c9689
commit 29f37ab
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 0 deletions.
diff --git a/interpreter/README.md b/interpreter/README.md
@@ -0,0 +1,3 @@
+Notes from Ruslan Spivak's Let’s Build A Simple Interpreter course
+https://ruslanspivak.com/lsbasi-part1/
+
diff --git a/interpreter/calc1.py b/interpreter/calc1.py
@@ -0,0 +1,126 @@
+# Token types
+#
+# EOF (end-of-file) token is used to indicate that
+# there is no more input left for lexical analysis
+INTEGER, PLUS, EOF = 'INTEGER', 'PLUS', 'EOF'
+
+
+class Token(object):
+    def __init__(self, type, value):
+        # token type: INTEGER, PLUS, or EOF
+        self.type = type
+        # token value: 0, 1, 2. 3, 4, 5, 6, 7, 8, 9, '+', or None
+        self.value = value
+
+    def __str__(self):
+        """String representation of the class instance.
+        Examples:
+            Token(INTEGER, 3)
+            Token(PLUS '+')
+        """
+        return 'Token({type}, {value})'.format(
+            type=self.type,
+            value=repr(self.value)
+        )
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class Interpreter(object):
+    def __init__(self, text):
+        # client string input, e.g. "3+5"
+        self.text = text
+        # self.pos is an index into self.text
+        self.pos = 0
+        # current token instance
+        self.current_token = None
+
+    def error(self):
+        raise Exception('Error parsing input')
+
+    def get_next_token(self):
+        """Lexical analyzer (also known as scanner or tokenizer)
+        This method is responsible for breaking a sentence
+        apart into tokens. One token at a time.
+        """
+        text = self.text
+
+        # is self.pos index past the end of the self.text ?
+        # if so, then return EOF token because there is no more
+        # input left to convert into tokens
+        if self.pos > len(text) - 1:
+            return Token(EOF, None)
+
+        # get a character at the position self.pos and decide
+        # what token to create based on the single character
+        current_char = text[self.pos]
+
+        # if the character is a digit then convert it to
+        # integer, create an INTEGER token, increment self.pos
+        # index to point to the next character after the digit,
+        # and return the INTEGER token
+        if current_char.isdigit():
+            token = Token(INTEGER, int(current_char))
+            self.pos += 1
+            return token
+
+        if current_char == '+':
+            token = Token(PLUS, current_char)
+            self.pos += 1
+            return token
+
+        self.error()
+
+    def eat(self, token_type):
+        # compare the current token type with the passed token
+        # type and if they match then "eat" the current token
+        # and assign the next token to the self.current_token,
+        # otherwise raise an exception.
+        if self.current_token.type == token_type:
+            self.current_token = self.get_next_token()
+        else:
+            self.error()
+
+    def expr(self):
+        """expr -> INTEGER PLUS INTEGER"""
+        # set current token to the first token taken from the input
+        self.current_token = self.get_next_token()
+
+        # we expect the current token to be a single-digit integer
+        left = self.current_token
+        self.eat(INTEGER)
+
+        # we expect the current token to be a '+' token
+        op = self.current_token
+        self.eat(PLUS)
+
+        # we expect the current token to be a single-digit integer
+        right = self.current_token
+        self.eat(INTEGER)
+        # after the above call the self.current_token is set to
+        # EOF token
+
+        # at this point INTEGER PLUS INTEGER sequence of tokens
+        # has been successfully found and the method can just
+        # return the result of adding two integers, thus
+        # effectively interpreting client input
+        result = left.value + right.value
+        return result
+
+
+def main():
+    while True:
+        try:
+            text = input('calc> ')
+        except EOFError:
+            break
+        if not text:
+            continue
+        interpreter = Interpreter(text)
+        result = interpreter.expr()
+        print(result)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/interpreter/notes.md b/interpreter/notes.md
@@ -0,0 +1,8 @@
+# Part 1
+
+Compiler takes source code and preprocesses it into a machine language.
+
+An interpreter interprets source code without turning it into machine language first. 
+
+Lexical analysis: read input of characters and convert into a stream of tokens.
+Token: an object represetning type and value. Example: the token "3" is an integer with value of 3
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Notes from Ruslan Spivak's Let’s Build A Simple Interpreter course
		https://ruslanspivak.com/lsbasi-part1/