From c537a9504395ff67ef56a0f65d61d859ca04ccd6 Mon Sep 17 00:00:00 2001
From: Sven Heidemann
Date: Thu, 24 Sep 2020 20:44:16 +0200
Subject: [PATCH] [DevState] Added and tested with python sly

---
Review notes (ignored by git-am; not part of the commit message):

 * Formatting: this patch had lost its line breaks and indentation. They are
   restored here. Every hunk header, the diffstat and the 521/20 totals
   reconcile with the restored text. Indentation of added lines follows
   Python syntax. Blank context lines and context indentation are inferred
   from the hunk counts; the First.bl hunk and the second hunk of
   src/Interpreter/Interpreter.py have more than one plausible layout, so
   verify both against the tree (or hand-fix the context) before applying.
 * All payload fixes below replace lines one-for-one, so hunk sizes are
   unchanged. The "index" blob ids of the touched files are therefore stale.
 * src/Basic.py: newline() now accumulates lineno instead of overwriting it;
   unary minus builds ('sub', ('num', 0), expr) instead of dropping the sign;
   "== None" became "is None"; BasicExecute no longer indexes an empty
   string; the REPL no longer iterates a None parse result.
 * src/CCLang/Lexer.py: error() now advances self.index. Without that, sly
   re-enters error() on the same character forever.
 * src/Models/CCLang/TokenDefinition.py: keyword, boolean and datatype
   patterns end in \b so identifiers that merely start with a keyword
   ("format", "index", "classes") lex as NAME.
 * src/cclang.py: files() closes the source file (with-statement) and uses
   enumerate() for line numbers.

 src/Basic.py                         | 223 +++++++++++++++++++++++++++
 src/CCLang/Interpreter.py            |  31 ++++
 src/CCLang/Lexer.py                  | 108 +++++++++++++
 src/CCLang/Parser.py                 |  61 ++++++++
 src/CCLang/__init__.py               |   0
 src/{First.bl => First.cc}           |   7 +-
 src/Interpreter/Interpreter.py       |   5 +-
 src/Interpreter/Repo.py              |   2 +-
 src/Interpreter/Utils.py             |   9 +-
 src/Models/CCLang/TokenDefinition.py |  67 ++++++++
 src/Models/CCLang/__init__.py        |   0
 src/Models/Interpreter/Error.py      |   1 +
 src/ServiceInitializer.py            |   2 +
 src/{Main.py => cclang.py}           |  25 +--
 14 files changed, 521 insertions(+), 20 deletions(-)
 create mode 100644 src/Basic.py
 create mode 100644 src/CCLang/Interpreter.py
 create mode 100644 src/CCLang/Lexer.py
 create mode 100644 src/CCLang/Parser.py
 create mode 100644 src/CCLang/__init__.py
 rename src/{First.bl => First.cc} (93%)
 create mode 100644 src/Models/CCLang/TokenDefinition.py
 create mode 100644 src/Models/CCLang/__init__.py
 rename src/{Main.py => cclang.py} (56%)

diff --git a/src/Basic.py b/src/Basic.py
new file mode 100644
index 0000000..9745710
--- /dev/null
+++ b/src/Basic.py
@@ -0,0 +1,223 @@
+from sly import Lexer
+from sly import Parser
+
+
+class BasicLexer(Lexer):
+    tokens = {NAME, NUMBER, STRING, IF, THEN, ELSE, FOR, FUN, TO, ARROW, EQEQ}
+    ignore = '\t '
+
+    literals = {'=', '+', '-', '/', '*', '(', ')', ',', ';'}
+
+    # Define tokens
+    IF = r'IF'
+    THEN = r'THEN'
+    ELSE = r'ELSE'
+    FOR = r'FOR'
+    FUN = r'FUN'
+    TO = r'TO'
+    ARROW = r'->'
+    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    STRING = r'\".*?\"'
+
+    EQEQ = r'=='
+
+    @_(r'\d+')
+    def NUMBER(self, t):
+        t.value = int(t.value)
+        return t
+
+    @_(r'#.*')
+    def COMMENT(self, t):
+        pass
+
+    @_(r'\n+')
+    def newline(self, t):
+        self.lineno += t.value.count('\n')
+
+
+class BasicParser(Parser):
+    tokens = BasicLexer.tokens
+
+    precedence = (
+        ('left', '+', '-'),
+        ('left', '*', '/'),
+        ('right', 'UMINUS'),
+    )
+
+    def __init__(self):
+        self.env = {}
+
+    @_('')
+    def statement(self, p):
+        pass
+
+    @_('FOR var_assign TO expr THEN statement')
+    def statement(self, p):
+        return ('for_loop', ('for_loop_setup', p.var_assign, p.expr), p.statement)
+
+    @_('IF condition THEN statement ELSE statement')
+    def statement(self, p):
+        return ('if_stmt', p.condition, ('branch', p.statement0, p.statement1))
+
+    @_('FUN NAME "(" ")" ARROW statement')
+    def statement(self, p):
+        return ('fun_def', p.NAME, p.statement)
+
+    @_('NAME "(" ")"')
+    def statement(self, p):
+        return ('fun_call', p.NAME)
+
+    @_('expr EQEQ expr')
+    def condition(self, p):
+        return ('condition_eqeq', p.expr0, p.expr1)
+
+    @_('var_assign')
+    def statement(self, p):
+        return p.var_assign
+
+    @_('NAME "=" expr')
+    def var_assign(self, p):
+        return ('var_assign', p.NAME, p.expr)
+
+    @_('NAME "=" STRING')
+    def var_assign(self, p):
+        return ('var_assign', p.NAME, p.STRING)
+
+    @_('expr')
+    def statement(self, p):
+        return (p.expr)
+
+    @_('expr "+" expr')
+    def expr(self, p):
+        return ('add', p.expr0, p.expr1)
+
+    @_('expr "-" expr')
+    def expr(self, p):
+        return ('sub', p.expr0, p.expr1)
+
+    @_('expr "*" expr')
+    def expr(self, p):
+        return ('mul', p.expr0, p.expr1)
+
+    @_('expr "/" expr')
+    def expr(self, p):
+        return ('div', p.expr0, p.expr1)
+
+    @_('"-" expr %prec UMINUS')
+    def expr(self, p):
+        return ('sub', ('num', 0), p.expr)
+
+    @_('NAME')
+    def expr(self, p):
+        return ('var', p.NAME)
+
+    @_('NUMBER')
+    def expr(self, p):
+        return ('num', p.NUMBER)
+
+
+class BasicExecute:
+
+    def __init__(self, tree, env):
+        self.env = env
+        result = self.walkTree(tree)
+        if result is not None and isinstance(result, int):
+            print(result)
+        if isinstance(result, str) and result.startswith('"'):
+            print(result)
+
+    def walkTree(self, node):
+
+        if isinstance(node, int):
+            return node
+        if isinstance(node, str):
+            return node
+
+        if node is None:
+            return None
+
+        if node[0] == 'program':
+            if node[1] is None:
+                self.walkTree(node[2])
+            else:
+                self.walkTree(node[1])
+                self.walkTree(node[2])
+
+        if node[0] == 'num':
+            return node[1]
+
+        if node[0] == 'str':
+            return node[1]
+
+        if node[0] == 'if_stmt':
+            result = self.walkTree(node[1])
+            if result:
+                return self.walkTree(node[2][1])
+            return self.walkTree(node[2][2])
+
+        if node[0] == 'condition_eqeq':
+            return self.walkTree(node[1]) == self.walkTree(node[2])
+
+        if node[0] == 'fun_def':
+            self.env[node[1]] = node[2]
+
+        if node[0] == 'fun_call':
+            try:
+                return self.walkTree(self.env[node[1]])
+            except LookupError:
+                print("Undefined function '%s'" % node[1])
+                return 0
+
+        if node[0] == 'add':
+            return self.walkTree(node[1]) + self.walkTree(node[2])
+        elif node[0] == 'sub':
+            return self.walkTree(node[1]) - self.walkTree(node[2])
+        elif node[0] == 'mul':
+            return self.walkTree(node[1]) * self.walkTree(node[2])
+        elif node[0] == 'div':
+            return self.walkTree(node[1]) / self.walkTree(node[2])
+
+        if node[0] == 'var_assign':
+            self.env[node[1]] = self.walkTree(node[2])
+            return node[1]
+
+        if node[0] == 'var':
+            try:
+                return self.env[node[1]]
+            except LookupError:
+                print("Undefined variable '" + node[1] + "' found!")
+                return 0
+
+        if node[0] == 'for_loop':
+            if node[1][0] == 'for_loop_setup':
+                loop_setup = self.walkTree(node[1])
+
+                loop_count = self.env[loop_setup[0]]
+                loop_limit = loop_setup[1]
+
+                for i in range(loop_count + 1, loop_limit + 1):
+                    res = self.walkTree(node[2])
+                    if res is not None:
+                        print(res)
+                    self.env[loop_setup[0]] = i
+                del self.env[loop_setup[0]]
+
+        if node[0] == 'for_loop_setup':
+            return (self.walkTree(node[1]), self.walkTree(node[2]))
+
+
+if __name__ == '__main__':
+    lexer = BasicLexer()
+    parser = BasicParser()
+    env = {}
+    while True:
+        try:
+            text = input('basic > ')
+        except EOFError:
+            break
+        if text:
+            tokens = lexer.tokenize(text)
+            tree = parser.parse(tokens)
+            for t in tree or ():
+                print(t)
+            # BasicExecute(tree, env)
diff --git a/src/CCLang/Interpreter.py b/src/CCLang/Interpreter.py
new file mode 100644
index 0000000..edbb29a
--- /dev/null
+++ b/src/CCLang/Interpreter.py
@@ -0,0 +1,31 @@
+from typing import Optional
+
+from CCLang.Parser import Parser
+from Interpreter.Validator import Validator
+from CCLang.Lexer import Lexer
+from Interpreter.Repo import Repo
+from Interpreter.Utils import Utils
+from Models.AbstractSyntaxTree.AbstractSyntaxTree import AbstractSyntaxTree
+
+
+class Interpreter:
+
+    def __init__(self, repo: Repo, utils: Utils) -> None:
+        self.__repo = repo
+        self.__utils = utils
+        # self.__lexer = Lexer(repo, utils)
+        # self.__parser = Parser(repo, utils)
+        # self.__validator = Validator(repo, utils)
+
+    def interpret(self, line_str: str) -> None:
+        """
+        Interprets code line
+        :param line_str:
+        :return:
+        """
+        lexer = Lexer()
+        parser = Parser()
+        env = {}
+        ast = parser.parse(lexer.tokenize(line_str))
+        if ast is not None:
+            print(ast)
diff --git a/src/CCLang/Lexer.py b/src/CCLang/Lexer.py
new file mode 100644
index 0000000..06cdd1e
--- /dev/null
+++ b/src/CCLang/Lexer.py
@@ -0,0 +1,108 @@
+from sly import Lexer as SlyLexer
+
+from Models.CCLang.TokenDefinition import TokenDefinition
+
+
+class Lexer(SlyLexer):
+    # Ignored pattern
+    ignore = '\t '
+    # ignore_comment = r'(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|(//.*)|([#].*)'
+    ignore_comment = r'([#].*|(//.*))'
+    ignore_newline = r'\n+'
+
+    tokens = {
+        LIBRARY,
+        CLASS,
+        FUNCTION,
+        VARIABLE,
+        USE,
+        FROM,
+        OUTPUT,
+        INPUT,
+        LENGTH,
+        RANGE,
+        EXIT,
+        IF,
+        ELSEIF,
+        ELSE,
+        CONTINUE,
+        IN,
+        RETURN,
+        WHILE,
+        FOR,
+        PUBLIC,
+        THIS,
+        LBRACE,
+        RBRACE,
+        LPARAN,
+        RPARAN,
+        LBRACKET,
+        RBRACKET,
+        SEMICOLON,
+        COLON,
+        COMMA,
+        POINT,
+        PLUS,
+        MINUS,
+        ASTERIK,
+        SLASH,
+        EQUAL,
+        CARET,
+        TRUE,
+        FALSE,
+        STRING,
+        NUMBER,
+        EMPTY,
+
+        NAME
+    }
+
+    # token definition
+    LIBRARY = TokenDefinition.Library.value
+    CLASS = TokenDefinition.Class.value
+    FUNCTION = TokenDefinition.Function.value
+    VARIABLE = TokenDefinition.Variable.value
+    USE = TokenDefinition.Use.value
+    FROM = TokenDefinition.From.value
+    OUTPUT = TokenDefinition.Output.value
+    INPUT = TokenDefinition.Input.value
+    LENGTH = TokenDefinition.Length.value
+    RANGE = TokenDefinition.Range.value
+    EXIT = TokenDefinition.Exit.value
+    IF = TokenDefinition.If.value
+    ELSEIF = TokenDefinition.ElseIf.value
+    ELSE = TokenDefinition.Else.value
+    CONTINUE = TokenDefinition.Continue.value
+    IN = TokenDefinition.In.value
+    RETURN = TokenDefinition.Return.value
+    WHILE = TokenDefinition.While.value
+    FOR = TokenDefinition.For.value
+    PUBLIC = TokenDefinition.Public.value
+    THIS = TokenDefinition.This.value
+    LBRACE = TokenDefinition.Left_Brace.value
+    RBRACE = TokenDefinition.Right_Brace.value
+    LPARAN = TokenDefinition.Left_Parenthesis.value
+    RPARAN = TokenDefinition.Right_Parenthesis.value
+    LBRACKET = TokenDefinition.Left_Bracket.value
+    RBRACKET = TokenDefinition.Right_Bracket.value
+    SEMICOLON = TokenDefinition.Semicolon.value
+    COLON = TokenDefinition.Colon.value
+    COMMA = TokenDefinition.Comma.value
+    POINT = TokenDefinition.Point.value
+    PLUS = TokenDefinition.Plus.value
+    MINUS = TokenDefinition.Minus.value
+    ASTERIK = TokenDefinition.Asterisk.value
+    SLASH = TokenDefinition.Slash.value
+    EQUAL = TokenDefinition.Equal.value
+    CARET = TokenDefinition.Caret.value
+    TRUE = TokenDefinition.BoolTrue.value
+    FALSE = TokenDefinition.BoolFalse.value
+    STRING = TokenDefinition.String.value
+    NUMBER = TokenDefinition.Number.value
+    EMPTY = TokenDefinition.Empty.value
+
+    NAME = TokenDefinition.Name.value
+
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
+        self.index += 1
diff --git a/src/CCLang/Parser.py b/src/CCLang/Parser.py
new file mode 100644
index 0000000..ddaea80
--- /dev/null
+++ b/src/CCLang/Parser.py
@@ -0,0 +1,61 @@
+from sly import Parser as SlyParser
+
+from CCLang.Lexer import Lexer
+
+
+class Parser(SlyParser):
+    tokens = Lexer.tokens
+
+    #precedence = (
+    #    ('left', '+', '-'),
+    #    ('left', '*', '/'),
+    #    ('right', 'UMINUS'),
+    #)
+
+    def __init__(self):
+        self.env = {}
+
+    @_('')
+    def statement(self, p):
+        pass
+
+    def error(self, p):
+        print(f'ERROR: {p}')
+        pass
+
+    # lib definition
+    @_('PUBLIC LIBRARY NAME LBRACE')
+    def statement(self, p):
+        return ('lib_def', p.NAME, True)
+
+    @_('LIBRARY NAME LBRACE')
+    def statement(self, p):
+        return ('lib_def', p.NAME, False)
+
+    # class definition
+    @_('PUBLIC CLASS NAME LBRACE')
+    def statement(self, p):
+        return ('class_def', p.NAME, True)
+
+    @_('CLASS NAME LBRACE')
+    def statement(self, p):
+        return ('class_def', p.NAME, False)
+
+    # func definition
+    @_('PUBLIC FUNCTION NAME LPARAN statement RPARAN COLON type LBRACE')
+    def statement(self, p):
+        return ('func_def', p.NAME, True)
+
+    @_('FUNCTION NAME LPARAN RPARAN COLON type LBRACE')
+    def statement(self, p):
+        return ('func_def', p.NAME, False)
+
+    # types
+    @_('EMPTY')
+    def type(self, p):
+        return ('type', p.EMPTY)
+
+    # right brace
+    @_('RBRACE')
+    def statement(self, p):
+        return ('end', p.RBRACE)
diff --git a/src/CCLang/__init__.py b/src/CCLang/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/First.bl b/src/First.cc
similarity index 93%
rename from src/First.bl
rename to src/First.cc
index a034ffd..cac1b3e 100644
--- a/src/First.bl
+++ b/src/First.cc
@@ -1,8 +1,5 @@
-// hi1
-# hi2
-/*
-    hi3
-*/
+# hi1
+// hi2
 
 public lib Main {
     class Program {
diff --git a/src/Interpreter/Interpreter.py b/src/Interpreter/Interpreter.py
index ea3169c..ce3a1d9 100644
--- a/src/Interpreter/Interpreter.py
+++ b/src/Interpreter/Interpreter.py
@@ -2,7 +2,6 @@ from typing import Optional
 
 from Interpreter.Validator import Validator
 from Interpreter.Lexer import Lexer
-# from Interpreter.Parser_Old import Parser
 from Interpreter.Parser import Parser
 from Interpreter.Repo import Repo
 from Interpreter.Utils import Utils
@@ -27,10 +26,10 @@ class Interpreter:
         tokens = []
         ast: Optional[AbstractSyntaxTree] = None
 
-        if self.__repo.is_error is None:
+        if self.__repo.error is None:
             tokens = self.__lexer.tokenize(line_str)
 
-        if self.__repo.is_error is None:
+        if self.__repo.error is None:
             ast = self.__parser.parse(tokens)
         """
         print('#####\n')
diff --git a/src/Interpreter/Repo.py b/src/Interpreter/Repo.py
index 29e12d8..0e2b83c 100644
--- a/src/Interpreter/Repo.py
+++ b/src/Interpreter/Repo.py
@@ -70,4 +70,4 @@ class Repo:
         self.bool_values = [Booleans.Right.value, Booleans.Wrong.value]
 
         # runtime
-        self.is_error = None
+        self.error = None
diff --git a/src/Interpreter/Utils.py b/src/Interpreter/Utils.py
index a35b03c..2697291 100644
--- a/src/Interpreter/Utils.py
+++ b/src/Interpreter/Utils.py
@@ -16,6 +16,11 @@ class Utils:
         print(f'-> {text}')
 
     def error(self, error: Error) -> None:
-        self.__repo.is_error = error
-        print(colored(f'Error in line {self.__repo.line_number}\n{self.__repo.is_error.msg}', 'red'))
+        self.__repo.error = error
+        print(colored(f'Error in line {self.__repo.line_number}\n{self.__repo.error.msg}', 'red'))
+        # exit()
+
+    def runtime_error(self, error: Error) -> None:
+        self.__repo.error = error
+        print(colored(f'{self.__repo.error.msg}', 'red'))
         # exit()
diff --git a/src/Models/CCLang/TokenDefinition.py b/src/Models/CCLang/TokenDefinition.py
new file mode 100644
index 0000000..bdb0fa3
--- /dev/null
+++ b/src/Models/CCLang/TokenDefinition.py
@@ -0,0 +1,67 @@
+from enum import Enum
+
+
+class TokenDefinition(Enum):
+    """ Keywords """
+    # define keywords
+    Library = r'lib\b'
+    Class = r'class\b'
+    Function = r'func\b'
+    Variable = r'var\b'
+    Use = r'use\b'
+    From = r'from\b'
+
+    # builtin functions
+    Output = r'output\b'
+    Input = r'input\b'
+    Length = r'length\b'
+    Range = r'range\b'
+    Exit = r'exit\b'
+
+    # normal keywords
+    If = r'if\b'
+    ElseIf = r'elseif\b'
+    Else = r'else\b'
+    Continue = r'continue\b'
+    In = r'in\b'
+    Return = r'return\b'
+
+    # loops
+    While = r'while\b'
+    For = r'for\b'
+
+    # access
+    Public = r'public\b'
+    This = r'this\b'
+
+    """ Chars """
+    # format
+    Left_Brace = r'\{'
+    Right_Brace = r'\}'
+    Left_Parenthesis = r'\('
+    Right_Parenthesis = r'\)'
+    Left_Bracket = r'\['
+    Right_Bracket = r'\]'
+    Semicolon = r'\;'
+    Colon = r'\:'
+    Comma = r'\,'
+    Point = r'\.'
+    # expr
+    Plus = r'\+'
+    Minus = r'\-'
+    Asterisk = r'\*'
+    Slash = r'\/'
+    Equal = r'\='
+    Caret = r'\^'
+
+    """ Values """
+    # bool
+    BoolTrue = r'true\b'
+    BoolFalse = r'false\b'
+
+    Name = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    String = r'\".*?\"'
+    Number = r'\d+'
+
+    """ Datatypes """
+    Empty = r'empty\b'
diff --git a/src/Models/CCLang/__init__.py b/src/Models/CCLang/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Models/Interpreter/Error.py b/src/Models/Interpreter/Error.py
index c5cd81b..b72ee8b 100644
--- a/src/Models/Interpreter/Error.py
+++ b/src/Models/Interpreter/Error.py
@@ -4,6 +4,7 @@ from enum import Enum
 class ErrorCodes(Enum):
     StartFailed = 'Start failed'
     FileNotFound = 'File not found'
+    WrongFileType = 'Wrong file type'
 
     Unknown = 'Unknown {}'
     Inaccessible = '{} inaccessible'
diff --git a/src/ServiceInitializer.py b/src/ServiceInitializer.py
index 9789e21..5215d63 100644
--- a/src/ServiceInitializer.py
+++ b/src/ServiceInitializer.py
@@ -1,5 +1,6 @@
 from Interpreter.Validator import Validator
 from Interpreter.Interpreter import Interpreter
+from CCLang.Interpreter import Interpreter as CCLangInterpreter
 from Interpreter.Utils import Utils
 from Interpreter.Repo import Repo
 
@@ -10,3 +11,4 @@ class ServiceInitializer:
         self.repo = Repo()
         self.utils = Utils(self.repo)
         self.interpreter = Interpreter(self.repo, self.utils)
+        self.cclang_interpreter = CCLangInterpreter(self.repo, self.utils)
diff --git a/src/Main.py b/src/cclang.py
similarity index 56%
rename from src/Main.py
rename to src/cclang.py
index c0f2d44..9df5c6b 100644
--- a/src/Main.py
+++ b/src/cclang.py
@@ -12,6 +12,7 @@ class Main:
         self.__utils = self.__services.utils
         self.__repo = self.__services.repo
         self.__interpreter = self.__services.interpreter
+        self.__cclang_interpreter = self.__services.cclang_interpreter
 
     def console(self) -> None:
         """
@@ -19,9 +20,9 @@ class Main:
         :return:
         """
         i = 0
-        while self.__repo.is_error is not None:
+        while self.__repo.error is None:
             self.__repo.line_number = i + 1
-            self.__interpreter.interpret(self.__repo.line_number, input('> '))
+            self.__interpreter.interpret(input('> '))
             i += 1
 
     def files(self, file: str) -> None:
@@ -30,13 +31,19 @@ class Main:
         :param file:
         :return:
         """
-        if os.path.isfile(file):
-            f = open(file, 'r', encoding='utf-8').readlines()
-            for i in range(0, len(f)):
-                self.__repo.line_number = i + 1
-                self.__interpreter.interpret(f[i])
-        else:
-            self.__utils.is_error(Error(ErrorCodes.FileNotFound))
+        if not os.path.isfile(file):
+            self.__utils.runtime_error(Error(ErrorCodes.FileNotFound))
+            return
+
+        if not file.endswith('.cc'):
+            self.__utils.runtime_error(Error(ErrorCodes.WrongFileType))
+            return
+
+        with open(file, 'r', encoding='utf-8') as source:
+            lines = source.readlines()
+        for number, line in enumerate(lines, start=1):
+            self.__repo.line_number = number
+            self.__cclang_interpreter.interpret(line)
 
 
 if __name__ == '__main__':