From 38353246b2a241e8f7c4710dc6bd81547bdc1962 Mon Sep 17 00:00:00 2001
From: edraft
Date: Sun, 24 May 2020 17:56:15 +0200
Subject: [PATCH] added parser logic

---
 doc/ast_rules.txt                   |   9 ++
 doc/error_codes.txt                 |  13 --
 doc/test.bl                         |  14 ++-
 src/Interpreter/Lexer.py            |   8 +-
 src/Interpreter/Parser.py           | 178 +++++++++++++++++++++++++++-
 src/Interpreter/Repo.py             |   3 +-
 src/Interpreter/Utils.py            |   7 +-
 src/Main.py                         |   2 +-
 src/Models/Interpreter/Error.py     |  33 ++++++
 src/Models/Language/AST/Class.py    |   6 +
 src/Models/Language/AST/Func.py     |   6 +
 src/Models/Language/AST/Lib.py      |   5 +
 src/Models/Language/AST/__init__.py |   0
 src/Models/Language/Error.py        |   5 -
 14 files changed, 260 insertions(+), 29 deletions(-)
 create mode 100644 doc/ast_rules.txt
 delete mode 100644 doc/error_codes.txt
 create mode 100644 src/Models/Interpreter/Error.py
 create mode 100644 src/Models/Language/AST/Class.py
 create mode 100644 src/Models/Language/AST/Func.py
 create mode 100644 src/Models/Language/AST/Lib.py
 create mode 100644 src/Models/Language/AST/__init__.py
 delete mode 100644 src/Models/Language/Error.py

diff --git a/doc/ast_rules.txt b/doc/ast_rules.txt
new file mode 100644
index 0000000..c1a571a
--- /dev/null
+++ b/doc/ast_rules.txt
@@ -0,0 +1,9 @@
+len 3:
+
+    lib main {
+
+min len 3, max len 4:
+    class test {
+
+min len 5
+    func test() {
\ No newline at end of file
diff --git a/doc/error_codes.txt b/doc/error_codes.txt
deleted file mode 100644
index c7f6c8d..0000000
--- a/doc/error_codes.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Interpreter:
-    1.0 Start failed
-    1.1 File not found
-
-Runtime:
-    2.0 Unknown keyword
-    2.1 Unknown type
-    2.2 Unknown variable
-    2.3 Unknown function
-    2.4 Unknown class
-    2.5 Unknown library
-    2.6 Access error: no export
-    2.7 Expression error
\ No newline at end of file
diff --git a/doc/test.bl b/doc/test.bl
index fcb0d50..e3c703b 100644
--- a/doc/test.bl
+++ b/doc/test.bl
@@ -9,6 +9,8 @@ lib Main {
         func Main() {
             testBool: bool;
             testEmpty: emptyType = empty;
+            testNum: number = 3.0;
+            testBool_2: bool = 3 > 1;
             output('Hello World');
             output(66);
             output(3 + 3);
@@ -22,11 +24,21 @@ lib Main {
             test1234(range(0, 10));
         }
 
-        public func test1234(param: list) {
+        # public func test1234(param: list) {
+        public func test1234()
+        {
             /*for i in range(0, length(param)) {
                 output(i);
             }*/
             pass;
         }
     }
+}
+
+lib test
+{
+    public class Test
+    {
+
+    }
 }
\ No newline at end of file
diff --git a/src/Interpreter/Lexer.py b/src/Interpreter/Lexer.py
index d4255db..4592a89 100644
--- a/src/Interpreter/Lexer.py
+++ b/src/Interpreter/Lexer.py
@@ -57,7 +57,7 @@ class Lexer:
                     i += 2
 
             # end of number
-            elif not c.isdigit() and is_number:
+            elif not c.isdigit() and c != '.' and is_number:
                 self.__add_tok(word, 'number')
                 word = ''
                 is_number = False
@@ -102,7 +102,7 @@ class Lexer:
                 is_number = True
 
             # continue number
-            elif c.isdigit() and is_number:
+            elif (c.isdigit() or c == '.') and is_number:
                 word += c
 
             # begin expression char
@@ -115,7 +115,7 @@ class Lexer:
                 word += c
 
             # bool expression char
-            elif c in self.__repo.expr_chars:
+            elif c in self.__repo.bool_expr_chars:
                 self.__add_tok(word, 'word')
                 self.__add_tok(c, 'bool_expr_char')
                 word = ''
@@ -134,5 +134,5 @@ class Lexer:
                     if line[i - 1] == '*' and c == '/':
                         self.__ml_comment = False
 
-        self.__repo.output_tokens(self.__toks)
+        # self.__repo.output_tokens(self.__toks)
         return self.__toks
diff --git a/src/Interpreter/Parser.py b/src/Interpreter/Parser.py
index f765773..9b40e08 100644
--- a/src/Interpreter/Parser.py
+++ b/src/Interpreter/Parser.py
@@ -1,6 +1,10 @@
 from Interpreter.Repo import Repo
 from Interpreter.Utils import Utils
+from Models.Interpreter.Error import Error
 from Models.Interpreter.Token import Token
+from Models.Language.AST.Class import Class
+from Models.Language.AST.Func import Func
+from Models.Language.AST.Lib import Lib
 
 
 class Parser:
@@ -9,6 +13,178 @@ class Parser:
         self.__repo = repo
         self.__utils = utils
 
+        # runtime representation
+        self.__lib = None
+        self.__class = None
+        self.__func = None
+
+        # helpers
+        self.__tokens = []  # reset each line
+        self.__i = 0  # for loop index
+
+        self.__token_storage = []
+        self.__is_start_lib = False
+        self.__is_start_class = False
+        self.__is_start_func = False
+        self.__is_public = False
+
     def parse(self, toks: list) -> None:
+        self.__tokens = toks
         # self.__repo.output_tokens(toks)
-        pass
+        # output
+        if len(toks) > 0:
+            tokens = []
+            for t in toks:
+                tokens.append({t.value: t.type})
+
+            # print(tokens)
+
+            self.__check()
+
+        # output
+        if len(self.__repo.ast) > 1:
+            print('___')
+            for a_lib in self.__repo.ast:
+                print(a_lib.name)
+                for a_class in a_lib.ast:
+                    print(a_class.name, a_class.access)
+                    for a_funcs in a_class.ast:
+                        print(a_funcs.name, a_funcs.access)
+
+            print('___')
+        # print(self.__repo.ast, '\n')
+
+    """ parser helpers """
+    def __get_next_token(self) -> Token:
+        if len(self.__tokens) > self.__i + 1:
+            return self.__tokens[self.__i + 1]
+        else:
+            return Token('EOL', 'EOL')
+
+    def __get_last_token(self) -> Token:
+        if 0 < self.__i <= len(self.__tokens):  # index 0 has no predecessor; never wrap to tokens[-1]
+            return self.__tokens[self.__i - 1]
+        else:
+            return Token('EOL', 'EOL')
+
+    def __get_token_by_i_dif(self, delta: int) -> Token:
+        return self.__tokens[self.__i + delta]
+
+    def __is_scope_started(self) -> bool:
+        return self.__is_start_lib or self.__is_start_class or self.__is_start_func
+
+    def __is_only_func_started(self) -> bool:
+        return not self.__is_start_lib and not self.__is_start_class and self.__is_start_func
+
+    """ token checks """
+    def __check(self):
+        # check tokens
+        for self.__i in range(0, len(self.__tokens)):
+            tok = self.__tokens[self.__i]
+
+            if not self.__is_scope_started() and tok.type == 'keyword':
+                self.__check_keyword(tok)
+
+            elif (not self.__is_scope_started() and tok.type == 'type') or tok.type in self.__repo.types:
+                pass
+
+            elif not self.__is_scope_started() and tok.type in self.__repo.var_types:
+                pass
+
+            elif tok.type == 'format_char':
+                self.__check_format_char(tok)
+
+            elif not self.__is_scope_started() and tok.type == 'expr_char':
+                pass
+
+            elif not self.__is_scope_started() and tok.type == 'bool_expr_char':
+                self.__check_name(tok)
+
+            elif tok.type == 'name':
+                self.__check_name(tok)
+
+            else:
+                self.__utils.error(Error(2.9, f'{tok.type}: {tok.value}'))
+
+    def __check_keyword(self, tok: Token) -> None:
+        if tok.value == 'lib':
+            self.__is_start_lib = True
+
+        elif tok.value == 'class':
+            self.__is_start_class = True
+
+        elif tok.value == 'func':
+            self.__is_start_func = True
+
+        elif tok.value == 'public':
+            next_tok = self.__get_next_token()  # 'public' must be followed by 'class' or 'func'
+            if next_tok.type == 'keyword' and (next_tok.value == 'class' or next_tok.value == 'func'):
+                self.__is_public = True
+            else:
+                if next_tok.type == 'EOL':
+                    self.__utils.error(Error(2.8))
+                else:
+                    self.__utils.error(Error(2.9, next_tok.value))
+
+    def __check_format_char(self, tok: Token) -> None:
+        if tok.value == '{':
+            if len(self.__token_storage) > 0 and self.__token_storage[0].type == 'name':
+                if self.__is_start_lib and not self.__is_start_class and not self.__is_start_func:
+                    self.__lib = Lib(self.__token_storage[0].value)
+                    self.__token_storage = []
+                    self.__is_start_lib = False
+
+                elif not self.__is_start_lib and self.__is_start_class and not self.__is_start_func:
+                    access = ''
+                    if self.__is_public:
+                        access = 'public'
+                        self.__is_public = False
+
+                    self.__class = Class(self.__token_storage[0].value, access=access)
+                    self.__token_storage = []
+                    self.__is_start_class = False
+
+                elif not self.__is_start_lib and not self.__is_start_class and self.__is_start_func:
+                    access = ''
+                    if self.__is_public:
+                        access = 'public'
+                        self.__is_public = False
+
+                    self.__func = Func(self.__token_storage[0].value, access=access)
+                    self.__token_storage = []
+                    self.__is_start_func = False
+
+                else:
+                    self.__utils.error(Error(2.9, f'{tok.type}: {tok.value}'))
+
+        elif not self.__is_scope_started() and tok.value == '}':
+            if self.__lib is not None and self.__class is None and self.__func is None:
+                self.__repo.ast.append(self.__lib)
+                self.__lib = None
+
+            elif self.__lib is not None and self.__class is not None and self.__func is None:
+                self.__lib.ast.append(self.__class)
+                self.__class = None
+
+            elif self.__lib is not None and self.__class is not None and self.__func is not None:
+                self.__class.ast.append(self.__func)
+                self.__func = None
+
+            else:
+                self.__utils.error(Error(2.9, f'{tok.type}: {tok.value}'))
+
+        elif self.__is_only_func_started() and (tok.value == '(' or tok.value == ')'):
+            pass
+
+        elif not self.__is_scope_started() and tok.value in self.__repo.format_chars:
+            pass
+
+        else:
+            self.__utils.error(Error(2.9, f'{tok.type}: {tok.value}'))
+
+    def __check_name(self, tok: Token) -> None:
+        if self.__is_start_lib or self.__is_start_class or self.__is_start_func:
+            if len(self.__token_storage) == 0:
+                self.__token_storage.append(tok)
+            else:
+                self.__utils.error(Error(2.9, f'{tok.type}: {tok.value}'))
diff --git a/src/Interpreter/Repo.py b/src/Interpreter/Repo.py
index 87a3c46..9fb86ad 100644
--- a/src/Interpreter/Repo.py
+++ b/src/Interpreter/Repo.py
@@ -44,13 +44,14 @@ class Repo:
             'list',
             'dict'
         ]
+        self.format_chars = ['{', '}', '(', ')', ';', ':', ',']
         self.expr_chars = ['+', '-', '*', '/', '=', '^']
         self.bool_expr_chars = ['<', '>', '!', '!=', '==', '>=', '<=']
         self.bool_values = ['true', 'false']
-        self.format_chars = ['{', '}', '(', ')', ';', ':', ',']
 
         # runtime
         self.error = None
+        self.ast = []
 
     def output_tokens(self, toks: list) -> None:
         if self.debug and len(toks) > 0:
diff --git a/src/Interpreter/Utils.py b/src/Interpreter/Utils.py
index 18ec8bf..305183e 100644
--- a/src/Interpreter/Utils.py
+++ b/src/Interpreter/Utils.py
@@ -1,6 +1,7 @@
 from termcolor import colored
 
 from Interpreter.Repo import Repo
+from Models.Interpreter.Error import Error
 
 
 class Utils:
@@ -14,6 +15,6 @@ class Utils:
     def output(self, text: str) -> None:
         print(f'-> {text}')
 
-    def error(self) -> None:
-        if self.__repo is not None:
-            print(colored(f'{self.__repo.error.code}: {self.__repo.error.msg}', 'red'))
+    def error(self, error: Error) -> None:
+        self.__repo.error = error
+        print(colored(f'{self.__repo.error.code}: {self.__repo.error.msg}', 'red'))
diff --git a/src/Main.py b/src/Main.py
index 4743c1b..ab5c43d 100644
--- a/src/Main.py
+++ b/src/Main.py
@@ -1,7 +1,7 @@
 import os
 import sys
 
-from Models.Language.Error import Error
+from Models.Interpreter.Error import Error
 from ServiceInitializer import ServiceInitializer
 
 
diff --git a/src/Models/Interpreter/Error.py b/src/Models/Interpreter/Error.py
new file mode 100644
index 0000000..fba0c85
--- /dev/null
+++ b/src/Models/Interpreter/Error.py
@@ -0,0 +1,33 @@
+class Error:
+
+    def __init__(self, code: float, msg: str = ''):
+        self.code = code
+
+        self.__msgs = {
+            # Interpreter:
+            1.0: 'Start failed',
+            1.1: 'File not found',
+
+            # Runtime:
+            2.0: 'Unknown keyword',
+            2.1: 'Unknown type',
+            2.2: 'Unknown variable',
+            2.3: 'Unknown function',
+            2.4: 'Unknown class',
+            2.5: 'Unknown library',
+            2.6: 'Access error: no export',
+            2.7: 'Expression error',
+            2.8: 'Unexpected end of line',
+            2.9: 'Unexpected {}',  # other types
+
+            # Parser:
+            3.0: 'Lib in lib',
+            3.1: 'Lib in class',
+            3.2: 'Lib in func',
+            3.3: 'Class in class',
+            3.4: 'Class in func',
+            3.5: 'Func in lib',
+            3.6: 'Func in func',
+        }
+
+        self.msg = self.__msgs[code].format(msg)
diff --git a/src/Models/Language/AST/Class.py b/src/Models/Language/AST/Class.py
new file mode 100644
index 0000000..bdaf408
--- /dev/null
+++ b/src/Models/Language/AST/Class.py
@@ -0,0 +1,6 @@
+class Class:
+
+    def __init__(self, name: str, access: str = '') -> None:
+        self.name = name
+        self.ast = []
+        self.access = access
diff --git a/src/Models/Language/AST/Func.py b/src/Models/Language/AST/Func.py
new file mode 100644
index 0000000..3d7fad1
--- /dev/null
+++ b/src/Models/Language/AST/Func.py
@@ -0,0 +1,6 @@
+class Func:
+
+    def __init__(self, name: str, access: str = '') -> None:
+        self.name = name
+        self.ast = []
+        self.access = access
diff --git a/src/Models/Language/AST/Lib.py b/src/Models/Language/AST/Lib.py
new file mode 100644
index 0000000..596a919
--- /dev/null
+++ b/src/Models/Language/AST/Lib.py
@@ -0,0 +1,5 @@
+class Lib:
+
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.ast = []
diff --git a/src/Models/Language/AST/__init__.py b/src/Models/Language/AST/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/Models/Language/Error.py b/src/Models/Language/Error.py
deleted file mode 100644
index 594dc20..0000000
--- a/src/Models/Language/Error.py
+++ /dev/null
@@ -1,5 +0,0 @@
-class Error:
-
-    def __init__(self, code: float, msg: str):
-        self.code = code
-        self.msg = msg
\ No newline at end of file