Added lexer

2021-08-14 16:41:25 +02:00
parent 4a3f9b4b9d
commit c909ce31d8
18 changed files with 438 additions and 210 deletions
--- a/cc_code_preview/Program/main.cc
+++ b/cc_code_preview/Program/main.cc
@@ -15,5 +15,12 @@ public lib Main {
            output(isTrue(test));
            output(this.test.getName());
        }
        private func testForEach(): void {
            var list = [];
            list.forEach(e => {
                output(e);
            });
        }
    }
 }
--- a/old/src/CCLang_sly/Interpreter.py
+++ b/old/src/CCLang_sly/Interpreter.py
@@ -1,31 +0,0 @@
 from typing import Optional
 from CCLang_sly.Parser import Parser
 from Interpreter.Validator import Validator
 from CCLang_sly.Lexer import Lexer
 from Interpreter.Repo import Repo
 from Interpreter.Utils import Utils
 from Models.AbstractSyntaxTree.AbstractSyntaxTree import AbstractSyntaxTree
 class Interpreter:
    """
    Driver of the sly based prototype: tokenizes and parses single lines of code
    """
    def __init__(self, repo: Repo, utils: Utils) -> None:
        """
        Stores the shared services
        :param repo: stored for later use, not read in this class yet
        :param utils: stored for later use, not read in this class yet
        """
        self.__repo = repo
        self.__utils = utils
        # self.__lexer = Lexer(repo, utils)
        # self.__parser = Parser(repo, utils)
        # self.__validator = Validator(repo, utils)
    def interpret(self, line_str: str) -> None:
        """
        Interprets code line: the line is tokenized, parsed and the parser result
        is printed unless it is None
        :param line_str: one line of source code
        :return:
        """
        # lexer and parser are created per call, nothing is kept between lines
        lexer = Lexer()
        parser = Parser()
        ast = parser.parse(lexer.tokenize(line_str))
        if ast is not None:
            print(ast)
--- a/old/src/CCLang_sly/Lexer.py
+++ b/old/src/CCLang_sly/Lexer.py
@@ -1,108 +0,0 @@
 from sly import Lexer as SlyLexer
 from Models.CCLang.TokenDefinition import TokenDefinition
 class Lexer(SlyLexer):
    # sly lexer of the prototype: every token pattern is taken from TokenDefinition,
    # spaces, tabs, newlines and one line comments (# and //) are skipped.
    # Ignored pattern
    ignore = '\t '
    # ignore_comment = r'(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|(//.*)|([#].*)'
    ignore_comment = r'([#].*|(//.*))'
    ignore_newline = r'\n+'
    # names of all tokens the lexer can produce, the parser imports this set
    tokens = {
        LIBRARY,
        CLASS,
        FUNCTION,
        VARIABLE,
        USE,
        FROM,
        OUTPUT,
        INPUT,
        LENGTH,
        RANGE,
        EXIT,
        IF,
        ELSEIF,
        ELSE,
        CONTINUE,
        IN,
        RETURN,
        WHILE,
        FOR,
        PUBLIC,
        THIS,
        LBRACE,
        RBRACE,
        LPARAN,
        RPARAN,
        LBRACKET,
        RBRACKET,
        SEMICOLON,
        COLON,
        COMMA,
        POINT,
        PLUS,
        MINUS,
        ASTERIK,
        SLASH,
        EQUAL,
        CARET,
        TRUE,
        FALSE,
        STRING,
        NUMBER,
        EMPTY,
        NAME
    }
    # token definition
    LIBRARY = TokenDefinition.Library.value
    CLASS = TokenDefinition.Class.value
    FUNCTION = TokenDefinition.Function.value
    VARIABLE = TokenDefinition.Variable.value
    USE = TokenDefinition.Use.value
    FROM = TokenDefinition.From.value
    OUTPUT = TokenDefinition.Output.value
    INPUT = TokenDefinition.Input.value
    LENGTH = TokenDefinition.Length.value
    RANGE = TokenDefinition.Range.value
    EXIT = TokenDefinition.Exit.value
    IF = TokenDefinition.If.value
    ELSEIF = TokenDefinition.ElseIf.value
    ELSE = TokenDefinition.Else.value
    CONTINUE = TokenDefinition.Continue.value
    IN = TokenDefinition.In.value
    RETURN = TokenDefinition.Return.value
    WHILE = TokenDefinition.While.value
    FOR = TokenDefinition.For.value
    PUBLIC = TokenDefinition.Public.value
    THIS = TokenDefinition.This.value
    LBRACE = TokenDefinition.LeftBrace.value
    RBRACE = TokenDefinition.RightBrace.value
    LPARAN = TokenDefinition.LeftParenthesis.value
    RPARAN = TokenDefinition.RightParenthesis.value
    LBRACKET = TokenDefinition.LeftBracket.value
    RBRACKET = TokenDefinition.RightBracket.value
    SEMICOLON = TokenDefinition.Semicolon.value
    COLON = TokenDefinition.Colon.value
    COMMA = TokenDefinition.Comma.value
    POINT = TokenDefinition.Point.value
    PLUS = TokenDefinition.Plus.value
    MINUS = TokenDefinition.Minus.value
    ASTERIK = TokenDefinition.Asterisk.value
    SLASH = TokenDefinition.Slash.value
    EQUAL = TokenDefinition.Equal.value
    CARET = TokenDefinition.Caret.value
    TRUE = TokenDefinition.BoolTrue.value
    FALSE = TokenDefinition.BoolFalse.value
    STRING = TokenDefinition.String.value
    NUMBER = TokenDefinition.Number.value
    EMPTY = TokenDefinition.Empty.value
    NAME = TokenDefinition.Name.value
    def error(self, t):
        """
        Reports a character no token pattern matches and skips it
        :param t: error token, t.value starts with the offending character
        :return:
        """
        print("Illegal character '%s'" % t.value[0])
        # Move past the bad character. According to the sly documentation the lexer
        # resumes at self.index after error() returns, so without this step the
        # same character would be reported again and again.
        self.index += 1
--- a/old/src/CCLang_sly/Parser.py
+++ b/old/src/CCLang_sly/Parser.py
@@ -1,61 +0,0 @@
 from sly import Parser as SlyParser
 from CCLang_sly.Lexer import Lexer
 class Parser(SlyParser):
    # sly parser of the prototype. Every rule returns a tuple whose first element
    # names the recognised construct, the empty rule and error() return None.
    # token names are shared with the lexer
    tokens = Lexer.tokens
    #precedence = (
    #    ('left', '+', '-'),
    #    ('left', '*', '/'),
    #    ('right', 'UMINUS'),
    #)
    def __init__(self):
        # NOTE(review): env is only initialised here, no rule in this class reads it
        self.env = {}
    # empty input is a valid statement
    @_('')
    def statement(self, p):
        pass
    # prints the value handed over by sly when parsing fails
    def error(self, p):
        print(f'ERROR: {p}')
        pass
    # lib definition
    # result: ('lib_def', <name>, <True if declared public>)
    @_('PUBLIC LIBRARY NAME LBRACE')
    def statement(self, p):
        return ('lib_def', p.NAME, True)
    @_('LIBRARY NAME LBRACE')
    def statement(self, p):
        return ('lib_def', p.NAME, False)
    # class definition
    # result: ('class_def', <name>, <True if declared public>)
    @_('PUBLIC CLASS NAME LBRACE')
    def statement(self, p):
        return ('class_def', p.NAME, True)
    @_('CLASS NAME LBRACE')
    def statement(self, p):
        return ('class_def', p.NAME, False)
    # func definition
    # result: ('func_def', <name>, <True if declared public>), the return type is parsed but not returned
    # NOTE(review): the public variant expects a 'statement' between the parentheses,
    # the non public variant does not - confirm that this difference is intended
    @_('PUBLIC FUNCTION NAME LPARAN statement RPARAN COLON type LBRACE')
    def statement(self, p):
        return ('func_def', p.NAME, True)
    @_('FUNCTION NAME LPARAN RPARAN COLON type LBRACE')
    def statement(self, p):
        return ('func_def', p.NAME, False)
    # types
    # only the EMPTY token is accepted as a type so far
    @_('EMPTY')
    def type(self, p):
        return ('type', p.EMPTY)
    # right brace
    # a closing brace on its own ends the current block
    @_('RBRACE')
    def statement(self, p):
        return ('end', p.RBRACE)
--- a/old/src/cclang.py
+++ b/old/src/cclang.py
@@ -12,7 +12,6 @@ class Main:
        self.__utils = self.__services.utils
        self.__repo = self.__services.repo
        self.__interpreter = self.__services.interpreter
        # self.__sly_cclang_interpreter = self.__services.sly_cclang_interpreter
    def console(self) -> None:
        """
@@ -23,7 +22,6 @@ class Main:
        while self.__repo.error is None:
            self.__repo.line_number = i + 1
            self.__interpreter.interpret(input('> '))
            # self.__sly_cclang_interpreter.interpret(input('> '))
            i += 1
    def files(self, file: str) -> None:
@@ -44,7 +42,6 @@ class Main:
        for i in range(0, len(f)):
            self.__repo.line_number = i + 1
            self.__interpreter.interpret(f[i])
            # self.__sly_cclang_interpreter.interpret(f[i])
 if __name__ == '__main__':
--- a/src/cc_lang/model/datatypes.py
+++ b/src/cc_lang/model/datatypes.py
@@ -0,0 +1,12 @@
 from enum import Enum
 class Datatypes(Enum):
    """
    Datatypes of the language, each value is the word used for the type
    """
    Empty = 'empty'
    Any = 'any'
    Number = 'number'
    String = 'string'
    Bool = 'bool'
    List = 'list'
    Dict = 'dict'
--- a/src/cc_lang/model/language_definition.py
+++ b/src/cc_lang/model/language_definition.py
@@ -0,0 +1,65 @@
 from cc_lang.model.datatypes import Datatypes
 from lexer.model.token_value_types import Keywords, FormatCharacters, ExpressionCharacters, Booleans
 class LanguageDefinition:
    """
    Lookup lists describing the language: keywords, datatypes, format characters,
    expression characters and boolean literals, each as plain string values
    """
    # interpreter
    keywords = [
        # define keywords
        Keywords.Library.value,
        Keywords.Class.value,
        Keywords.Function.value,
        Keywords.Variable.value,
        Keywords.Use.value,
        Keywords.From.value,
        # builtin functions
        Keywords.Output.value,
        Keywords.Input.value,
        Keywords.Length.value,
        Keywords.Range.value,
        Keywords.Exit.value,
        # normal keywords
        Keywords.If.value,
        Keywords.ElseIf.value,
        Keywords.Else.value,
        Keywords.Continue.value,
        # was a second Keywords.If entry, which left 'in' out of the keyword list
        Keywords.In.value,
        Keywords.Return.value,
        # loops
        Keywords.While.value,
        Keywords.For.value,
        # access
        Keywords.Public.value,
        Keywords.This.value
    ]
    datatypes = [
        Datatypes.Empty.value,
        Datatypes.Any.value,
        Datatypes.Number.value,
        Datatypes.String.value,
        Datatypes.Bool.value,
        Datatypes.List.value,
        Datatypes.Dict.value
    ]
    format_chars = [
        FormatCharacters.Left_Brace.value,
        FormatCharacters.Right_Brace.value,
        FormatCharacters.Left_Parenthesis.value,
        FormatCharacters.Right_Parenthesis.value,
        FormatCharacters.Left_Bracket.value,
        FormatCharacters.Right_Bracket.value,
        FormatCharacters.Semicolon.value,
        FormatCharacters.Colon.value,
        FormatCharacters.Comma.value,
        FormatCharacters.Point.value
    ]
    expr_chars = [
        ExpressionCharacters.Plus.value,
        ExpressionCharacters.Minus.value,
        ExpressionCharacters.Asterisk.value,
        ExpressionCharacters.Slash.value,
        ExpressionCharacters.Equal.value,
        ExpressionCharacters.Caret.value
    ]
    # NOTE(review): a lexer that tests single characters against this list can only
    # ever match '<', '>' and '!' - the two character entries need a lookahead
    bool_expr_chars = ['<', '>', '!', '!=', '==', '>=', '<=', '&&', '||']
    bool_values = [Booleans.Right.value, Booleans.Wrong.value]
--- a/src/cc_lang_interpreter/application.py
+++ b/src/cc_lang_interpreter/application.py
@@ -1,9 +1,12 @@
 import os
 from cpl.application import ApplicationABC
 from cpl.configuration import ConfigurationABC
 from cpl.console import Console
 from cpl.dependency_injection import ServiceProviderABC
 from lexer.abc.lexer_abc import LexerABC
 from runtime.abc.runtime_service_abc import RuntimeServiceABC
 class Application(ApplicationABC):
@@ -12,19 +15,57 @@ class Application(ApplicationABC):
        ApplicationABC.__init__(self, config, services)
        self._lexer: LexerABC = services.get_service(LexerABC)
        self._runtime: RuntimeServiceABC = services.get_service(RuntimeServiceABC)
        self._path = config.get_configuration('p')
-    def _console(self): pass
+    def _interpret(self, line: str):
        tokens = self._lexer.tokenize(line)
-    def _files(self): pass
+        line.replace("\n", "").replace("\t", "")
        Console.write_line(f'\nLINE: {line}')
        tokens.for_each(lambda t: Console.write_line(t.type, t.value))
    def _console(self):
        """
        Interactive mode: prompts with '> ' and interprets every entered line,
        the loop has no exit condition of its own
        :return:
        """
        line_number = 1
        while True:
            # line numbers are 1-based
            self._runtime.line_count = line_number
            self._interpret(input('> '))
            line_number += 1
    def _files(self):
        """
        Interprets every file ending with .cc below the configured directory
        :return:
        :raises FileNotFoundError: if the configured path is not a directory
        """
        if not os.path.isdir(self._path):
            raise FileNotFoundError(self._path)
        # os.walk also descends into sub directories
        for root, _directories, file_names in os.walk(self._path):
            for file_name in file_names:
                if not file_name.endswith('.cc'):
                    continue
                self._read_file(os.path.join(root, file_name))
    def _read_file(self, file: str):
        """
        Reads a .cc file and interprets it line by line
        :param file: path of the file
        :return:
        :raises FileNotFoundError: if file is not an existing file
        :raises Exception: if the file name does not end with .cc
        """
        if not os.path.isfile(file):
            # carry the path in the error, like _files does
            raise FileNotFoundError(file)
            # self.__utils.runtime_error(Error(ErrorCodes.FileNotFound))
        if not file.endswith('.cc'):
            raise Exception('Wrong file type')
            # self.__utils.runtime_error(Error(ErrorCodes.WrongFileType))
        # the context manager closes the file even when reading fails
        with open(file, 'r', encoding='utf-8') as source:
            lines = source.readlines()
        # line numbers are 1-based
        for line_number, line in enumerate(lines, start=1):
            self._runtime.line_count = line_number
            self._interpret(line)
    def configure(self):
        """
        Nothing to configure for this application
        :return:
        """
    def main(self):
        """
        Entry point: without a path the console is started, a file path is
        interpreted directly and any other path is handled as a directory
        :return:
        """
        Console.write_line(self._configuration.additional_arguments, self._path)
        path = self._path
        if path is None:
            self._console()
        elif os.path.isfile(path):
            self._read_file(path)
        else:
            self._files()
--- a/src/cc_lang_interpreter/startup.py
+++ b/src/cc_lang_interpreter/startup.py
@@ -4,6 +4,8 @@ from cpl.dependency_injection import ServiceProviderABC, ServiceCollectionABC
 from lexer.abc.lexer_abc import LexerABC
 from lexer.service.lexer_service import LexerService
 from runtime.abc.runtime_service_abc import RuntimeServiceABC
 from runtime.service.runtime_service import RuntimeService
 class Startup(StartupABC):
@@ -16,12 +18,13 @@ class Startup(StartupABC):
        self._services = services
    def configure_configuration(self) -> ConfigurationABC:
-        self._configuration.add_console_argument(ConsoleArgument('-', 'p', [], ' '))
+        self._configuration.add_console_argument(ConsoleArgument('-', 'p', [], ' ', is_value_token_optional=True))
        self._configuration.add_console_arguments()
        return self._configuration
    def configure_services(self) -> ServiceProviderABC:
        """
        Registers the lexer and the runtime service and builds the service provider
        :return: provider built from the service collection
        """
        # (abstraction, implementation) pairs, registered in this order
        registrations = (
            (LexerABC, LexerService),
            (RuntimeServiceABC, RuntimeService),
        )
        for abstraction, implementation in registrations:
            self._services.add_singleton(abstraction, implementation)
        return self._services.build_service_provider()
--- a/src/lexer/abc/lexer_abc.py
+++ b/src/lexer/abc/lexer_abc.py
@@ -1,7 +1,14 @@
 from abc import ABC, abstractmethod
 from cpl_query.extension.list import List
 from lexer.model.token import Token
 class LexerABC(ABC):
    """
    Interface of the lexer
    """
    @abstractmethod
    def __init__(self):
        pass
    @abstractmethod
    def tokenize(self, line: str) -> List[Token]:
        """
        Turns one line of source code into tokens
        :param line: line of source code
        :return: tokens found in the line
        """
        pass
--- a/old/src/CCLang_sly/init.py
+++ b/old/src/CCLang_sly/init.py
--- a/src/lexer/model/token.py
+++ b/src/lexer/model/token.py
@@ -0,0 +1,20 @@
 from lexer.model.token_types import TokenTypes
 class Token:
    """
    Pair of token type and token text, the type is read only
    """
    def __init__(self, token_type: TokenTypes, value: str) -> None:
        """
        :param token_type: type of the token
        :param value: text of the token
        """
        self._type, self._value = token_type, value
    @property
    def type(self) -> TokenTypes:
        """Type of the token"""
        return self._type
    @property
    def value(self) -> str:
        """Text of the token"""
        return self._value
    @value.setter
    def value(self, new_value: str):
        self._value = new_value
--- a/src/lexer/model/token_types.py
+++ b/src/lexer/model/token_types.py
@@ -0,0 +1,24 @@
 from enum import Enum
 class TokenTypes(Enum):
    """
    Final type of a token
    """
    Empty = 0
    Keyword = 1
    Type = 2
    Name = 3
    Bool = 4
    String = 5
    Number = 6
    Expression_Character = 7
    Bool_Expression_Character = 8
    Format_Character = 9
 class UnresolvedTokenTypes(Enum):
    """
    Rough type of a piece of text before it is resolved into a TokenTypes member,
    Word covers everything that later becomes a keyword, type, bool or name
    """
    Empty = 0
    Word = 1
    Number = 2
    String = 3
    Expression_Character = 4
    Bool_Expression_Character = 5
    Format_Character = 6
--- a/src/lexer/model/token_value_types.py
+++ b/src/lexer/model/token_value_types.py
@@ -0,0 +1,62 @@
 from enum import Enum
 class Keywords(Enum):
    """
    Keywords of the language, each value is the word as written in code
    """
    # define keywords
    Library = 'lib'
    Class = 'class'
    Function = 'func'
    Variable = 'var'
    Use = 'use'
    From = 'from'
    # builtin functions
    Output = 'output'
    Input = 'input'
    Length = 'length'
    Range = 'range'
    Exit = 'exit'
    # normal keywords
    If = 'if'
    ElseIf = 'elseif'
    Else = 'else'
    Continue = 'continue'
    In = 'in'
    Return = 'return'
    # loops
    While = 'while'
    For = 'for'
    # access
    Public = 'public'
    This = 'this'
 class Booleans(Enum):
    """
    The two boolean literals
    """
    Right = 'true'
    Wrong = 'false'
 class ExpressionCharacters(Enum):
    """
    Single characters used in expressions
    """
    Plus = '+'
    Minus = '-'
    Asterisk = '*'
    Slash = '/'
    Equal = '='
    Caret = '^'
 class FormatCharacters(Enum):
    """
    Single characters that structure the code: brackets and separators
    """
    Left_Brace = '{'
    Right_Brace = '}'
    Left_Parenthesis = '('
    Right_Parenthesis = ')'
    Left_Bracket = '['
    Right_Bracket = ']'
    Semicolon = ';'
    Colon = ':'
    Comma = ','
    Point = '.'
--- a/src/lexer/service/lexer_service.py
+++ b/src/lexer/service/lexer_service.py
@@ -1,7 +1,166 @@
 from cpl_query.extension.list import List
 from cc_lang.model.language_definition import LanguageDefinition
 from lexer.abc.lexer_abc import LexerABC
 from lexer.model.token import Token
 from lexer.model.token_types import UnresolvedTokenTypes, TokenTypes
 from runtime.abc.runtime_service_abc import RuntimeServiceABC
 class LexerService(LexerABC):
-    def __init__(self):
+    def __init__(self, runtime: RuntimeServiceABC):
-        pass
+        self._runtime = runtime
        self._is_ml_comment = False
    def _add_tok(self, tokens: List[Token], value: str, input_token_type: UnresolvedTokenTypes) -> None:
        """
        Creates token object and appends it to tokens, empty values are ignored
        :param tokens: list the token is appended to
        :param value: text of the token
        :param input_token_type: rough type found while scanning
        :return:
        """
        if value == '':
            return
        if input_token_type == UnresolvedTokenTypes.Word:
            # words are resolved through the language definition, unknown words are names
            # (the former comparison of value with UnresolvedTokenTypes.Empty could
            # never be true for a str and was dropped)
            if value in LanguageDefinition.keywords:
                token_type = TokenTypes.Keyword
            elif value in LanguageDefinition.datatypes:
                token_type = TokenTypes.Type
            elif value in LanguageDefinition.bool_values:
                token_type = TokenTypes.Bool
            else:
                token_type = TokenTypes.Name
        else:
            # all other rough types map 1:1, anything unknown stays Empty
            token_type = {
                UnresolvedTokenTypes.Number: TokenTypes.Number,
                UnresolvedTokenTypes.String: TokenTypes.String,
                UnresolvedTokenTypes.Expression_Character: TokenTypes.Expression_Character,
                UnresolvedTokenTypes.Bool_Expression_Character: TokenTypes.Bool_Expression_Character,
                UnresolvedTokenTypes.Format_Character: TokenTypes.Format_Character,
            }.get(input_token_type, TokenTypes.Empty)
        tokens.append(Token(token_type, value))
    def tokenize(self, line: str) -> List[Token]:
        """
        Splits one line of source code into tokens

        Recognised are words, numbers (digits and '.'), strings in single or double
        quotes, runs of expression characters, single bool expression characters
        and format characters. '#' and '//' start a comment that ends with the line,
        '/*' starts a comment that ends at '*/' and may span several calls, which
        is why that state is kept on the instance.

        Whatever is still being collected when the input ends is emitted as well,
        so the line does not have to end with a newline.
        :param line: line of source code
        :return: tokens found in the line
        """
        tokens: List[Token] = List(Token)
        word = ''
        # rough type the collected word is emitted as
        word_type = UnresolvedTokenTypes.Word
        ol_comment = False
        # quote character that opened the current string, '' outside of strings
        string_quote = ''
        # previous character, '' at the start so nothing wraps around to the line end
        prev = ''
        for c in line:
            last, prev = prev, c
            # inside /* */: skip everything up to and including the closing marker
            if self._is_ml_comment:
                if last == '*' and c == '/':
                    self._is_ml_comment = False
                    # the closing '/' must not be read as start of the next marker
                    prev = ''
                continue
            # inside a one line comment: skip until the line ends
            if ol_comment:
                ol_comment = c != '\n'
                continue
            # inside a string: characters are taken as they are
            if string_quote != '':
                if c == string_quote or c == '\n':
                    self._add_tok(tokens, word, word_type)
                    word, word_type = '', UnresolvedTokenTypes.Word
                    string_quote = ''
                else:
                    word += c
                continue
            if word_type == UnresolvedTokenTypes.Number:
                if c.isdigit() or c == '.':
                    word += c
                    continue
                # end of number, c itself is handled below
                self._add_tok(tokens, word, word_type)
                word, word_type = '', UnresolvedTokenTypes.Word
            elif word_type == UnresolvedTokenTypes.Expression_Character:
                if last == '/' and (c == '/' or c == '*'):
                    # the '/' collected last belongs to the comment marker
                    self._add_tok(tokens, word[:-1], word_type)
                    word, word_type = '', UnresolvedTokenTypes.Word
                    if c == '/':
                        ol_comment = True
                    else:
                        self._is_ml_comment = True
                        # the opening '*' must not be read as part of a closing marker
                        prev = ''
                    continue
                if c in LanguageDefinition.expr_chars:
                    word += c
                    continue
                # end of expression char, c itself is handled below
                self._add_tok(tokens, word, word_type)
                word, word_type = '', UnresolvedTokenTypes.Word
            # from here on word is either empty or an unfinished plain word
            if c == '#':
                self._add_tok(tokens, word, word_type)
                word = ''
                ol_comment = True
            # begin of string
            elif c == '\'' or c == '\"':
                self._add_tok(tokens, word, word_type)
                word, word_type = '', UnresolvedTokenTypes.String
                string_quote = c
            # format char
            elif c in LanguageDefinition.format_chars:
                self._add_tok(tokens, word, word_type)
                self._add_tok(tokens, c, UnresolvedTokenTypes.Format_Character)
                word = ''
            # begin of number, a digit inside a word stays part of the word
            elif c.isdigit() and word == '':
                word, word_type = c, UnresolvedTokenTypes.Number
            # begin expression char
            elif c in LanguageDefinition.expr_chars:
                self._add_tok(tokens, word, word_type)
                word, word_type = c, UnresolvedTokenTypes.Expression_Character
            # bool expression char
            elif c in LanguageDefinition.bool_expr_chars:
                self._add_tok(tokens, word, word_type)
                self._add_tok(tokens, c, UnresolvedTokenTypes.Bool_Expression_Character)
                word = ''
            # end of word, any whitespace separates words
            elif c.isspace():
                self._add_tok(tokens, word, word_type)
                word = ''
            else:
                word += c
        # end of input ends whatever is still being collected
        self._add_tok(tokens, word, word_type)
        return tokens
--- a/src/runtime/abc/runtime_service_abc.py
+++ b/src/runtime/abc/runtime_service_abc.py
@@ -0,0 +1,15 @@
 from abc import ABC, abstractmethod
 class RuntimeServiceABC(ABC):
    @abstractmethod
    def __init__(self): pass
    @property
    @abstractmethod
    def line_count(self) -> int: pass
    @line_count.setter
    @abstractmethod
    def line_count(self, line_count: int): pass
--- a/src/runtime/service/init.py
+++ b/src/runtime/service/init.py
@@ -0,0 +1 @@
 # imports
--- a/src/runtime/service/runtime_service.py
+++ b/src/runtime/service/runtime_service.py
@@ -0,0 +1,15 @@
 from runtime.abc.runtime_service_abc import RuntimeServiceABC
 class RuntimeService(RuntimeServiceABC):
    """
    Runtime service that keeps the line count in memory, starting at 0
    """
    def __init__(self):
        self._line_count: int = 0
    @property
    def line_count(self) -> int:
        """Current line count"""
        return self._line_count
    @line_count.setter
    def line_count(self, value: int):
        self._line_count = value