Added lexer

2021-08-14 16:41:25 +02:00
parent 4a3f9b4b9d
commit c909ce31d8
18 changed files with 438 additions and 210 deletions
--- a/src/cc_lang/model/datatypes.py
+++ b/src/cc_lang/model/datatypes.py
@@ -0,0 +1,12 @@
+from enum import Enum
+
+
+class Datatypes(Enum):
+    """Names of the language's data types; each member's value is the type name as a plain string."""
+
+    Empty = 'empty'
+    Any = 'any'
+    Number = 'number'
+    String = 'string'
+    Bool = 'bool'
+    List = 'list'
+    Dict = 'dict'
--- a/src/cc_lang/model/language_definition.py
+++ b/src/cc_lang/model/language_definition.py
@@ -0,0 +1,65 @@
+from cc_lang.model.datatypes import Datatypes
+from lexer.model.token_value_types import Keywords, FormatCharacters, ExpressionCharacters, Booleans
+
+
+class LanguageDefinition:
+    """Lookup tables used to classify raw text while lexing.
+
+    Every attribute is a list of plain strings (the enum *values*), so a piece of
+    source text can be tested with a simple ``text in LanguageDefinition.<table>``.
+    """
+    # interpreter
+    keywords = [
+        # define keywords
+        Keywords.Library.value,
+        Keywords.Class.value,
+        Keywords.Function.value,
+        Keywords.Variable.value,
+        Keywords.Use.value,
+        Keywords.From.value,
+        # builtin functions
+        Keywords.Output.value,
+        Keywords.Input.value,
+        Keywords.Length.value,
+        Keywords.Range.value,
+        Keywords.Exit.value,
+        # normal keywords (same order as the Keywords enum)
+        Keywords.If.value,
+        Keywords.ElseIf.value,
+        Keywords.Else.value,
+        Keywords.Continue.value,
+        Keywords.In.value,
+        Keywords.Return.value,
+        # loops
+        Keywords.While.value,
+        Keywords.For.value,
+        # access
+        Keywords.Public.value,
+        Keywords.This.value
+    ]
+    datatypes = [
+        Datatypes.Empty.value,
+        Datatypes.Any.value,
+        Datatypes.Number.value,
+        Datatypes.String.value,
+        Datatypes.Bool.value,
+        Datatypes.List.value,
+        Datatypes.Dict.value
+    ]
+    format_chars = [
+        FormatCharacters.Left_Brace.value,
+        FormatCharacters.Right_Brace.value,
+        FormatCharacters.Left_Parenthesis.value,
+        FormatCharacters.Right_Parenthesis.value,
+        FormatCharacters.Left_Bracket.value,
+        FormatCharacters.Right_Bracket.value,
+        FormatCharacters.Semicolon.value,
+        FormatCharacters.Colon.value,
+        FormatCharacters.Comma.value,
+        FormatCharacters.Point.value
+    ]
+    expr_chars = [
+        ExpressionCharacters.Plus.value,
+        ExpressionCharacters.Minus.value,
+        ExpressionCharacters.Asterisk.value,
+        ExpressionCharacters.Slash.value,
+        ExpressionCharacters.Equal.value,
+        ExpressionCharacters.Caret.value
+    ]
+    # comparison / logic operators; unlike the tables above these are literals, not enum values
+    bool_expr_chars = ['<', '>', '!', '!=', '==', '>=', '<=', '&&', '||']
+    bool_values = [Booleans.Right.value, Booleans.Wrong.value]
--- a/src/cc_lang_interpreter/application.py
+++ b/src/cc_lang_interpreter/application.py
@@ -1,9 +1,12 @@
+import os
+
 from cpl.application import ApplicationABC
 from cpl.configuration import ConfigurationABC
 from cpl.console import Console
 from cpl.dependency_injection import ServiceProviderABC

 from lexer.abc.lexer_abc import LexerABC
+from runtime.abc.runtime_service_abc import RuntimeServiceABC


 class Application(ApplicationABC):
@@ -12,19 +15,57 @@ class Application(ApplicationABC):
        ApplicationABC.__init__(self, config, services)

        self._lexer: LexerABC = services.get_service(LexerABC)
+        self._runtime: RuntimeServiceABC = services.get_service(RuntimeServiceABC)
+
        self._path = config.get_configuration('p')

-    def _console(self): pass
+    def _interpret(self, line: str):
+        """Tokenize one source line and print the line followed by each token's type and value.
+
+        :param line: raw source line; it is passed to the lexer unchanged
+        """
+        # the lexer gets the untouched line; newlines and tabs are only removed for the printout below
+        tokens = self._lexer.tokenize(line)

-    def _files(self): pass
+        # str.replace returns a new string, so the result has to be bound back to the name
+        line = line.replace("\n", "").replace("\t", "")
+        Console.write_line(f'\nLINE: {line}')
+        tokens.for_each(lambda t: Console.write_line(t.type, t.value))
+
+    def _console(self):
+        """Interactive mode: prompt with '> ' in an endless loop and interpret every entered line.
+
+        The runtime's line counter is set to the 1-based number of the line before it is interpreted.
+        """
+        line_number = 1
+        while True:
+            self._runtime.line_count = line_number
+            self._interpret(input('> '))
+            line_number += 1
+
+    def _files(self):
+        """Interpret every file ending in '.cc' found below the configured path (walked recursively).
+
+        :raises FileNotFoundError: if the configured path is not a directory
+        """
+        root_path = self._path
+        if not os.path.isdir(root_path):
+            raise FileNotFoundError(root_path)
+
+        for directory, _, file_names in os.walk(root_path):
+            for file_name in file_names:
+                if not file_name.endswith('.cc'):
+                    continue
+
+                self._read_file(os.path.join(directory, file_name))
+
+    def _read_file(self, file: str):
+        """Interpret a single '.cc' source file line by line.
+
+        The runtime's line counter is set to the 1-based line number before each line is interpreted.
+
+        :param file: path of the file to read (decoded as UTF-8)
+        :raises FileNotFoundError: if ``file`` is not an existing regular file
+        :raises Exception: if ``file`` does not end with '.cc'
+        """
+        if not os.path.isfile(file):
+            # TODO(review): a commented-out call to self.__utils.runtime_error(Error(ErrorCodes.FileNotFound))
+            #  stood here - presumably the intended error path once that helper exists
+            raise FileNotFoundError(file)
+
+        if not file.endswith('.cc'):
+            # TODO(review): same as above, with ErrorCodes.WrongFileType
+            raise Exception('Wrong file type')
+
+        # the context manager closes the handle even when interpreting a line raises
+        with open(file, 'r', encoding='utf-8') as source:
+            for line_number, line in enumerate(source, start=1):
+                self._runtime.line_count = line_number
+                self._interpret(line)

    def configure(self):
+        # No configuration step is performed here; the body is empty.
        pass

    def main(self):
+        """Start the interactive console when no path was configured, otherwise interpret the given path."""
-        Console.write_line(self._configuration.additional_arguments, self._path)
        if self._path is None:
            self._console()
            return

-        self._files()
+        # a single file is read directly; anything else goes to _files(), which only accepts a directory
+        if os.path.isfile(self._path):
+            self._read_file(self._path)
+        else:
+            self._files()
--- a/src/cc_lang_interpreter/startup.py
+++ b/src/cc_lang_interpreter/startup.py
@@ -4,6 +4,8 @@ from cpl.dependency_injection import ServiceProviderABC, ServiceCollectionABC

 from lexer.abc.lexer_abc import LexerABC
 from lexer.service.lexer_service import LexerService
+from runtime.abc.runtime_service_abc import RuntimeServiceABC
+from runtime.service.runtime_service import RuntimeService


 class Startup(StartupABC):
@@ -16,12 +18,13 @@ class Startup(StartupABC):
        self._services = services

    def configure_configuration(self) -> ConfigurationABC:
+        # Registers the console argument 'p' with prefix '-'; Application reads it via get_configuration('p').
+        # NOTE(review): is_value_token_optional=True presumably allows '-p' without a value - confirm in the cpl docs.
-        self._configuration.add_console_argument(ConsoleArgument('-', 'p', [], ' '))
+        self._configuration.add_console_argument(ConsoleArgument('-', 'p', [], ' ', is_value_token_optional=True))
        self._configuration.add_console_arguments()

        return self._configuration

    def configure_services(self) -> ServiceProviderABC:
+        # Each service is registered as a singleton under its abstract base class,
+        # the same class Application passes to get_service().
        self._services.add_singleton(LexerABC, LexerService)
+        self._services.add_singleton(RuntimeServiceABC, RuntimeService)

        return self._services.build_service_provider()
--- a/src/lexer/abc/lexer_abc.py
+++ b/src/lexer/abc/lexer_abc.py
@@ -1,7 +1,14 @@
 from abc import ABC, abstractmethod

+from cpl_query.extension.list import List
+
+from lexer.model.token import Token
+

 class LexerABC(ABC):
+    """Abstract interface of the lexer service."""

    @abstractmethod
    def __init__(self): pass
+
+    # Takes one line of source text and returns the tokens found in it.
+    @abstractmethod
+    def tokenize(self, line: str) -> List[Token]: pass
--- a/src/lexer/model/__init__.py
+++ b/src/lexer/model/__init__.py
--- a/src/lexer/model/token.py
+++ b/src/lexer/model/token.py
@@ -0,0 +1,20 @@
+from lexer.model.token_types import TokenTypes
+
+
+class Token:
+    """A single token: a type fixed at construction plus a text value that can be replaced."""
+
+    def __init__(self, token_type: TokenTypes, value: str) -> None:
+        self._type = token_type
+        self._value = value
+
+    @property
+    def type(self) -> TokenTypes:
+        """Type of the token; read-only, no setter is defined."""
+        return self._type
+
+    @property
+    def value(self) -> str:
+        """Text of the token."""
+        return self._value
+
+    @value.setter
+    def value(self, value: str) -> None:
+        self._value = value
--- a/src/lexer/model/token_types.py
+++ b/src/lexer/model/token_types.py
@@ -0,0 +1,24 @@
+from enum import Enum
+
+
+class TokenTypes(Enum):
+    """Final token categories, as stored in Token.type."""
+    Empty = 0
+    Keyword = 1
+    Type = 2
+    Name = 3
+    Bool = 4
+    String = 5
+    Number = 6
+    Expression_Character = 7
+    Bool_Expression_Character = 8
+    Format_Character = 9
+
+
+class UnresolvedTokenTypes(Enum):
+    """Coarse categories used while scanning, before a Word is resolved to keyword, type, bool or name."""
+    Empty = 0
+    Word = 1
+    Number = 2
+    String = 3
+    Expression_Character = 4
+    Bool_Expression_Character = 5
+    Format_Character = 6
--- a/src/lexer/model/token_value_types.py
+++ b/src/lexer/model/token_value_types.py
@@ -0,0 +1,62 @@
+from enum import Enum
+
+
+class Keywords(Enum):
+    """Reserved words of the language; each member's value is the keyword text."""
+    # define keywords
+    Library = 'lib'
+    Class = 'class'
+    Function = 'func'
+    Variable = 'var'
+    Use = 'use'
+    From = 'from'
+
+    # builtin functions
+    Output = 'output'
+    Input = 'input'
+    Length = 'length'
+    Range = 'range'
+    Exit = 'exit'
+
+    # normal keywords
+    If = 'if'
+    ElseIf = 'elseif'
+    Else = 'else'
+    Continue = 'continue'
+    In = 'in'
+    Return = 'return'
+
+    # loops
+    While = 'while'
+    For = 'for'
+
+    # access
+    Public = 'public'
+    This = 'this'
+
+
+class Booleans(Enum):
+    """Text of the two boolean literals."""
+    Right = 'true'
+    Wrong = 'false'
+
+
+class ExpressionCharacters(Enum):
+    """Single characters treated as expression characters ('=' is part of this set)."""
+    Plus = '+'
+    Minus = '-'
+    Asterisk = '*'
+    Slash = '/'
+    Equal = '='
+    Caret = '^'
+
+
+class FormatCharacters(Enum):
+    """Single characters treated as format characters: brackets, separators and the point."""
+    Left_Brace = '{'
+    Right_Brace = '}'
+    Left_Parenthesis = '('
+    Right_Parenthesis = ')'
+    Left_Bracket = '['
+    Right_Bracket = ']'
+    Semicolon = ';'
+    Colon = ':'
+    Comma = ','
+    Point = '.'
+
--- a/src/lexer/service/lexer_service.py
+++ b/src/lexer/service/lexer_service.py
@@ -1,7 +1,166 @@
+from cpl_query.extension.list import List
+
+from cc_lang.model.language_definition import LanguageDefinition
 from lexer.abc.lexer_abc import LexerABC
+from lexer.model.token import Token
+from lexer.model.token_types import UnresolvedTokenTypes, TokenTypes
+from runtime.abc.runtime_service_abc import RuntimeServiceABC


 class LexerService(LexerABC):
+    """Line-oriented lexer: every tokenize() call turns one source line into tokens.
+
+    The only state kept between calls is whether the previous line ended inside a /* ... */ comment.
+    """

-    def __init__(self):
-        pass
+    # unresolved types that map one-to-one onto a final type (Word needs a table lookup instead)
+    _direct_types = {
+        UnresolvedTokenTypes.Number: TokenTypes.Number,
+        UnresolvedTokenTypes.String: TokenTypes.String,
+        UnresolvedTokenTypes.Expression_Character: TokenTypes.Expression_Character,
+        UnresolvedTokenTypes.Bool_Expression_Character: TokenTypes.Bool_Expression_Character,
+        UnresolvedTokenTypes.Format_Character: TokenTypes.Format_Character,
+    }
+
+    def __init__(self, runtime: RuntimeServiceABC):
+        self._runtime = runtime
+
+        # kept on the instance because a /* ... */ comment may span several tokenize() calls
+        self._is_ml_comment = False
+
+    def _add_tok(self, tokens: List[Token], value: str, input_token_type: UnresolvedTokenTypes) -> None:
+        """
+        Resolves the final token type and appends the new token to tokens.
+
+        Empty values are skipped, which lets callers flush a possibly empty word buffer.
+        Strings are the exception: '' is a legitimate string literal and still yields a token.
+
+        :param tokens: list the token is appended to
+        :param value: text of the token
+        :param input_token_type: coarse type detected while scanning
+        :return:
+        """
+        if value == '' and input_token_type != UnresolvedTokenTypes.String:
+            return
+
+        if input_token_type == UnresolvedTokenTypes.Word:
+            if value in LanguageDefinition.keywords:
+                token_type = TokenTypes.Keyword
+
+            elif value in LanguageDefinition.datatypes:
+                token_type = TokenTypes.Type
+
+            elif value in LanguageDefinition.bool_values:
+                token_type = TokenTypes.Bool
+
+            else:
+                token_type = TokenTypes.Name
+
+        else:
+            token_type = self._direct_types.get(input_token_type, TokenTypes.Empty)
+
+        tokens.append(Token(token_type, value))
+
+    @staticmethod
+    def _number_end(line: str, start: int) -> int:
+        """
+        Returns the index just behind the number literal that starts at start.
+
+        A single '.' belongs to the number, and only when a digit follows it,
+        so a trailing point stays available as format character.
+        """
+        end = start + 1
+        seen_point = False
+        while end < len(line):
+            c = line[end]
+            if c == '.' and not seen_point and line[end + 1:end + 2].isdigit():
+                seen_point = True
+
+            elif not c.isdigit():
+                break
+
+            end += 1
+
+        return end
+
+    @staticmethod
+    def _expression_end(line: str, start: int) -> int:
+        """
+        Returns the index just behind the run of expression characters that starts at start.
+
+        The run stops in front of a comment opener and in front of a two character bool operator,
+        so those are recognized on their own.
+        """
+        end = start + 1
+        while end < len(line) and line[end] in LanguageDefinition.expr_chars:
+            pair = line[end:end + 2]
+            if pair in ('//', '/*') or pair in LanguageDefinition.bool_expr_chars:
+                break
+
+            end += 1
+
+        return end
+
+    def tokenize(self, line: str) -> List[Token]:
+        """
+        Splits one line of source text into tokens.
+
+        Comments ('#' and '//' up to the end of the line, '/* ... */' possibly across lines) and
+        whitespace produce no tokens. Text between matching quotes becomes one String token and is
+        not inspected any further; escape sequences are not interpreted.
+
+        :param line: source line, with or without a trailing newline
+        :return: tokens in order of appearance
+        """
+        tokens: List[Token] = List(Token)
+        word = ''  # characters of the name/keyword collected so far
+
+        def flush_word() -> None:
+            nonlocal word
+            self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
+            word = ''
+
+        length = len(line)
+        i = 0
+        while i < length:
+            c = line[i]
+            pair = line[i:i + 2]  # only one character long at the end of the line
+
+            # inside a multi line comment everything up to '*/' is skipped
+            if self._is_ml_comment:
+                if pair == '*/':
+                    self._is_ml_comment = False
+                    i += 2
+                else:
+                    i += 1
+
+            # one line comment: skip the rest of the line
+            elif c == '#' or pair == '//':
+                flush_word()
+                newline = line.find('\n', i)
+                i = length if newline == -1 else newline + 1
+
+            # begin of multi line comment
+            elif pair == '/*':
+                flush_word()
+                self._is_ml_comment = True
+                i += 2
+
+            # string, 'hello' or "hello"
+            elif c == '\'' or c == '"':
+                flush_word()
+                closing = line.find(c, i + 1)
+                if closing == -1:
+                    # NOTE(review): unterminated string - the rest of the line is used as its value;
+                    #  this should probably become a lexer error once error reporting exists
+                    self._add_tok(tokens, line[i + 1:].rstrip('\n'), UnresolvedTokenTypes.String)
+                    i = length
+                else:
+                    self._add_tok(tokens, line[i + 1:closing], UnresolvedTokenTypes.String)
+                    i = closing + 1
+
+            # number; a digit behind word characters belongs to that word instead
+            elif c.isdigit() and word == '':
+                end = self._number_end(line, i)
+                self._add_tok(tokens, line[i:end], UnresolvedTokenTypes.Number)
+                i = end
+
+            # two character bool expression char, checked first so that '==' is not read as two '='
+            elif len(pair) == 2 and pair in LanguageDefinition.bool_expr_chars:
+                flush_word()
+                self._add_tok(tokens, pair, UnresolvedTokenTypes.Bool_Expression_Character)
+                i += 2
+
+            # one character bool expression char
+            elif c in LanguageDefinition.bool_expr_chars:
+                flush_word()
+                self._add_tok(tokens, c, UnresolvedTokenTypes.Bool_Expression_Character)
+                i += 1
+
+            # expression chars; directly adjacent ones form a single token
+            elif c in LanguageDefinition.expr_chars:
+                flush_word()
+                end = self._expression_end(line, i)
+                self._add_tok(tokens, line[i:end], UnresolvedTokenTypes.Expression_Character)
+                i = end
+
+            # format char
+            elif c in LanguageDefinition.format_chars:
+                flush_word()
+                self._add_tok(tokens, c, UnresolvedTokenTypes.Format_Character)
+                i += 1
+
+            # end of word
+            elif c.isspace():
+                flush_word()
+                i += 1
+
+            else:
+                word += c
+                i += 1
+
+        # a line without trailing newline (console input) may end in the middle of a word
+        flush_word()
+
+        return tokens
--- a/src/runtime/abc/runtime_service_abc.py
+++ b/src/runtime/abc/runtime_service_abc.py
@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+
+
+class RuntimeServiceABC(ABC):
+    """Abstract interface of the runtime service; it exposes a readable and writable line counter."""
+
+    @abstractmethod
+    def __init__(self):
+        """Abstract constructor; concrete services provide their own."""
+
+    @property
+    @abstractmethod
+    def line_count(self) -> int:
+        """Line number most recently stored by a caller."""
+
+    @line_count.setter
+    @abstractmethod
+    def line_count(self, line_count: int):
+        """Store a new line number."""
--- a/src/runtime/service/__init__.py
+++ b/src/runtime/service/__init__.py
@@ -0,0 +1 @@
+# imports
--- a/src/runtime/service/runtime_service.py
+++ b/src/runtime/service/runtime_service.py
@@ -0,0 +1,15 @@
+from runtime.abc.runtime_service_abc import RuntimeServiceABC
+
+
+class RuntimeService(RuntimeServiceABC):
+    """Runtime service that keeps the line counter in memory; the counter starts at 0."""
+
+    def __init__(self) -> None:
+        self._line_count: int = 0
+
+    @property
+    def line_count(self) -> int:
+        """Most recently stored line number (0 until a caller sets one)."""
+        return self._line_count
+
+    @line_count.setter
+    def line_count(self, line_count: int) -> None:
+        self._line_count = line_count