Added lexer

This commit is contained in:
Sven Heidemann 2021-08-14 16:41:25 +02:00
parent 4a3f9b4b9d
commit c909ce31d8
18 changed files with 438 additions and 210 deletions

View File

@ -15,5 +15,12 @@ public lib Main {
output(isTrue(test)); output(isTrue(test));
output(this.test.getName()); output(this.test.getName());
} }
# Calls forEach on an empty list literal, passing an arrow-function callback that outputs its argument.
# NOTE(review): presumably the callback never runs for an empty list; confirm against the runtime.
private func testForEach(): void {
    var list = [];
    list.forEach(e => {
        output(e);
    });
}
} }
} }

View File

@ -1,31 +0,0 @@
from typing import Optional
from CCLang_sly.Parser import Parser
from Interpreter.Validator import Validator
from CCLang_sly.Lexer import Lexer
from Interpreter.Repo import Repo
from Interpreter.Utils import Utils
from Models.AbstractSyntaxTree.AbstractSyntaxTree import AbstractSyntaxTree
class Interpreter:
    """
    Front end that runs one source line through the SLY lexer and parser
    """

    def __init__(self, repo: Repo, utils: Utils) -> None:
        """
        Stores the shared objects; neither is read by interpret()
        :param repo:
        :param utils:
        """
        self.__repo = repo
        self.__utils = utils

    def interpret(self, line_str: str) -> None:
        """
        Interprets code line
        :param line_str: source text to tokenize and parse
        :return:
        """
        # A fresh lexer and parser are built for every line.
        lexer = Lexer()
        parser = Parser()
        ast = parser.parse(lexer.tokenize(line_str))
        # Only a non-None parse result is printed.
        if ast is not None:
            print(ast)

View File

@ -1,108 +0,0 @@
from sly import Lexer as SlyLexer
from Models.CCLang.TokenDefinition import TokenDefinition
class Lexer(SlyLexer):
    """
    SLY lexer for CCLang source lines

    Every token pattern comes from TokenDefinition. The patterns are declared in the
    same order as the names in `tokens`, with NAME last.
    """

    # Ignored pattern
    ignore = '\t '
    # Disabled alternative that would also skip /* ... */ block comments:
    # ignore_comment = r'(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|(//.*)|([#].*)'
    ignore_comment = r'([#].*|(//.*))'
    ignore_newline = r'\n+'

    tokens = {
        LIBRARY,
        CLASS,
        FUNCTION,
        VARIABLE,
        USE,
        FROM,
        OUTPUT,
        INPUT,
        LENGTH,
        RANGE,
        EXIT,
        IF,
        ELSEIF,
        ELSE,
        CONTINUE,
        IN,
        RETURN,
        WHILE,
        FOR,
        PUBLIC,
        THIS,
        LBRACE,
        RBRACE,
        LPARAN,
        RPARAN,
        LBRACKET,
        RBRACKET,
        SEMICOLON,
        COLON,
        COMMA,
        POINT,
        PLUS,
        MINUS,
        ASTERIK,
        SLASH,
        EQUAL,
        CARET,
        TRUE,
        FALSE,
        STRING,
        NUMBER,
        EMPTY,
        NAME
    }

    # token definition
    LIBRARY = TokenDefinition.Library.value
    CLASS = TokenDefinition.Class.value
    FUNCTION = TokenDefinition.Function.value
    VARIABLE = TokenDefinition.Variable.value
    USE = TokenDefinition.Use.value
    FROM = TokenDefinition.From.value
    OUTPUT = TokenDefinition.Output.value
    INPUT = TokenDefinition.Input.value
    LENGTH = TokenDefinition.Length.value
    RANGE = TokenDefinition.Range.value
    EXIT = TokenDefinition.Exit.value
    IF = TokenDefinition.If.value
    ELSEIF = TokenDefinition.ElseIf.value
    ELSE = TokenDefinition.Else.value
    CONTINUE = TokenDefinition.Continue.value
    IN = TokenDefinition.In.value
    RETURN = TokenDefinition.Return.value
    WHILE = TokenDefinition.While.value
    FOR = TokenDefinition.For.value
    PUBLIC = TokenDefinition.Public.value
    THIS = TokenDefinition.This.value
    LBRACE = TokenDefinition.LeftBrace.value
    RBRACE = TokenDefinition.RightBrace.value
    LPARAN = TokenDefinition.LeftParenthesis.value
    RPARAN = TokenDefinition.RightParenthesis.value
    LBRACKET = TokenDefinition.LeftBracket.value
    RBRACKET = TokenDefinition.RightBracket.value
    SEMICOLON = TokenDefinition.Semicolon.value
    COLON = TokenDefinition.Colon.value
    COMMA = TokenDefinition.Comma.value
    POINT = TokenDefinition.Point.value
    PLUS = TokenDefinition.Plus.value
    MINUS = TokenDefinition.Minus.value
    ASTERIK = TokenDefinition.Asterisk.value
    SLASH = TokenDefinition.Slash.value
    EQUAL = TokenDefinition.Equal.value
    CARET = TokenDefinition.Caret.value
    TRUE = TokenDefinition.BoolTrue.value
    FALSE = TokenDefinition.BoolFalse.value
    STRING = TokenDefinition.String.value
    NUMBER = TokenDefinition.Number.value
    EMPTY = TokenDefinition.Empty.value
    NAME = TokenDefinition.Name.value

    def error(self, t):
        """
        Reports an illegal character and skips it
        :param t: error token; its value starts with the offending character
        :return:
        """
        print("Illegal character '%s'" % t.value[0])
        # Step past the offending character. SLY resumes scanning at self.index after
        # error() returns, so leaving it unchanged reports the same character forever.
        self.index += 1

View File

@ -1,61 +0,0 @@
from sly import Parser as SlyParser
from CCLang_sly.Lexer import Lexer
class Parser(SlyParser):
    """
    SLY grammar for single CCLang source lines

    Productions are named `statement` or `type`. Each one that matches something
    returns a tuple whose first item tags the construct:
    'lib_def', 'class_def', 'func_def', 'type' or 'end'.
    """
    tokens = Lexer.tokens

    def __init__(self):
        self.env = {}

    # empty input
    @_('')
    def statement(self, p):
        return None

    def error(self, p):
        """
        Prints the value SLY hands to the error hook
        :param p:
        :return:
        """
        print(f'ERROR: {p}')

    # lib definition: ('lib_def', name, is_public)
    @_('PUBLIC LIBRARY NAME LBRACE')
    def statement(self, p):
        return ('lib_def', p.NAME, True)

    @_('LIBRARY NAME LBRACE')
    def statement(self, p):
        return ('lib_def', p.NAME, False)

    # class definition: ('class_def', name, is_public)
    @_('PUBLIC CLASS NAME LBRACE')
    def statement(self, p):
        return ('class_def', p.NAME, True)

    @_('CLASS NAME LBRACE')
    def statement(self, p):
        return ('class_def', p.NAME, False)

    # func definition: ('func_def', name, is_public)
    # NOTE(review): the public rule allows a nested `statement` between the parentheses,
    # the non-public rule does not -- confirm whether this asymmetry is intended.
    @_('PUBLIC FUNCTION NAME LPARAN statement RPARAN COLON type LBRACE')
    def statement(self, p):
        return ('func_def', p.NAME, True)

    @_('FUNCTION NAME LPARAN RPARAN COLON type LBRACE')
    def statement(self, p):
        return ('func_def', p.NAME, False)

    # types
    @_('EMPTY')
    def type(self, p):
        return ('type', p.EMPTY)

    # right brace
    @_('RBRACE')
    def statement(self, p):
        return ('end', p.RBRACE)

View File

@ -12,7 +12,6 @@ class Main:
self.__utils = self.__services.utils self.__utils = self.__services.utils
self.__repo = self.__services.repo self.__repo = self.__services.repo
self.__interpreter = self.__services.interpreter self.__interpreter = self.__services.interpreter
# self.__sly_cclang_interpreter = self.__services.sly_cclang_interpreter
def console(self) -> None: def console(self) -> None:
""" """
@ -23,7 +22,6 @@ class Main:
while self.__repo.error is None: while self.__repo.error is None:
self.__repo.line_number = i + 1 self.__repo.line_number = i + 1
self.__interpreter.interpret(input('> ')) self.__interpreter.interpret(input('> '))
# self.__sly_cclang_interpreter.interpret(input('> '))
i += 1 i += 1
def files(self, file: str) -> None: def files(self, file: str) -> None:
@ -44,7 +42,6 @@ class Main:
for i in range(0, len(f)): for i in range(0, len(f)):
self.__repo.line_number = i + 1 self.__repo.line_number = i + 1
self.__interpreter.interpret(f[i]) self.__interpreter.interpret(f[i])
# self.__sly_cclang_interpreter.interpret(f[i])
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -0,0 +1,12 @@
from enum import Enum
class Datatypes(Enum):
    """Names of the language's data types; each value is the spelling used in source code."""

    Empty = 'empty'
    Any = 'any'
    Number = 'number'
    String = 'string'
    Bool = 'bool'
    List = 'list'
    Dict = 'dict'

View File

@ -0,0 +1,65 @@
from cc_lang.model.datatypes import Datatypes
from lexer.model.token_value_types import Keywords, FormatCharacters, ExpressionCharacters, Booleans
class LanguageDefinition:
    """
    Lookup tables describing the language's vocabulary

    Each table holds plain strings taken from the corresponding enum values.
    """

    # interpreter
    keywords = [
        # define keywords
        Keywords.Library.value,
        Keywords.Class.value,
        Keywords.Function.value,
        Keywords.Variable.value,
        Keywords.Use.value,
        Keywords.From.value,
        # builtin functions
        Keywords.Output.value,
        Keywords.Input.value,
        Keywords.Length.value,
        Keywords.Range.value,
        Keywords.Exit.value,
        # normal keywords
        Keywords.If.value,
        Keywords.ElseIf.value,
        Keywords.Else.value,
        Keywords.Continue.value,
        # was a second Keywords.If entry, which left 'in' out of the table
        Keywords.In.value,
        Keywords.Return.value,
        # loops
        Keywords.While.value,
        Keywords.For.value,
        # access
        Keywords.Public.value,
        Keywords.This.value
    ]
    datatypes = [
        Datatypes.Empty.value,
        Datatypes.Any.value,
        Datatypes.Number.value,
        Datatypes.String.value,
        Datatypes.Bool.value,
        Datatypes.List.value,
        Datatypes.Dict.value
    ]
    format_chars = [
        FormatCharacters.Left_Brace.value,
        FormatCharacters.Right_Brace.value,
        FormatCharacters.Left_Parenthesis.value,
        FormatCharacters.Right_Parenthesis.value,
        FormatCharacters.Left_Bracket.value,
        FormatCharacters.Right_Bracket.value,
        FormatCharacters.Semicolon.value,
        FormatCharacters.Colon.value,
        FormatCharacters.Comma.value,
        FormatCharacters.Point.value
    ]
    expr_chars = [
        ExpressionCharacters.Plus.value,
        ExpressionCharacters.Minus.value,
        ExpressionCharacters.Asterisk.value,
        ExpressionCharacters.Slash.value,
        ExpressionCharacters.Equal.value,
        ExpressionCharacters.Caret.value
    ]
    bool_expr_chars = ['<', '>', '!', '!=', '==', '>=', '<=', '&&', '||']
    bool_values = [Booleans.Right.value, Booleans.Wrong.value]

View File

@ -1,9 +1,12 @@
import os
from cpl.application import ApplicationABC from cpl.application import ApplicationABC
from cpl.configuration import ConfigurationABC from cpl.configuration import ConfigurationABC
from cpl.console import Console from cpl.console import Console
from cpl.dependency_injection import ServiceProviderABC from cpl.dependency_injection import ServiceProviderABC
from lexer.abc.lexer_abc import LexerABC from lexer.abc.lexer_abc import LexerABC
from runtime.abc.runtime_service_abc import RuntimeServiceABC
class Application(ApplicationABC): class Application(ApplicationABC):
@ -12,19 +15,57 @@ class Application(ApplicationABC):
ApplicationABC.__init__(self, config, services) ApplicationABC.__init__(self, config, services)
self._lexer: LexerABC = services.get_service(LexerABC) self._lexer: LexerABC = services.get_service(LexerABC)
self._runtime: RuntimeServiceABC = services.get_service(RuntimeServiceABC)
self._path = config.get_configuration('p') self._path = config.get_configuration('p')
def _interpret(self, line: str):
    """
    Tokenizes one line and writes the line and its tokens to the console
    :param line: source line as read, including any trailing newline
    :return:
    """
    # The lexer receives the raw line; only the echoed copy is cleaned up.
    tokens = self._lexer.tokenize(line)

    # str.replace returns a new string, so the result has to be rebound.
    line = line.replace("\n", "").replace("\t", "")
    Console.write_line(f'\nLINE: {line}')
    tokens.for_each(lambda t: Console.write_line(t.type, t.value))
def _console(self):
    """
    Interactive mode: prompts with '> ' and interprets every entered line, without end
    :return:
    """
    line_number = 1
    while True:
        # publish the 1-based number of the line about to be read
        self._runtime.line_count = line_number
        entered = input('> ')
        self._interpret(entered)
        line_number += 1
def _files(self):
    """
    Reads every '.cc' file found below the configured directory
    :return:
    :raises FileNotFoundError: when the configured path is not a directory
    """
    root = self._path
    if not os.path.isdir(root):
        raise FileNotFoundError(root)

    for folder, _sub_folders, file_names in os.walk(root):
        for file_name in file_names:
            if not file_name.endswith('.cc'):
                continue
            self._read_file(os.path.join(folder, file_name))
def _read_file(self, file: str):
    """
    Interprets a '.cc' file line by line
    :param file: path of the file to read
    :return:
    :raises FileNotFoundError: when the path is not an existing file
    :raises Exception: when the file name does not end with '.cc'
    """
    if not os.path.isfile(file):
        raise FileNotFoundError(file)

    if not file.endswith('.cc'):
        raise Exception('Wrong file type')

    # Read everything up front inside a with-block so the handle is closed
    # before any line is interpreted.
    with open(file, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    for number, line in enumerate(lines, start=1):
        self._runtime.line_count = number
        self._interpret(line)
def configure(self):
    """
    Configuration hook; does nothing
    :return:
    """
    pass
def main(self):
    """
    Entry point: console mode without a path, otherwise a single file or a directory
    :return:
    """
    Console.write_line(self._configuration.additional_arguments, self._path)

    # no path configured -> interactive console
    if self._path is None:
        self._console()
        return

    if os.path.isfile(self._path):
        self._read_file(self._path)
    else:
        # anything that is not a file is handled as a directory; _files validates it
        self._files()

View File

@ -4,6 +4,8 @@ from cpl.dependency_injection import ServiceProviderABC, ServiceCollectionABC
from lexer.abc.lexer_abc import LexerABC from lexer.abc.lexer_abc import LexerABC
from lexer.service.lexer_service import LexerService from lexer.service.lexer_service import LexerService
from runtime.abc.runtime_service_abc import RuntimeServiceABC
from runtime.service.runtime_service import RuntimeService
class Startup(StartupABC): class Startup(StartupABC):
@ -16,12 +18,13 @@ class Startup(StartupABC):
self._services = services self._services = services
def configure_configuration(self) -> ConfigurationABC:
    """
    Registers the 'p' console argument and loads the console arguments
    :return: the configuration object held by this startup class
    """
    self._configuration.add_console_argument(ConsoleArgument('-', 'p', [], ' ', is_value_token_optional=True))
    self._configuration.add_console_arguments()
    return self._configuration
def configure_services(self) -> ServiceProviderABC:
    """
    Registers the lexer and runtime services as singletons
    :return: the service provider built from the collection
    """
    self._services.add_singleton(LexerABC, LexerService)
    self._services.add_singleton(RuntimeServiceABC, RuntimeService)
    return self._services.build_service_provider()

View File

@ -1,7 +1,14 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from cpl_query.extension.list import List
from lexer.model.token import Token
class LexerABC(ABC):
    """Interface of the lexer service."""

    @abstractmethod
    def __init__(self): pass

    @abstractmethod
    def tokenize(self, line: str) -> List[Token]:
        """
        Converts source text into tokens
        :param line: source text
        :return: list of tokens
        """
        pass

20
src/lexer/model/token.py Normal file
View File

@ -0,0 +1,20 @@
from lexer.model.token_types import TokenTypes
class Token:
    """A token type paired with its text; the type is read-only, the value can be replaced."""

    def __init__(self, token_type: TokenTypes, value: str) -> None:
        """
        :param token_type: category of the token
        :param value: text of the token
        """
        self._type: TokenTypes = token_type
        self._value: str = value

    @property
    def type(self) -> TokenTypes:
        return self._type

    @property
    def value(self) -> str:
        return self._value

    @value.setter
    def value(self, value: str):
        self._value = value

View File

@ -0,0 +1,24 @@
from enum import Enum
class TokenTypes(Enum):
    """Final token categories, as stored on a Token."""

    Empty = 0
    Keyword = 1
    Type = 2
    Name = 3
    Bool = 4
    String = 5
    Number = 6
    Expression_Character = 7
    Bool_Expression_Character = 8
    Format_Character = 9
class UnresolvedTokenTypes(Enum):
    """
    Preliminary token categories

    Word is the only category without a direct TokenTypes counterpart.
    """

    Empty = 0
    Word = 1
    Number = 2
    String = 3
    Expression_Character = 4
    Bool_Expression_Character = 5
    Format_Character = 6

View File

@ -0,0 +1,62 @@
from enum import Enum
class Keywords(Enum):
    """Keywords of the language; each value is the keyword's spelling in source code."""

    # define keywords
    Library = 'lib'
    Class = 'class'
    Function = 'func'
    Variable = 'var'
    Use = 'use'
    From = 'from'
    # builtin functions
    Output = 'output'
    Input = 'input'
    Length = 'length'
    Range = 'range'
    Exit = 'exit'
    # normal keywords
    If = 'if'
    ElseIf = 'elseif'
    Else = 'else'
    Continue = 'continue'
    In = 'in'
    Return = 'return'
    # loops
    While = 'while'
    For = 'for'
    # access
    Public = 'public'
    This = 'this'
class Booleans(Enum):
    """Source spellings of the two boolean literals."""

    Right = 'true'
    Wrong = 'false'
class ExpressionCharacters(Enum):
    """Single characters grouped as expression characters."""

    Plus = '+'
    Minus = '-'
    Asterisk = '*'
    Slash = '/'
    Equal = '='
    Caret = '^'
class FormatCharacters(Enum):
    """Brackets and punctuation characters grouped as format characters."""

    Left_Brace = '{'
    Right_Brace = '}'
    Left_Parenthesis = '('
    Right_Parenthesis = ')'
    Left_Bracket = '['
    Right_Bracket = ']'
    Semicolon = ';'
    Colon = ':'
    Comma = ','
    Point = '.'

View File

@ -1,7 +1,166 @@
from cpl_query.extension.list import List
from cc_lang.model.language_definition import LanguageDefinition
from lexer.abc.lexer_abc import LexerABC from lexer.abc.lexer_abc import LexerABC
from lexer.model.token import Token
from lexer.model.token_types import UnresolvedTokenTypes, TokenTypes
from runtime.abc.runtime_service_abc import RuntimeServiceABC
class LexerService(LexerABC): class LexerService(LexerABC):
def __init__(self, runtime: RuntimeServiceABC):
    """
    :param runtime: runtime service, stored for later use
    """
    self._runtime = runtime
    # True while a '/*' comment opened on an earlier line is still unclosed
    self._is_ml_comment = False
def _add_tok(self, tokens: List[Token], value: str, input_token_type: UnresolvedTokenTypes) -> None:
    """
    Creates token object
    :param tokens: list the new token is appended to
    :param value: text of the token; an empty string adds nothing
    :param input_token_type: preliminary category found while scanning
    :return:
    """
    if value == '':
        return

    if input_token_type == UnresolvedTokenTypes.Word:
        # A word is looked up in the language tables; anything unknown is a name.
        if value in LanguageDefinition.keywords:
            token_type = TokenTypes.Keyword
        elif value in LanguageDefinition.datatypes:
            token_type = TokenTypes.Type
        elif value in LanguageDefinition.bool_values:
            token_type = TokenTypes.Bool
        else:
            token_type = TokenTypes.Name
    else:
        # All other categories map one-to-one; an unlisted category stays Empty.
        token_type = {
            UnresolvedTokenTypes.Number: TokenTypes.Number,
            UnresolvedTokenTypes.String: TokenTypes.String,
            UnresolvedTokenTypes.Expression_Character: TokenTypes.Expression_Character,
            UnresolvedTokenTypes.Bool_Expression_Character: TokenTypes.Bool_Expression_Character,
            UnresolvedTokenTypes.Format_Character: TokenTypes.Format_Character,
        }.get(input_token_type, TokenTypes.Empty)

    tokens.append(Token(token_type, value))
def tokenize(self, line: str) -> List[Token]:
    """
    Splits source text into tokens

    self._is_ml_comment is kept between calls, so a '/*' comment may span several
    lines. All other scanner state is local to one call.

    Scanning rules:
    - '#' and '//' comment out the rest of the physical line, '/* ... */' a block
    - text between matching ' or " quotes is one String token, taken verbatim;
      a literal still open at a newline or at the end of the text is emitted as is
    - digits (and '.') starting on an empty word form one Number token
    - consecutive expression characters form one Expression_Character token
    - format characters and single bool expression characters are tokens of their own
    - whitespace separates words; text pending at the end of the line is emitted
    :param line: source text, with or without trailing newline
    :return: tokens in source order
    """
    tokens: List[Token] = List(Token)
    word = ''                           # text collected for the current token
    kind = UnresolvedTokenTypes.Word    # category `word` is emitted with
    quote = ''                          # opening quote while inside a string literal

    def flush() -> None:
        # emit the collected text, if any, and start over with a plain word
        nonlocal word, kind
        if word != '':
            self._add_tok(tokens, word, kind)
        word = ''
        kind = UnresolvedTokenTypes.Word

    i = 0
    while i < len(line):
        c = line[i]
        pair = line[i:i + 2]    # current and next character; shorter at the end
        i += 1

        # inside /* ... */: skip everything up to the terminator
        if self._is_ml_comment:
            if pair == '*/':
                self._is_ml_comment = False
                i += 1
            continue

        # inside a string literal: nothing is special except the closing quote
        if quote != '':
            if c == quote or c == '\n':
                flush()
                quote = ''
            else:
                word += c
            continue

        # one-line comment: emit what came before, then jump to the line end
        if c == '#' or pair == '//':
            flush()
            line_end = line.find('\n', i)
            i = len(line) if line_end == -1 else line_end
            continue

        # begin of multi-line comment
        if pair == '/*':
            flush()
            self._is_ml_comment = True
            i += 1
            continue

        # a number or operator run ends at the first character that cannot extend it;
        # that character is then handled normally below instead of being dropped
        if kind == UnresolvedTokenTypes.Number and not (c.isdigit() or c == '.'):
            flush()
        elif kind == UnresolvedTokenTypes.Expression_Character and c not in LanguageDefinition.expr_chars:
            flush()

        # begin of string
        if c == '\'' or c == '"':
            flush()
            quote = c
            kind = UnresolvedTokenTypes.String

        # continue number; checked before format chars so '.' stays part of it
        elif kind == UnresolvedTokenTypes.Number:
            word += c

        # format char
        elif c in LanguageDefinition.format_chars:
            flush()
            self._add_tok(tokens, c, UnresolvedTokenTypes.Format_Character)

        # begin of number; a digit inside a word stays part of the word
        elif c.isdigit() and word == '':
            word = c
            kind = UnresolvedTokenTypes.Number

        # begin or continue expression char
        elif c in LanguageDefinition.expr_chars:
            if kind != UnresolvedTokenTypes.Expression_Character:
                flush()
                kind = UnresolvedTokenTypes.Expression_Character
            word += c

        # bool expression char
        elif c in LanguageDefinition.bool_expr_chars:
            flush()
            self._add_tok(tokens, c, UnresolvedTokenTypes.Bool_Expression_Character)

        # end of word
        elif c.isspace():
            flush()

        else:
            word += c

    # console input has no trailing newline, so emit whatever is still pending
    flush()
    return tokens

View File

@ -0,0 +1,15 @@
from abc import ABC, abstractmethod
class RuntimeServiceABC(ABC):
    """Interface of the runtime service: exposes a readable and writable line_count."""

    @abstractmethod
    def __init__(self): pass

    @property
    @abstractmethod
    def line_count(self) -> int: pass

    @line_count.setter
    @abstractmethod
    def line_count(self, line_count: int): pass

View File

@ -0,0 +1 @@
# imports

View File

@ -0,0 +1,15 @@
from runtime.abc.runtime_service_abc import RuntimeServiceABC
class RuntimeService(RuntimeServiceABC):
    """Runtime service that keeps line_count in a plain attribute, starting at 0."""

    def __init__(self):
        self._line_count = 0

    @property
    def line_count(self) -> int:
        return self._line_count

    @line_count.setter
    def line_count(self, line_count: int):
        self._line_count = line_count