Added lexer
src/lexer/abc/lexer_abc.py
@@ -1,7 +1,14 @@
from abc import ABC, abstractmethod

from cpl_query.extension.list import List

from lexer.model.token import Token


class LexerABC(ABC):

    @abstractmethod
    def __init__(self): pass

    @abstractmethod
    def tokenize(self, line: str) -> List[Token]: pass
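For reference, a minimal concrete implementation of this interface could look like the sketch below. The class name WhitespaceLexer and its splitting logic are illustrative only and not part of this commit; the sketch assumes TokenTypes is imported alongside Token.

from lexer.model.token_types import TokenTypes  # assumed import for this sketch

class WhitespaceLexer(LexerABC):

    def __init__(self):
        pass

    def tokenize(self, line: str) -> List[Token]:
        # naive sketch: every whitespace-separated chunk becomes a Name token
        tokens: List[Token] = List(Token)
        for chunk in line.split():
            tokens.append(Token(TokenTypes.Name, chunk))
        return tokens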
0   src/lexer/model/__init__.py          Normal file
20  src/lexer/model/token.py             Normal file
@@ -0,0 +1,20 @@
from lexer.model.token_types import TokenTypes


class Token:

    def __init__(self, token_type: TokenTypes, value: str) -> None:
        self._type: TokenTypes = token_type
        self._value: str = value

    @property
    def type(self) -> TokenTypes:
        return self._type

    @property
    def value(self) -> str:
        return self._value

    @value.setter
    def value(self, value: str):
        self._value = value
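For illustration, the accessors above could be exercised like this (a minimal sketch, not part of the commit):

token = Token(TokenTypes.Keyword, 'if')
assert token.type == TokenTypes.Keyword
assert token.value == 'if'
token.value = 'elseif'  # only the raw value is mutable; the type is fixed at construction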
24  src/lexer/model/token_types.py       Normal file
@@ -0,0 +1,24 @@
from enum import Enum


class TokenTypes(Enum):
    Empty = 0
    Keyword = 1
    Type = 2
    Name = 3
    Bool = 4
    String = 5
    Number = 6
    Expression_Character = 7
    Bool_Expression_Character = 8
    Format_Character = 9


class UnresolvedTokenTypes(Enum):
    Empty = 0
    Word = 1
    Number = 2
    String = 3
    Expression_Character = 4
    Bool_Expression_Character = 5
    Format_Character = 6
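The two enums reflect the two lexing passes: the scanner only records the rough shape of a lexeme (UnresolvedTokenTypes), and a later resolution step decides the final TokenTypes. For the non-Word shapes that mapping is mechanical, roughly as in this sketch; resolve_shape and _SHAPE_TO_TYPE are illustrative helpers, not part of this commit.

_SHAPE_TO_TYPE = {
    UnresolvedTokenTypes.Number: TokenTypes.Number,
    UnresolvedTokenTypes.String: TokenTypes.String,
    UnresolvedTokenTypes.Expression_Character: TokenTypes.Expression_Character,
    UnresolvedTokenTypes.Bool_Expression_Character: TokenTypes.Bool_Expression_Character,
    UnresolvedTokenTypes.Format_Character: TokenTypes.Format_Character,
}

def resolve_shape(shape: UnresolvedTokenTypes) -> TokenTypes:
    # Word needs keyword/datatype/bool lookups and is handled separately in LexerService._add_tok
    return _SHAPE_TO_TYPE.get(shape, TokenTypes.Empty)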
62  src/lexer/model/token_value_types.py  Normal file
@@ -0,0 +1,62 @@
from enum import Enum


class Keywords(Enum):
    # define keywords
    Library = 'lib'
    Class = 'class'
    Function = 'func'
    Variable = 'var'
    Use = 'use'
    From = 'from'

    # builtin functions
    Output = 'output'
    Input = 'input'
    Length = 'length'
    Range = 'range'
    Exit = 'exit'

    # normal keywords
    If = 'if'
    ElseIf = 'elseif'
    Else = 'else'
    Continue = 'continue'
    In = 'in'
    Return = 'return'

    # loops
    While = 'while'
    For = 'for'

    # access
    Public = 'public'
    This = 'this'


class Booleans(Enum):
    Right = 'true'
    Wrong = 'false'


class ExpressionCharacters(Enum):
    Plus = '+'
    Minus = '-'
    Asterisk = '*'
    Slash = '/'
    Equal = '='
    Caret = '^'


class FormatCharacters(Enum):
    Left_Brace = '{'
    Right_Brace = '}'
    Left_Parenthesis = '('
    Right_Parenthesis = ')'
    Left_Bracket = '['
    Right_Bracket = ']'
    Semicolon = ';'
    Colon = ':'
    Comma = ','
    Point = '.'
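LexerService matches raw words against LanguageDefinition.keywords, datatypes and bool_values; LanguageDefinition is not part of this commit, but plausible lookup collections could be derived from the enums above, e.g.:

# sketch only: assumes LanguageDefinition exposes plain string collections
keywords = [keyword.value for keyword in Keywords]          # 'lib', 'class', 'func', ...
bool_values = [boolean.value for boolean in Booleans]       # 'true', 'false'
expr_chars = [char.value for char in ExpressionCharacters]  # '+', '-', '*', ...
format_chars = [char.value for char in FormatCharacters]    # '{', '}', '(', ...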
@@ -1,7 +1,166 @@
from cpl_query.extension.list import List

from cc_lang.model.language_definition import LanguageDefinition
from lexer.abc.lexer_abc import LexerABC
from lexer.model.token import Token
from lexer.model.token_types import UnresolvedTokenTypes, TokenTypes
from runtime.abc.runtime_service_abc import RuntimeServiceABC


class LexerService(LexerABC):

    def __init__(self, runtime: RuntimeServiceABC):
        self._runtime = runtime

        self._is_ml_comment = False

    def _add_tok(self, tokens: List[Token], value: str, input_token_type: UnresolvedTokenTypes) -> None:
        """
        Resolves the final token type and appends a new token to the given token list

        :param tokens: token list the new token is appended to
        :param value: raw lexeme read from the input line
        :param input_token_type: rough token shape detected while scanning
        :return: None
        """
        token_type: TokenTypes = TokenTypes.Empty

        if value != '':
            if input_token_type == UnresolvedTokenTypes.Word:
                if value in LanguageDefinition.keywords:
                    token_type = TokenTypes.Keyword

                elif value in LanguageDefinition.datatypes:
                    token_type = TokenTypes.Type

                elif value in LanguageDefinition.bool_values:
                    token_type = TokenTypes.Bool

                else:
                    token_type = TokenTypes.Name

            elif input_token_type == UnresolvedTokenTypes.Number:
                token_type = TokenTypes.Number

            elif input_token_type == UnresolvedTokenTypes.String:
                token_type = TokenTypes.String

            elif input_token_type == UnresolvedTokenTypes.Expression_Character:
                token_type = TokenTypes.Expression_Character

            elif input_token_type == UnresolvedTokenTypes.Bool_Expression_Character:
                token_type = TokenTypes.Bool_Expression_Character

            elif input_token_type == UnresolvedTokenTypes.Format_Character:
                token_type = TokenTypes.Format_Character

            tokens.append(Token(token_type, value))

    def tokenize(self, line: str) -> List[Token]:
        tokens: List[Token] = List(Token)
        word = ''
        ol_comment = False
        is_string1 = False  # 'hello'
        is_string2 = False  # "hello"
        is_number = False
        is_expr_char = False

        for i in range(0, len(line)):
            c = line[i]
            # ignore comments and spaces
            if not ol_comment and not self._is_ml_comment:
                # comment filtering
                if c == '#' and not is_string1 and not is_string2:
                    ol_comment = True

                elif i > 0 and line[i - 1] == '/' and c == '/':
                    ol_comment = True

                elif i > 0 and line[i - 1] == '/' and c == '*':
                    self._is_ml_comment = True

                # end of number
                elif not c.isdigit() and c != '.' and is_number:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Number)
                    # re-scan the terminating character so it is classified on its own
                    local_tokens = self.tokenize(c)
                    for local_token in local_tokens:
                        tokens.append(local_token)

                    word = ''
                    is_number = False

                # end of expression char
                elif c not in LanguageDefinition.expr_chars and is_expr_char:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Expression_Character)
                    word = ''
                    is_expr_char = False

                # begin of is_string1
                elif c == '\'' and not is_string1:
                    is_string1 = True
                    word = ''

                # end of is_string1
                elif c == '\'' and is_string1:
                    is_string1 = False
                    self._add_tok(tokens, word, UnresolvedTokenTypes.String)
                    word = ''

                # begin of is_string2
                elif c == '\"' and not is_string2:
                    is_string2 = True
                    word = ''

                # end of is_string2
                elif c == '\"' and is_string2:
                    is_string2 = False
                    self._add_tok(tokens, word, UnresolvedTokenTypes.String)
                    word = ''

                # format char
                elif c in LanguageDefinition.format_chars:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
                    self._add_tok(tokens, c, UnresolvedTokenTypes.Format_Character)
                    word = ''

                # begin of number
                elif c.isdigit() and not is_number and word == '':
                    word += c
                    is_number = True

                # continue number
                elif (c.isdigit() or c == '.') and is_number:
                    word += c

                # begin expression char
                elif c in LanguageDefinition.expr_chars and not is_expr_char:
                    word += c
                    is_expr_char = True

                # continue expression char
                elif c in LanguageDefinition.expr_chars and is_expr_char:
                    word += c

                # bool expression char
                elif c in LanguageDefinition.bool_expr_chars:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
                    self._add_tok(tokens, c, UnresolvedTokenTypes.Bool_Expression_Character)
                    word = ''

                # end of word; assumes each full line is delimited by ' ' or '\n'
                elif (c == ' ' and not is_string1 and not is_string2) or c == '\n':
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
                    word = ''

                else:
                    word += c

            if c == '\n' and ol_comment:
                ol_comment = False

            if i > 0 and line[i - 1] == '*' and c == '/':
                self._is_ml_comment = False

        return tokens
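To see the lexer in action, something like the following sketch should work, assuming RuntimeServiceABC demands no abstract members for plain tokenizing; StubRuntime and the sample line are illustrative, not part of this commit.

class StubRuntime(RuntimeServiceABC):
    pass  # assumption: no abstract methods need implementing for tokenize()

lexer = LexerService(StubRuntime())
for token in lexer.tokenize('var x = 42;\n'):
    print(token.type, token.value)
# expected shape: Keyword 'var', Name 'x', Expression_Character '=', Number '42', Format_Character ';'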