from cpl_query.extension.list import List
from cc_lang.model.language_definition import LanguageDefinition
from lexer.abc.lexer_abc import LexerABC
from lexer.model.token import Token
from lexer.model.token_types import UnresolvedTokenTypes, TokenTypes
from runtime.abc.runtime_service_abc import RuntimeServiceABC


class LexerService(LexerABC):

    def __init__(self, runtime: RuntimeServiceABC):
        self._runtime = runtime
        self._is_ml_comment = False

    def _add_tok(self, tokens: List[Token], value: str, input_token_type: UnresolvedTokenTypes) -> None:
        """
        Resolves the final token type of value and appends the created token to tokens

        :param tokens: token list the new token is appended to
        :param value: raw token text collected while scanning
        :param input_token_type: preliminary token type guessed by the tokenizer
        :return: None
        """
        token_type: TokenTypes = TokenTypes.Empty

        if value != '':
            if input_token_type == UnresolvedTokenTypes.Word:
                if value in LanguageDefinition.keywords:
                    token_type = TokenTypes.Keyword
                elif value in LanguageDefinition.datatypes:
                    token_type = TokenTypes.Type
                elif value in LanguageDefinition.bool_values:
                    token_type = TokenTypes.Bool
                elif value == UnresolvedTokenTypes.Empty:
                    token_type = TokenTypes.Empty
                else:
                    token_type = TokenTypes.Name
            elif input_token_type == UnresolvedTokenTypes.Number:
                token_type = TokenTypes.Number
            elif input_token_type == UnresolvedTokenTypes.String:
                token_type = TokenTypes.String
            elif input_token_type == UnresolvedTokenTypes.Expression_Character:
                token_type = TokenTypes.Expression_Character
            elif input_token_type == UnresolvedTokenTypes.Bool_Expression_Character:
                token_type = TokenTypes.Bool_Expression_Character
            elif input_token_type == UnresolvedTokenTypes.Format_Character:
                token_type = TokenTypes.Format_Character

            tokens.append(Token(token_type, value))

    def tokenize(self, line: str) -> List[Token]:
        tokens: List[Token] = List(Token)
        word = ''
        ol_comment = False
        is_string1 = False  # 'hello'
        is_string2 = False  # "hello"
        is_number = False
        is_expr_char = False

        for i in range(0, len(line)):
            c = line[i]
            # ignore comments and spaces
            if not ol_comment and not self._is_ml_comment:
                # comment filtering; the i > 0 guards prevent line[i - 1]
                # from wrapping around to the last character at i == 0
                if c == '#' and not is_string1 and not is_string2:
                    ol_comment = True
                elif i > 0 and line[i - 1] == '/' and c == '/':
                    ol_comment = True
                elif i > 0 and line[i - 1] == '/' and c == '*':
                    # following characters are skipped via the flag until '*/'
                    self._is_ml_comment = True
                # end of number
                elif not c.isdigit() and c != '.' and is_number:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Number)
                    # the terminating character may itself start a new token
                    local_tokens = self.tokenize(c)
                    for local_token in local_tokens:
                        tokens.append(local_token)
                    word = ''
                    is_number = False
                # end of expression char
                elif c not in LanguageDefinition.expr_chars and is_expr_char:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Expression_Character)
                    word = ''
                    is_expr_char = False
                # begin of is_string1
                elif c == '\'' and not is_string1:
                    is_string1 = True
                    word = ''
                # end of is_string1
                elif c == '\'' and is_string1:
                    is_string1 = False
                    self._add_tok(tokens, word, UnresolvedTokenTypes.String)
                    word = ''
                # begin of is_string2
                elif c == '\"' and not is_string2:
                    is_string2 = True
                    word = ''
                # end of is_string2
                elif c == '\"' and is_string2:
                    is_string2 = False
                    self._add_tok(tokens, word, UnresolvedTokenTypes.String)
                    word = ''
                # format char
                elif c in LanguageDefinition.format_chars:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
                    self._add_tok(tokens, c, UnresolvedTokenTypes.Format_Character)
                    word = ''
                # begin of number
                elif c.isdigit() and not is_number and word == '':
                    word += c
                    is_number = True
                # continue number
                elif (c.isdigit() or c == '.') and is_number:
                    word += c
                # begin expression char
                elif c in LanguageDefinition.expr_chars and not is_expr_char:
                    word += c
                    is_expr_char = True
                # continue expression char
                elif c in LanguageDefinition.expr_chars and is_expr_char:
                    word += c
                # bool expression char
                elif c in LanguageDefinition.bool_expr_chars:
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
                    self._add_tok(tokens, c, UnresolvedTokenTypes.Bool_Expression_Character)
                    word = ''
                # end of word
                elif (c == ' ' and not is_string1 and not is_string2) or c == '\n':
                    self._add_tok(tokens, word, UnresolvedTokenTypes.Word)
                    word = ''
                else:
                    word += c

            if c == '\n' and ol_comment:
                ol_comment = False

            if i > 0 and line[i - 1] == '*' and c == '/':
                self._is_ml_comment = False

        return tokens
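

# Usage sketch, not part of the service itself: tokenizes one source line and
# prints the resulting tokens. Passing None for the runtime is an assumption
# that holds only because __init__ merely stores it; in real use, inject a
# concrete RuntimeServiceABC implementation. The sample input is hypothetical
# and which token types it yields depends on LanguageDefinition's keyword and
# character sets.
if __name__ == '__main__':
    lexer = LexerService(None)  # assumption: runtime is unused during tokenize
    for token in lexer.tokenize('var x = 42\n'):
        print(token)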