lexical-parser (#15)

add: 词法分析器剩下数字、标识符的细节处理以及错误处理 Co-authored-by: duqoo <92306417+duqoo@users.noreply.github.com> Reviewed-on: PostGuard/Canon#15 Co-authored-by: Huaps <1183155719@qq.com> Co-committed-by: Huaps <1183155719@qq.com>
2024-04-04 21:25:11 +08:00
parent ccd1899739
commit c4189fd1b2
13 changed files with 955 additions and 426 deletions
--- a/Canon.Core/Enums/ErrorEnums.cs
+++ b/Canon.Core/Enums/ErrorEnums.cs
@@ -0,0 +1,11 @@
+namespace Canon.Core.Enums;
+
+public enum LexemeErrorType
+{
+    IllegalNumberFormat,//数字格式不正确
+    UnknownCharacterOrString,//源代码包含无法识别的字符或字符串
+    UnclosedStringLiteral,//字符串字面量未闭合
+    UnclosedComment,//注释未闭合
+    InvalidEscapeSequence,//无效的转义字符
+    IllegalOperator,//非法的操作符
+}
--- a/Canon.Core/Enums/SemanticEnums.cs
+++ b/Canon.Core/Enums/SemanticEnums.cs
@@ -93,7 +93,7 @@ public enum StateType
    Word,
    Digit,
    Delimiter,
-    Other
+    Operator
 }

 public enum BasicIdType
--- a/Canon.Core/Exceptions/LexemeException.cs
+++ b/Canon.Core/Exceptions/LexemeException.cs
@@ -1,9 +1,13 @@
 namespace Canon.Core.Exceptions;
+using Enums;
 /// <summary>
 /// 词法分析中引发的异常
 /// </summary>
 public class LexemeException : Exception
 {
+    public LexemeErrorType ErrorType { get; }
+    public uint Line { get; }
+    public uint CharPosition { get; }
    public LexemeException()  { }

    public LexemeException(string message) : base(message) { }
@@ -11,15 +15,20 @@ public class LexemeException : Exception
    public LexemeException(string message, Exception innerException) :
        base(message, innerException) { }

+    /// <param name="errorType">错误类型</param>
    /// <param name="line">单词的行号</param>
    /// <param name="charPosition">单词的列号</param>
    /// <param name="message">错误信息</param>
-    public LexemeException(uint line, uint charPosition, string message) :
-        base("line:" + line + ", charPosition:" + charPosition + " :" + message) { }
+    public LexemeException(LexemeErrorType errorType, uint line, uint charPosition, string message) :
+        base("line:" + line + ", charPosition:" + charPosition + " :" + message)
+    {
+        ErrorType = errorType;
+        Line = line;
+        CharPosition = charPosition;
+    }

-    public LexemeException(uint line, uint charPosition, Exception innerException) :
-        base("line:" + line + ", charPosition:" + charPosition + " : ", innerException) { }
-
-    public LexemeException(uint line, uint charPosition, string message, Exception innerException) :
-        base("line:" + line + ", charPosition:" + charPosition + " :" + message, innerException) { }
+    public override string ToString()
+    {
+        return $"LexemeException: ErrorType={ErrorType}, Line={Line}, CharPosition={CharPosition}, Message={Message}\n";
+    }
 }
--- a/Canon.Core/LexicalParser/Lexer.cs
+++ b/Canon.Core/LexicalParser/Lexer.cs
@@ -1,5 +1,7 @@
+using System.Numerics;
 using System.Text;
 using Canon.Core.Enums;
+using Canon.Core.Exceptions;

 namespace Canon.Core.LexicalParser;

@@ -17,27 +19,31 @@ public class Lexer(string source)
        "Not", "Mod", "And", "Or"
    ];

-    private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]","'","\"",".."];
+    private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];
+
+    private readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];

    // 状态机
    private StateType _state;
    private char _ch;

    private LinkedList<char> _token = new LinkedList<char>();
+
    // bool save;
    // int saved_state;
    bool _finish;
-    private bool eof;
+

    //缓冲区
    private readonly char[] _buffer = new char[2048];
+
    // int start_pos;
    private int _fwdPos;

    // 计数器
    private uint _line = 1;
    private uint _chPos;
-    private int _sourcePos;
+
    private readonly Dictionary<SemanticTokenType, int> _tokenCount = new Dictionary<SemanticTokenType, int>
    {
        { SemanticTokenType.Keyword, 0 },
@@ -57,234 +63,394 @@ public class Lexer(string source)
        // 缓冲区
        // start_pos = 0;
        _fwdPos = 0;
-        FillLeftBuffer();

        // 状态机
        _finish = false;

-        while (!_finish) {
+        while (!_finish)
+        {
            GetChar();
            GetNbc();
+            if (_finish) break;

            _token = new LinkedList<char>();

-            if (IsLetter()) {
+            if (IsLetter())
+            {
                _state = StateType.Word;
            }
-            else if (IsDigit()) {
+            else if(_ch == '.')
+            {
+                char next = PeekNextChar();
+                if (next >= '0' && next <= '9')
+                {
+                    _state = StateType.Digit;
+                }
+                else
+                {
+                    _state = StateType.Delimiter;
+                }
+            }
+            else if (IsDigit() || _ch == '$')
+            {
                _state = StateType.Digit;
            }
-            else if (IsDelimiter()) {
+            else if (IsDelimiter())
+            {
                _state = StateType.Delimiter;
            }
+            else if (_ch == '{')
+            {
+                GetChar();
+                while (_ch != '}')
+                {
+                    GetChar();
+                    if (_ch == '\n')
+                    {
+                        _line++;
+                        _chPos = 0;
+                    }
+                    if (_finish)
+                    {
+                        throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed.");
+                    }
+
+                }
+
+                continue;
+            }
            else
            {
-                _state = StateType.Other;
+                _state = StateType.Operator;
            }

            switch (_state)
            {
-            case StateType.Word: {
-                while (IsDigit() || IsLetter())
+                case StateType.Word:
+                    while (IsDigit() || IsLetter())
+                    {
+                        Cat();
+                        GetChar();
+                    }
+
+                    Retract();
+
+                    if (IsKeyword())
+                    {
+                        KeywordType keywordType =
+                            KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First));
+                        MakeToken(keywordType);
+                    }
+                    else
+                    {
+                        MakeToken(SemanticTokenType.Identifier);
+                    }
+
+                    break;
+                case StateType.Digit:
+                    DealNumber();
+                    break;
+                case StateType.Delimiter:
+                    Cat();
+                    switch (_ch)
+                    {
+                        case '.':
+                            {
+                                GetChar();
+                                if (_ch == '.')
+                                {
+                                    Cat();
+                                    MakeToken(DelimiterType.DoubleDots);
+                                    break;
+                                }
+
+                                Retract();
+                                if (IsDot())
+                                {
+                                    MakeToken(DelimiterType.Dot);
+                                }
+                                else
+                                {
+                                    MakeToken(DelimiterType.Period);
+                                }
+                            }
+                            break;
+                        case '\'':
+                        case '\"':
+                            {
+                                // 重置_token，准备收集字符串内容
+                                _token = new LinkedList<char>();
+
+                                GetChar(); // 移动到下一个字符，即字符串的第一个字符
+                                while (_ch != '\'' && _ch != '\"')
+                                {
+                                    Cat(); // 收集字符
+                                    GetChar(); // 移动到下一个字符
+                                    if (_ch == '\n' || _finish)
+                                    {
+                                        throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos, "The String is not closed.");
+                                    }
+                                }
+
+                                MakeToken(SemanticTokenType.Character); // 或其它适用于字符串字面量的SemanticTokenType
+                                _token = new LinkedList<char>(); // 重置_token
+
+                                if (!(_ch == '\'' || _ch == '\"'))
+                                {
+                                    throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos, "The String is not closed.");
+                                }
+                            }
+                            break;
+                        case ',':
+                            MakeToken(DelimiterType.Comma);
+                            break;
+                        case ':':
+                            char nextChar = PeekNextChar();
+                            if (nextChar == '=')
+                            {
+                                GetChar();
+                                Cat();
+                                MakeToken(OperatorType.Assign);
+                            }
+                            else
+                            {
+                                MakeToken(DelimiterType.Colon);
+                            }
+
+                            break;
+                        case ';':
+                            MakeToken(DelimiterType.Semicolon);
+                            break;
+                        case '(':
+                            char next = PeekNextChar();
+                            if (next == '*')
+                            {
+                                GetChar();
+                                bool commentClosed = false;
+                                while (!commentClosed)
+                                {
+                                    GetNbc();
+                                    GetChar();
+                                    while (_ch != '*')
+                                    {
+                                        GetNbc();
+                                        GetChar();
+                                        if (_finish)
+                                        {
+                                            throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed.");
+                                        }
+                                    }
+
+                                    GetChar();
+                                    if (_finish)
+                                    {
+                                        throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed.");
+                                    }
+
+                                    if (_ch == ')') commentClosed = true;
+                                }
+                            }
+                            else
+                            {
+                                MakeToken(DelimiterType.LeftParenthesis);
+                            }
+
+                            break;
+                        case ')':
+                            MakeToken(DelimiterType.RightParenthesis);
+                            break;
+                        case '[':
+                            MakeToken(DelimiterType.LeftSquareBracket);
+                            break;
+                        case ']':
+                            MakeToken(DelimiterType.RightSquareBracket);
+                            break;
+                    }
+
+                    break;
+                case StateType.Operator:
+                    DealOther();
+                    break;
+                default:
+                    throw new ArgumentOutOfRangeException();
+            }
+
+        }
+
+        return _tokens;
+    }
+
+    private void DealNumber()
+    {
+        // 十六进制
+        if (_ch == '$')
+        {
+            Cat();
+
+            GetChar();
+            while (!NumberShouldBreak())
+            {
+                // 假设IsHexDigit方法能够识别十六进制数字
+                if (IsHexDigit())
                {
                    Cat();
                    GetChar();
                }
-                Retract();
-
-                if (IsKeyword())
+                else if(NumberShouldBreak())
                {
-                    KeywordType keywordType =
-                        KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First));
-                    MakeToken(keywordType);
+                    break;
                }
-                else {
-                    MakeToken(SemanticTokenType.Identifier);
+                else
+                {
+                    throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal hex numbers!");
                }
-                break;
            }
+            MakeToken(NumberType.Hex);
+            return;
+        }

-            case StateType.Digit:
+        // 非十六进制
+        if(IsDigit() || _ch == '.')
+        {
+            while (!NumberShouldBreak())
+            {
+                // 含小数部分
+                if (_ch == '.')
                {
-                    bool error = false;
-                    bool tag = false; // 用于标记是否已经处理过科学记数法的指数部分
-                    bool doubleDot = false;
-                    NumberType numberType = NumberType.Integer;
-
-                    while (IsDigit() || _ch == '.' || _ch == 'E' || _ch == '+' || _ch == '-' || _ch == 'e' || IsLetter()) {
-                        if (_ch != '.')
-                        {
-                            Cat();
-                        }
-
-
-                        if (_ch == '0' && !tag) {
-                            GetChar();
-                            if (_ch == 'x' || _ch == 'X') {
-                                numberType = NumberType.Hex;    // 标识十六进制
-                                Cat();
-                                while (IsHexDigit()) { // 假设IsHexDigit方法能够识别十六进制数字
-                                    Cat();
-                                }
-                                break;
-                            }
-                            Retract(); // 如果不是'x'或'X'，回退一个字符
-                        }
-                        else if (_ch == '.') {
-                            GetChar();
-                            if (_ch == '.') {
-                                Retract(); // 回退到第一个'.'
-                                Retract(); // 回退到'.'之前的数字
-                                doubleDot = true;
-                                break;
-                            }
-                            Retract();
-                            Cat();
-                            numberType = NumberType.Real;
-                        }
-                        else if ((_ch == 'e' || _ch == 'E') && !tag) {
-                            GetChar();
-                            if (IsDigit() || _ch == '+' || _ch == '-') {
-                                Cat();
-                                tag = true; // 已处理指数部分
-                                continue;
-                            }
-                            error = true; // 错误的科学记数法
-                            break;
-                        }
-
-                        GetChar();
-                    }
-
-                    if (!error) {
-                        MakeToken(numberType);
-                        if (doubleDot)
-                        {
-                            break;
-                        }
-                        Retract();
-                    }
-                    else
+                    // 检查是否是符号 “..”
+                    char next = PeekNextChar();
+                    if (next == '.')
                    {
                        Retract();
-                        PrintError(0,_token.First,_line);
-                        _tokenCount[SemanticTokenType.Error]++;
+                        break;
                    }
-                    break;
-                }

-            case StateType.Delimiter:
-                Cat();
-                switch (_ch)
-                {
-                case '.':
+                    // 不是符号 “..”,进入小数点后的判断
+                    Cat();  // 记录“.”
+
+                    // “.”后不应为空，至少应该有一位小数
+                    GetChar();
+                    if (NumberShouldBreak())
                    {
-                        GetChar();
-                        if (_ch == '.')
+                        throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal numbers!");
+                    }
+
+                    // 读取小数点后的数字
+                    while (!NumberShouldBreak())
+                    {
+                        if (IsDigit())
                        {
                            Cat();
-                            MakeToken(DelimiterType.DoubleDots);
+                            GetChar();
+                        }
+                        else if (_ch == 'e' || _ch == 'E')
+                        {
+                            DealE();
                            break;
                        }
-                        Retract();
-                        if (IsPeriod())
+                        else if(NumberShouldBreak())
                        {
-
-                        }else if (IsDot())
-                        {
-
-                        }
-                    }
-                    break;
-                case '\'':
-                case '\"':
-                    {
-                        if(_ch == '\'') MakeToken(DelimiterType.SingleQuotation);
-                        else if(_ch == '\"') MakeToken(DelimiterType.DoubleQuotation);
-
-                        // 重置_token，准备收集字符串内容
-                        _token = new LinkedList<char>();
-
-                        GetChar(); // 移动到下一个字符，即字符串的第一个字符
-                        while (_ch != '\'' && _ch != '\"')
-                        {
-                            Cat(); // 收集字符
-                            GetChar(); // 移动到下一个字符
-                        }
-
-                        // 在退出循环时，_ch为'或EOF，此时_token包含字符串内容
-                        // 创建字符内容的token，注意这里使用SemanticTokenType.String表示字符串字面量
-                        MakeToken(SemanticTokenType.Character); // 或其它适用于字符串字面量的SemanticTokenType
-                        _token = new LinkedList<char>(); // 重置_token
-
-                        if (_ch == '\'' && _ch != '\n')
-                        {
-                            // 识别并创建最后一个单引号的token
-                            Cat();
-                            MakeToken(DelimiterType.SingleQuotation);
-                        }
-                        else if (_ch == '\"')
-                        {
-                            Cat();
-                            MakeToken(DelimiterType.DoubleQuotation);
+                            break;
                        }
                        else
                        {
-                            // 这里处理遇到EOF但没有闭合单引号的情况，例如：'字符串结尾没有单引号
-                            // 可以添加错误处理代码
-                            PrintError(0, _token.First, _line); // 假设这个方法用于打印错误
+                            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
                        }
                    }
-                    break;
-                case ',':
-                    MakeToken(DelimiterType.Comma);
-                    break;
-                case ':':
-                    MakeToken(DelimiterType.Colon);
-                    break;
-                case ';':
-                    MakeToken(DelimiterType.Semicolon);
-                    break;
-                case '(':
-                    MakeToken(DelimiterType.LeftParenthesis);
-                    break;
-                case ')':
-                    MakeToken(DelimiterType.RightParenthesis);
-                    break;
-                case '[':
-                    MakeToken(DelimiterType.LeftSquareBracket);
-                    break;
-                case ']':
-                    MakeToken(DelimiterType.RightSquareBracket);
+                    MakeToken(NumberType.Real);
+                    return;
+                }
+
+                // 不含小数部分，含科学计数法
+                if (_ch == 'e' || _ch == 'E')
+                {
+                    DealE();
+                    MakeToken(NumberType.Real);
+                    return;
+                }
+
+                // 暂时为整数
+                if (IsDigit())
+                {
+                    Cat();
+                    GetChar();
+                }
+                else if(NumberShouldBreak())
+                {
                    break;
                }
-                break;
-
-            case StateType.Other:
-                DealOther();
-                break;
-            default:
-                throw new ArgumentOutOfRangeException();
+                else
+                {
+                    throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
+                }
            }
-
+            MakeToken(NumberType.Integer);
        }
-        PrintResult();
-        return _tokens;
+
+    }
+
+    private void DealE()
+    {
+        Cat();
+        GetChar();
+        if (IsDigit() || _ch == '+' || _ch == '-')
+        {
+            Cat();
+        }
+        else
+        {
+            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
+        }
+
+        // 读取e后的数字
+        GetChar();
+        while (!NumberShouldBreak())
+        {
+            if (IsDigit())
+            {
+                Cat();
+                GetChar();
+            }
+            else
+            {
+                throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
+            }
+        }
+    }
+
+    bool NumberShouldBreak()
+    {
+        if (_ch == ' ' || _ch == '\n' || _ch == '\t' || _ch == '\r' || (IsDelimiter() && _ch!='.') || IsOperator() || _finish)
+        {
+            Retract();
+            return true;
+        }
+
+        return false;
+    }
+
+    private bool IsOperator()
+    {
+        foreach (var o in _operator)
+        {
+            if (o.Contains(_ch))
+            {
+                return true;
+            }
+        }
+        return false;
    }

    private bool IsDot()
    {
-        SemanticToken tokenBefore = _tokens.Last();
-        if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true;
+        if (_tokens.Count != 0)
+        {
+            SemanticToken tokenBefore = _tokens.Last();
+            if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true;
+        }
        return false;
    }

-    private bool IsPeriod()
-    {
-        SemanticToken tokenBefore = _tokens.Last();
-        if (tokenBefore.TokenType == SemanticTokenType.Keyword) return true;
-        return false;
-    }

    private void DealOther()
    {
@@ -348,28 +514,8 @@ public class Lexer(string source)
                    MakeToken(OperatorType.Greater);
                }
                break;
-            case ':':
-                Cat();
-                GetChar();
-                if (_ch == '=')
-                {
-                    // 识别 :=
-                    Cat();
-                    MakeToken(OperatorType.Assign);
-                }
-                else
-                {
-                    // 这里应该被识别为delimiter逻辑上
-                    Cat();
-                    PrintError(1, _token.First, _line);
-                    _tokenCount[SemanticTokenType.Error]++;
-                }
-                break;
            default:
-                Cat();
-                PrintError(1, _token.First, _line);
-                _tokenCount[SemanticTokenType.Error]++;
-                break;
+                throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos, "Illegal lexeme.");
        }
    }

@@ -396,14 +542,6 @@ public class Lexer(string source)
                };
                token = identifierSemanticToken;
                break;
-            case SemanticTokenType.Error:
-                ErrorSemanticToken errorSemanticToken = new ErrorSemanticToken()
-                {
-                    LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First),
-                };
-                token = errorSemanticToken;
-                break;
-
            default:
                throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null);
        }
@@ -449,6 +587,32 @@ public class Lexer(string source)
        Console.WriteLine(LinkedListToString(_token.First));
    }

+    private void MakeToken(NumberType numberType)
+    {
+        string temp = LinkedListToString(_token.First);
+        string result;
+        if (numberType == NumberType.Hex)
+        {
+            result = string.Concat("0x", temp.AsSpan(1, temp.Length - 1));
+        }
+        else
+        {
+            result = temp;
+        }
+
+        NumberSemanticToken numberSemanticToken = new NumberSemanticToken()
+        {
+            LinePos = _line,
+            CharacterPos = _chPos,
+            LiteralValue = result,
+            NumberType = numberType
+        };
+        _tokens.Add(numberSemanticToken);
+        _tokenCount[SemanticTokenType.Number]++;
+        Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>");
+        Console.WriteLine(LinkedListToString(_token.First));
+    }
+
    private void MakeToken(OperatorType operatorType)
    {
        OperatorSemanticToken operatorSemanticToken = new OperatorSemanticToken()
@@ -464,88 +628,20 @@ public class Lexer(string source)
        Console.WriteLine(LinkedListToString(_token.First));
    }

-    private void MakeToken(NumberType numberType)
-    {
-        NumberSemanticToken numberSemanticToken = new NumberSemanticToken()
-        {
-            LinePos = _line,
-            CharacterPos = _chPos,
-            LiteralValue = LinkedListToString(_token.First),
-            NumberType = numberType
-        };
-        _tokens.Add(numberSemanticToken);
-        _tokenCount[SemanticTokenType.Number]++;
-        Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>");
-        Console.WriteLine(LinkedListToString(_token.First));
-    }
-
-    // 填充buffer操作
-    private void FillLeftBuffer() {
-        //cout << "fill left" << endl;
-        for (int i = 0; i < _buffer.Length / 2; i++) {
-            _buffer[i] = '$';
-        }
-
-        // 确保source字符串足够长，避免超出范围
-        int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos);
-
-        // 使用Array.Copy方法
-        Array.Copy(source.ToCharArray(), _sourcePos, _buffer, 0, lengthToCopy);
-
-        _sourcePos += lengthToCopy;
-
-        if (_sourcePos == source.Length) {
-            eof = true;
-        }
-    }
-
-    private void FillRightBuffer() {
-        //cout << "fill right" << endl;
-        for (int i = _buffer.Length / 2; i < _buffer.Length; i++) {
-            _buffer[i] = '$';
-        }
-
-        // 确保source字符串足够长，避免超出范围
-        int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos);
-
-        // 使用Array.Copy方法
-        Array.Copy(source.ToCharArray(), _sourcePos, _buffer, _buffer.Length / 2, lengthToCopy);
-
-        _sourcePos += lengthToCopy;
-
-        if (_sourcePos == source.Length) {
-            eof = true;
-        }
-    }
-
-    private void PrintBuffer() {
-        for (int i = 0; i < _buffer.Length; i++) {
-            Console.WriteLine($"[{i}] {_buffer[i]}");
-        }
-    }
-
-    void DealEof() {
-        if (eof) _finish = true;
-        else if (_fwdPos < _buffer.Length / 2) {
-            FillRightBuffer();
-            _fwdPos = _buffer.Length / 2;
-        }
-        else {
-            FillLeftBuffer();
-            // start_pos = 0;
-            _fwdPos = 0;
-        }
-    }
-
-    // 读取buffer操作
+    // 读取字符操作
    void GetChar() {
-        if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos];
-        _chPos++;
-        if (_ch == '$') {
-            DealEof();
-            if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos];
+        if (_fwdPos >= 0 && _fwdPos < source.Length)
+        {
+            _ch = source[_fwdPos];
+            _chPos++;
+            _fwdPos++;
+        }
+        else if (_fwdPos == source.Length)
+        {
+            _ch = '\0';
+            _chPos++;
+            _finish = true;
        }
-        if (_fwdPos < _buffer.Length) _fwdPos++;
    }

    private void GetNbc() {
@@ -622,24 +718,25 @@ public class Lexer(string source)
        {
            if (delimiter.Contains(_ch))
            {
-                if (_ch != ':')
-                {
-                    return true;
-                }
-
-                GetChar();
-                if (_ch == '=')
-                {
-                    Retract();
-                    return false;
-                }
-
                return true;
            }
        }
        return false;
    }

+    private char PeekNextChar()
+    {
+        // 确认下一个位置是否仍在buffer的范围内
+        if (_fwdPos < source.Length)
+        {
+            return source[_fwdPos];
+        }
+        return '\0';
+
+    }
+
+
+
    private void PrintToken(SemanticTokenType type, LinkedListNode<char> token, uint line)
    {
        string tokenString = LinkedListToString(token);
--- a/Canon.Core/LexicalParser/SemanticToken.cs
+++ b/Canon.Core/LexicalParser/SemanticToken.cs
@@ -4,12 +4,10 @@ namespace Canon.Core.LexicalParser;

 using Enums;

-using System.Text;
-
 /// <summary>
 /// 词法记号基类
 /// </summary>
-public abstract class SemanticToken
+public abstract class SemanticToken : IEquatable<SemanticToken>
 {
    public abstract SemanticTokenType TokenType { get; }

@@ -59,7 +57,34 @@ public abstract class SemanticToken
        LinePos = 0, CharacterPos = 0, LiteralValue = string.Empty
    };

-    public override string ToString() => LiteralValue;
+    public override string ToString()
+    {
+        return $"LinePos: {LinePos}, CharacterPos: {CharacterPos}, LiteralValue: {LiteralValue}, TokenType: {TokenType}";
+    }
+
+    public bool Equals(SemanticToken? other)
+    {
+        if (other == null)
+            return false;
+
+        return LinePos == other.LinePos &&
+               CharacterPos == other.CharacterPos &&
+               LiteralValue == other.LiteralValue &&
+               TokenType == other.TokenType;
+    }
+
+    public override bool Equals(object? obj)
+    {
+        return obj is SemanticToken semanticTokenObj && Equals(semanticTokenObj);
+    }
+
+    public override int GetHashCode()
+    {
+        return LinePos.GetHashCode() ^
+               CharacterPos.GetHashCode() ^
+               LiteralValue.GetHashCode() ^
+               TokenType.GetHashCode();
+    }
 }

 /// <summary>
@@ -118,6 +143,11 @@ public class DelimiterSemanticToken : SemanticToken
        };
        return true;
    }
+
+    public override int GetHashCode()
+    {
+        return base.GetHashCode() ^ this.DelimiterType.GetHashCode();
+    }
 }

 /// <summary>
@@ -218,6 +248,11 @@ public class KeywordSemanticToken : SemanticToken
        token = null;
        return false;
    }
+
+    public override int GetHashCode()
+    {
+        return base.GetHashCode() ^ this.KeywordType.GetHashCode();
+    }
 }

 /// <summary>
@@ -229,12 +264,44 @@ public class OperatorSemanticToken : SemanticToken

    public required OperatorType OperatorType { get; init; }

+    public static readonly Dictionary<string, OperatorType> OperatorTypes = new Dictionary<string, OperatorType>
+    {
+        { "=", OperatorType.Equal },
+        { "<>", OperatorType.NotEqual },
+        { "<", OperatorType.Less },
+        { "<=", OperatorType.LessEqual },
+        { ">", OperatorType.Greater },
+        { ">=", OperatorType.GreaterEqual },
+        { "+", OperatorType.Plus },
+        { "-", OperatorType.Minus },
+        { "*", OperatorType.Multiply },
+        { "/", OperatorType.Divide },
+        { ":=", OperatorType.Assign }
+    };
+
+    public static OperatorType GetOperatorTypeByOperator(string operatorSymbol)
+    {
+        if (OperatorTypes.TryGetValue(operatorSymbol, out var operatorType))
+        {
+            return operatorType;
+        }
+        else
+        {
+            throw new ArgumentException($"Unknown operator: {operatorSymbol}");
+        }
+    }
+
    public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
        out OperatorSemanticToken? token)
    {
        token = null;
        return false;
    }
+
+    public override int GetHashCode()
+    {
+        return base.GetHashCode() ^ this.OperatorType.GetHashCode();
+    }
 }

 /// <summary>
@@ -245,65 +312,10 @@ public class NumberSemanticToken : SemanticToken
    public override SemanticTokenType TokenType => SemanticTokenType.Number;

    public required NumberType NumberType { get; init; }
-    public double Value { get; private init; }

-    public static bool TryParse(uint linePos, uint characterPos, LinkedListNode<char> now,
-        out NumberSemanticToken? token)
+    public override int GetHashCode()
    {
-        StringBuilder buffer = new();
-
-        bool hasDecimalPoint = false;
-        bool hasExponent = false;
-        bool hasMinusSign = false;
-
-        while (now != null && (char.IsDigit(now.Value) || now.Value == '.' || now.Value == 'e' || now.Value == 'E' || now.Value == '-' || now.Value == '+'))
-        {
-            if (now.Value == '.')
-            {
-                if (hasDecimalPoint)
-                {
-                    break;
-                }
-                hasDecimalPoint = true;
-            }
-
-            if (now.Value == 'e' || now.Value == 'E')
-            {
-                if (hasExponent)
-                {
-                    break;
-                }
-                hasExponent = true;
-            }
-
-            if (now.Value == '-' || now.Value == '+')
-            {
-                if (hasMinusSign)
-                {
-                    break;
-                }
-                hasMinusSign = true;
-            }
-
-            buffer.Append(now.Value);
-            now = now.Next;
-        }
-
-        if (double.TryParse(buffer.ToString(), out double value))
-        {
-            token = new NumberSemanticToken
-            {
-                LinePos = linePos,
-                CharacterPos = characterPos,
-                LiteralValue = buffer.ToString(),
-                Value = value,
-                NumberType = hasDecimalPoint || hasExponent ? NumberType.Real : NumberType.Integer
-            };
-            return true;
-        }
-
-        token = null;
-        return false;
+        return base.GetHashCode() ^ this.NumberType.GetHashCode();
    }
 }

--- a/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs
@@ -0,0 +1,54 @@
+using Canon.Core.Enums;
+using Canon.Core.LexicalParser;
+using Xunit.Abstractions;
+using Canon.Core.Exceptions;
+
+namespace Canon.Tests.LexicalParserTests
+{
+    public class CharacterTypeTests
+    {
+        private readonly ITestOutputHelper _testOutputHelper;
+
+        public CharacterTypeTests(ITestOutputHelper testOutputHelper)
+        {
+            _testOutputHelper = testOutputHelper;
+        }
+
+        [Theory]
+        [InlineData("'a'", "a")]
+        [InlineData("'Hello, World!'", "Hello, World!")]
+
+        public void TestCharacterType(string input, string? expectedResult)
+        {
+            Lexer lexer = new(input);
+            if (expectedResult == null)
+            {
+                Assert.Throws<LexemeException>(() => lexer.Tokenize());
+            }
+            else
+            {
+                List<SemanticToken> tokens = lexer.Tokenize();
+                _testOutputHelper.WriteLine(tokens[0].LiteralValue);
+                Assert.Single(tokens);
+                Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType);
+                Assert.Equal(expectedResult, tokens[0].LiteralValue);
+            }
+        }
+
+        [Theory]
+        //[InlineData("'\\x'", 1, 2, LexemeException.LexemeErrorType.InvalidEscapeSequence)]
+        [InlineData("'This is an unclosed string literal", 1, 36, LexemeErrorType.UnclosedStringLiteral)]
+        [InlineData("'This", 1, 6, LexemeErrorType.UnclosedStringLiteral)]
+        [InlineData("x @", 1, 3, LexemeErrorType.UnknownCharacterOrString)]
+        //[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)]
+        public void TestParseCharacterError(string input,  uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
+        {
+            Lexer lexer = new(input);
+            var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+            _testOutputHelper.WriteLine(ex.ToString());
+            Assert.Equal(expectedErrorType, ex.ErrorType);
+            Assert.Equal(expectedLine, ex.Line);
+            Assert.Equal(expectedCharPosition, ex.CharPosition);
+        }
+    }
+}
--- a/Canon.Tests/LexicalParserTests/DelimiterTests.cs
+++ b/Canon.Tests/LexicalParserTests/DelimiterTests.cs
@@ -7,7 +7,7 @@ public class DelimiterTests
 {
    [Theory]
    [InlineData(",123", DelimiterType.Comma)]
-    [InlineData(".123", DelimiterType.Period)]
+    // [InlineData(".123", DelimiterType.Period)]
    [InlineData(":123", DelimiterType.Colon)]
    [InlineData(";123", DelimiterType.Semicolon)]
    [InlineData("(123)", DelimiterType.LeftParenthesis)]
--- a/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs
+++ b/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs
@@ -0,0 +1,32 @@
+using Canon.Core.LexicalParser;
+using Canon.Core.Exceptions;
+using Xunit.Abstractions;
+using Canon.Core.Enums;
+
+namespace Canon.Tests.LexicalParserTests
+{
+    public class ErrorSingleTests
+    {
+        private readonly ITestOutputHelper _testOutputHelper;
+        public ErrorSingleTests(ITestOutputHelper testOutputHelper)
+        {
+            _testOutputHelper = testOutputHelper;
+        }
+
+        [Theory]
+        [InlineData("program main; var a: integer; begin a := 3#; end.", 1, 43, LexemeErrorType.IllegalNumberFormat)]
+        [InlineData("char c = 'abc;", 1, 15, LexemeErrorType.UnclosedStringLiteral)]
+        [InlineData("x := 10 @;", 1, 9, LexemeErrorType.UnknownCharacterOrString)]
+        [InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)]
+        public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
+        {
+            var lexer = new Lexer(pascalProgram);
+
+            var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+            _testOutputHelper.WriteLine(ex.ToString());
+            Assert.Equal(expectedErrorType, ex.ErrorType);
+            Assert.Equal(expectedLine, ex.Line);
+            Assert.Equal(expectedCharPosition, ex.CharPosition);
+        }
+    }
+}
--- a/Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs
@@ -1,6 +1,5 @@
 using Canon.Core.Enums;
 using Canon.Core.LexicalParser;
-using Xunit;

 namespace Canon.Tests.LexicalParserTests
 {
@@ -10,20 +9,15 @@ namespace Canon.Tests.LexicalParserTests
        [InlineData("identifier", true)]
        [InlineData("_identifier", true)]
        [InlineData("identifier123", true)]
-        [InlineData("123identifier", false)]
        [InlineData("identifier_with_underscores", true)]
        [InlineData("IdentifierWithCamelCase", true)]
-        [InlineData("identifier-with-hyphen", false)]
-        [InlineData("identifier with spaces", false)]
-        [InlineData("identifier_with_special_chars@#", false)]
-        [InlineData("", false)]
-        [InlineData(" ", false)]
-        [InlineData("andand", false)]
+        [InlineData("andand", true)]
        public void TestParseIdentifier(string input, bool expectedResult)
        {
            Lexer lexer = new(input);
            List<SemanticToken> tokens = lexer.Tokenize();

+            Assert.Single(tokens);
            Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier);
        }
    }
--- a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
@@ -21,6 +21,7 @@ public class KeywordTypeTests
    [InlineData("for", KeywordType.For)]
    [InlineData("to", KeywordType.To)]
    [InlineData("do", KeywordType.Do)]
+    [InlineData("DO", KeywordType.Do)]
    public void SmokeTest(string input, KeywordType type)
    {
        Lexer lexer = new(input);
--- a/Canon.Tests/LexicalParserTests/LexicalFileTests.cs
+++ b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs
@@ -0,0 +1,312 @@
+using System.Text.RegularExpressions;
+using Canon.Core.Enums;
+using Canon.Core.Exceptions;
+using Canon.Core.LexicalParser;
+using Xunit.Abstractions;
+
+namespace Canon.Tests.LexicalParserTests;
+
+public class LexicalFileTests
+{
+    private readonly ITestOutputHelper _testOutputHelper;
+
+    public LexicalFileTests(ITestOutputHelper testOutputHelper)
+    {
+        _testOutputHelper = testOutputHelper;
+    }
+
+    //TODO: 基础的字符串匹配，因此变量名称不能被包含。手写一个存在包含情况的测试文件。
+    private static (int, int) FindNthPosition(string pascalProgram, string target, int occurrence)
+    {
+        int lineNumber = 0;
+        (int, int) nthPosition = (0, 0);
+        int foundCount = 0;
+        occurrence = occurrence + 1;
+
+        using (StringReader sr = new StringReader(pascalProgram))
+        {
+            string line;
+            while ((line = sr.ReadLine()) != null)
+            {
+                lineNumber++;
+                int columnNumber = -1;
+
+                // line = Regex.Replace(line, "'[^']*'", "$");
+
+                while ((columnNumber = line.IndexOf(target, columnNumber + 1, StringComparison.Ordinal)) != -1)
+                {
+                    foundCount++;
+                    if (foundCount == occurrence)
+                    {
+                        nthPosition = (lineNumber, columnNumber + target.Length);
+                        return nthPosition;
+                    }
+                }
+            }
+        }
+
+        if (nthPosition == (0, 0))
+        {
+            throw new Exception($"'{target}' not found in program.");
+        }
+
+        return nthPosition;
+    }
+
+    private void TestLexicalAnalysis(string pascalProgram, List<(string, SemanticTokenType, int)> stringLiterals)
+    {
+        var expectedTokens = new List<SemanticToken>();
+
+        foreach (var (literal, tokenType, skipCount) in stringLiterals)
+        {
+            var (line, column) = FindNthPosition(pascalProgram, literal, skipCount);
+            switch (tokenType)
+            {
+                case SemanticTokenType.Keyword:
+                    expectedTokens.Add(new KeywordSemanticToken
+                    {
+                        LinePos = (uint)line,
+                        CharacterPos = (uint)column,
+                        LiteralValue = literal,
+                        KeywordType = KeywordSemanticToken.GetKeywordTypeByKeyword(literal)
+                    });
+                    break;
+                case SemanticTokenType.Identifier:
+                    expectedTokens.Add(new IdentifierSemanticToken
+                    {
+                        LinePos = (uint)line, CharacterPos = (uint)column, LiteralValue = literal
+                    });
+                    break;
+                case SemanticTokenType.Delimiter:
+                    if (DelimiterSemanticToken.TryParse((uint)line, (uint)column, new LinkedListNode<char>(literal[0]),
+                            out var delimiterToken))
+                    {
+                        if (delimiterToken != null)
+                        {
+                            expectedTokens.Add(delimiterToken);
+                        }
+                    }
+
+                    break;
+                case SemanticTokenType.Operator:
+                    expectedTokens.Add(new OperatorSemanticToken
+                    {
+                        LinePos = (uint)line,
+                        CharacterPos = (uint)column,
+                        LiteralValue = literal,
+                        OperatorType = OperatorSemanticToken.GetOperatorTypeByOperator(literal)
+                    });
+                    break;
+                case SemanticTokenType.Character:
+                    expectedTokens.Add(new CharacterSemanticToken
+                    {
+                        LinePos = (uint)line, CharacterPos = (uint)column, LiteralValue = literal
+                    });
+                    break;
+                case SemanticTokenType.Number:
+                    expectedTokens.Add(new NumberSemanticToken
+                    {
+                        LinePos = (uint)line,
+                        CharacterPos = (uint)column,
+                        LiteralValue = literal,
+                        NumberType = NumberType.Integer
+                    });
+                    break;
+            }
+        }
+
+        expectedTokens = expectedTokens.OrderBy(token => token.LinePos).ThenBy(token => token.CharacterPos).ToList();
+        expectedTokens = expectedTokens.Select(token =>
+            token is CharacterSemanticToken characterToken && characterToken.LiteralValue == "hello, world!"
+                ? new CharacterSemanticToken
+                {
+                    LinePos = characterToken.LinePos,
+                    CharacterPos = characterToken.CharacterPos + 1,
+                    LiteralValue = characterToken.LiteralValue
+                }
+                : token).ToList();
+
+        var lexer = new Lexer(pascalProgram);
+        var actualTokens = lexer.Tokenize();
+        for (int i = 0; i < expectedTokens.Count; i++)
+        {
+            _testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}");
+            _testOutputHelper.WriteLine($"Actual: {actualTokens[i]}");
+            _testOutputHelper.WriteLine("----");
+            Assert.Equal(expectedTokens[i], actualTokens[i]);
+        }
+
+        Assert.Equal(expectedTokens, actualTokens);
+    }
+
+    [Fact]
+    public void TestLexicalAnalysisFirst()
+    {
+        string pascalProgram = """
+        program HelloWorld;
+        var
+        message: string;
+        begin
+        message := 'hello, world!';
+        writeln(message);
+        end.
+        """;
+
+        var stringLiterals = new List<(string, SemanticTokenType, int)>
+        {
+            ("program", SemanticTokenType.Keyword, 0),
+            ("HelloWorld", SemanticTokenType.Identifier, 0),
+            (";", SemanticTokenType.Delimiter, 0),
+            ("var", SemanticTokenType.Keyword, 0),
+            ("message", SemanticTokenType.Identifier, 0),
+            (":", SemanticTokenType.Delimiter, 0),
+            ("string", SemanticTokenType.Identifier, 0),
+            (";", SemanticTokenType.Delimiter, 1),
+            ("begin", SemanticTokenType.Keyword, 0),
+            ("message", SemanticTokenType.Identifier, 1),
+            (":=", SemanticTokenType.Operator, 0),
+            ("hello, world!", SemanticTokenType.Character, 0),
+            (";", SemanticTokenType.Delimiter, 2),
+            ("writeln", SemanticTokenType.Identifier, 0),
+            ("(", SemanticTokenType.Delimiter, 0),
+            ("message", SemanticTokenType.Identifier, 2),
+            (")", SemanticTokenType.Delimiter, 0),
+            (";", SemanticTokenType.Delimiter, 3),
+            ("end", SemanticTokenType.Keyword, 0),
+            (".", SemanticTokenType.Delimiter, 0)
+        };
+        TestLexicalAnalysis(pascalProgram, stringLiterals);
+    }
+
+    [Fact]
+    public void TestLexicalAnalysisSecond()
+    {
+        string pascalProgram = """
+        program main;
+        var
+          ab: integer;
+        begin
+          ab := 3;
+          write(ab);
+        end.
+        """;
+
+        var stringLiterals = new List<(string, SemanticTokenType, int)>
+        {
+            ("program", SemanticTokenType.Keyword, 0),
+            ("main", SemanticTokenType.Identifier, 0),
+            (";", SemanticTokenType.Delimiter, 0),
+            ("var", SemanticTokenType.Keyword, 0),
+            ("ab", SemanticTokenType.Identifier, 0),
+            (":", SemanticTokenType.Delimiter, 0),
+            ("integer", SemanticTokenType.Keyword, 0),
+            (";", SemanticTokenType.Delimiter, 1),
+            ("begin", SemanticTokenType.Keyword, 0),
+            ("ab", SemanticTokenType.Identifier, 1),
+            (":=", SemanticTokenType.Operator, 0),
+            ("3", SemanticTokenType.Number, 0),
+            (";", SemanticTokenType.Delimiter, 2),
+            ("write", SemanticTokenType.Identifier, 0),
+            ("(", SemanticTokenType.Delimiter, 0),
+            ("ab", SemanticTokenType.Identifier, 2),
+            (")", SemanticTokenType.Delimiter, 0),
+            (";", SemanticTokenType.Delimiter, 3),
+            ("end", SemanticTokenType.Keyword, 0),
+            (".", SemanticTokenType.Delimiter, 0)
+        };
+        TestLexicalAnalysis(pascalProgram, stringLiterals);
+    }
+
+    //带注释的测试
+    [Fact]
+    public void TestLexicalAnalysisThird()
+    {
+        string pascalProgram = """
+                                {test}
+                                program main;
+                                var
+                                  ab, ba: integer;
+                                begin
+                                  ab := 3;
+                                  ba := 5;
+                                  ab := 5;
+                                  write(ab + ba);
+                                end.
+                                """;
+
+        var stringLiterals = new List<(string, SemanticTokenType, int)>
+        {
+            ("program", SemanticTokenType.Keyword, 0),
+            ("main", SemanticTokenType.Identifier, 0),
+            (";", SemanticTokenType.Delimiter, 0),
+            ("var", SemanticTokenType.Keyword, 0),
+            ("ab", SemanticTokenType.Identifier, 0),
+            (",", SemanticTokenType.Delimiter, 0),
+            ("ba", SemanticTokenType.Identifier, 0),
+            (":", SemanticTokenType.Delimiter, 0),
+            ("integer", SemanticTokenType.Keyword, 0),
+            (";", SemanticTokenType.Delimiter, 1),
+            ("begin", SemanticTokenType.Keyword, 0),
+            ("ab", SemanticTokenType.Identifier, 1),
+            (":=", SemanticTokenType.Operator, 0),
+            ("3", SemanticTokenType.Number, 0),
+            (";", SemanticTokenType.Delimiter, 2),
+            ("ba", SemanticTokenType.Identifier, 1),
+            (":=", SemanticTokenType.Operator, 1),
+            ("5", SemanticTokenType.Number, 0),
+            (";", SemanticTokenType.Delimiter, 3),
+            ("ab", SemanticTokenType.Identifier, 2),
+            (":=", SemanticTokenType.Operator, 2),
+            ("5", SemanticTokenType.Number, 1),
+            (";", SemanticTokenType.Delimiter, 4),
+            ("write", SemanticTokenType.Identifier, 0),
+            ("(", SemanticTokenType.Delimiter, 0),
+            ("ab", SemanticTokenType.Identifier, 3),
+            ("+", SemanticTokenType.Operator, 0),
+            ("ba", SemanticTokenType.Identifier, 2),
+            (")", SemanticTokenType.Delimiter, 0),
+            (";", SemanticTokenType.Delimiter, 5),
+            ("end", SemanticTokenType.Keyword, 0),
+            (".", SemanticTokenType.Delimiter, 0)
+        };
+        TestLexicalAnalysis(pascalProgram, stringLiterals);
+    }
+
+    [Fact]
+    public void UnclosedCommentFirst()
+    {
+        string pascalProgram = """
+                                (* This is an example of an unclosed comment
+                                program CommentError;
+                                var
+                                    x: integer;
+                                begin
+                                    x := 42;
+                                end.
+                                """;
+        var lexer = new Lexer(pascalProgram);
+        var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+        //打印exception信息
+        _testOutputHelper.WriteLine(ex.ToString());
+        Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
+        Assert.Equal((uint)7, ex.Line);
+        Assert.Equal((uint)5, ex.CharPosition);
+    }
+
+    [Fact]
+    public void UnclosedCommentSecond()
+    {
+        string pascalProgram = """
+                               {
+                                   This is a block comment that does not close.
+
+                               program CommentNotClosed;
+                               """;
+        var lexer = new Lexer(pascalProgram);
+        var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+_testOutputHelper.WriteLine(ex.ToString());
+        Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType);
+        Assert.Equal((uint)4, ex.Line);
+        Assert.Equal((uint)26, ex.CharPosition);
+    }
+}
--- a/Canon.Tests/LexicalParserTests/NumberTests.cs
+++ b/Canon.Tests/LexicalParserTests/NumberTests.cs
@@ -1,46 +1,58 @@
 using Canon.Core.Enums;
 using Canon.Core.LexicalParser;
+using Canon.Core.Exceptions;
+using Xunit.Abstractions;

 namespace Canon.Tests.LexicalParserTests
 {
+
    public class NumberTests
    {
+        private readonly ITestOutputHelper _testOutputHelper;
+        public NumberTests(ITestOutputHelper testOutputHelper)
+        {
+            _testOutputHelper = testOutputHelper;
+        }
+
        [Theory]
-        [InlineData("123", 123, NumberType.Integer)]
-        [InlineData("0", 0, NumberType.Integer)]
-        [InlineData("-123", -123, NumberType.Integer)]
-        [InlineData("1.23", 1.23, NumberType.Real)]
-        [InlineData("-1.23", -1.23, NumberType.Real)]
-        [InlineData("0.0", 0.0, NumberType.Real)]
-        [InlineData("1e7", 1e7, NumberType.Real)]
-        [InlineData("1E7", 1E7, NumberType.Real)]
-        [InlineData("1.23e-7", 1.23e-7, NumberType.Real)]
-        [InlineData("1.23E-7", 1.23E-7, NumberType.Real)]
-        [InlineData("1234567890", 1234567890, NumberType.Integer)]
-        [InlineData("1234567890.1234567890", 1234567890.1234567890, NumberType.Real)]
-        [InlineData("-1234567890", -1234567890, NumberType.Integer)]
-        [InlineData("-1234567890.1234567890", -1234567890.1234567890, NumberType.Real)]
-        [InlineData("1e-7", 1e-7, NumberType.Real)]
-        [InlineData("1E-7", 1E-7, NumberType.Real)]
-        [InlineData("1E", 0, NumberType.Real, false)]
-        [InlineData("abc", 0, NumberType.Integer, false)]
-        [InlineData("123abc", 123, NumberType.Integer, true)]
-        public void TestParseNumber(string input, double expected, NumberType expectedNumberType,
-            bool expectedResult = true)
+        [InlineData("123", "123", NumberType.Integer)]
+        [InlineData("0", "0", NumberType.Integer)]
+        [InlineData("1.23", "1.23", NumberType.Real)]
+        [InlineData("0.0", "0.0", NumberType.Real)]
+        [InlineData("1e7", "1e7", NumberType.Real)]
+        [InlineData("1E7", "1E7", NumberType.Real)]
+        [InlineData("1.23e-7", "1.23e-7", NumberType.Real)]
+        [InlineData("1.23E-7", "1.23E-7", NumberType.Real)]
+        [InlineData("1234567890", "1234567890", NumberType.Integer)]
+        [InlineData("1234567890.1234567890", "1234567890.1234567890", NumberType.Real)]
+        [InlineData("1e-7", "1e-7", NumberType.Real)]
+        [InlineData("1E-7", "1E-7", NumberType.Real)]
+        [InlineData(".67",".67", NumberType.Real)]
+        [InlineData("$123", "0x123", NumberType.Hex)]
+        public void TestParseNumber(string input, string expected, NumberType expectedNumberType)
        {
            Lexer lexer = new(input);
            List<SemanticToken> tokens = lexer.Tokenize();
-
            SemanticToken token = tokens[0];
-            if (!expectedResult)
-            {
-                Assert.NotEqual(SemanticTokenType.Keyword, token.TokenType);
-                return;
-            }
            Assert.Equal(SemanticTokenType.Number, token.TokenType);
            NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token;
            Assert.Equal(expectedNumberType, numberSemanticToken.NumberType);
-            Assert.Equal(expected, numberSemanticToken.Value);
+            Assert.Equal(expected, numberSemanticToken.LiteralValue);
+        }
+
+        [Theory]
+        [InlineData("1E",  1, 3, LexemeErrorType.IllegalNumberFormat)]
+        [InlineData("123abc",  1, 4, LexemeErrorType.IllegalNumberFormat)]
+        [InlineData("123.45.67",  1, 7, LexemeErrorType.IllegalNumberFormat)]
+        [InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)]
+        public void TestParseNumberError(string input,  uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType)
+        {
+            Lexer lexer = new(input);
+            var ex = Assert.Throws<LexemeException>(() => lexer.Tokenize());
+            _testOutputHelper.WriteLine(ex.ToString());
+            Assert.Equal(expectedErrorType, ex.ErrorType);
+            Assert.Equal(expectedLine, ex.Line);
+            Assert.Equal(expectedCharPosition, ex.CharPosition);
        }
    }
 }
--- a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
@@ -6,38 +6,33 @@ namespace Canon.Tests.LexicalParserTests;
 public class OperatorTypeTests
 {
    [Theory]
-    [InlineData("+ 123", OperatorType.Plus)]
-    [InlineData("+123", OperatorType.Plus)]
-    [InlineData("-123", OperatorType.Minus)]
-    [InlineData("*123", OperatorType.Multiply)]
-    [InlineData("/123", OperatorType.Divide)]
-    [InlineData("=123", OperatorType.Equal)]
-    [InlineData("<123", OperatorType.Less)]
-    [InlineData(">123", OperatorType.Greater)]
-    [InlineData("<=123", OperatorType.LessEqual)]
-    [InlineData(">=123", OperatorType.GreaterEqual)]
-    [InlineData("<>123", OperatorType.NotEqual)]
-    [InlineData(":=123", OperatorType.Assign)]
-    public void ParseTest(string input, OperatorType result)
+    [InlineData("+ 123", OperatorType.Plus, true)]
+    [InlineData("+123", OperatorType.Plus, true)]
+    [InlineData("-123", OperatorType.Minus, true)]
+    [InlineData("*123", OperatorType.Multiply, true)]
+    [InlineData("/123", OperatorType.Divide, true)]
+    [InlineData("=123", OperatorType.Equal, true)]
+    [InlineData("<123", OperatorType.Less, true)]
+    [InlineData(">123", OperatorType.Greater, true)]
+    [InlineData("<=123", OperatorType.LessEqual, true)]
+    [InlineData(">=123", OperatorType.GreaterEqual, true)]
+    [InlineData("<>123", OperatorType.NotEqual, true)]
+    [InlineData(":=123", OperatorType.Assign, true)]
+    [InlineData("1 + 123", OperatorType.Plus, false)]
+    [InlineData("m +123", OperatorType.Plus, false)]
+    public void ParseTest(string input, OperatorType result, bool expectedResult)
    {
        Lexer lexer = new(input);
        List<SemanticToken> tokens = lexer.Tokenize();

        SemanticToken token = tokens[0];
+        if (!expectedResult)
+        {
+            Assert.NotEqual(SemanticTokenType.Operator, token.TokenType);
+            return;
+        }
        Assert.Equal(SemanticTokenType.Operator, token.TokenType);
        OperatorSemanticToken operatorSemanticToken = (OperatorSemanticToken)token;
        Assert.Equal(result, operatorSemanticToken.OperatorType);
    }
-
-    [Theory]
-    [InlineData("1 + 123")]
-    [InlineData("m +123")]
-    public void ParseFailedTest(string input)
-    {
-        Lexer lexer = new(input);
-        List<SemanticToken> tokens = lexer.Tokenize();
-
-        SemanticToken token = tokens[0];
-        Assert.NotEqual(SemanticTokenType.Operator, token.TokenType);
-    }
 }