diff --git a/Canon.Core/Enums/ErrorEnums.cs b/Canon.Core/Enums/ErrorEnums.cs
new file mode 100644
index 0000000..18ded10
--- /dev/null
+++ b/Canon.Core/Enums/ErrorEnums.cs
@@ -0,0 +1,11 @@
+namespace Canon.Core.Enums;
+
+public enum LexemeErrorType
+{
+    IllegalNumberFormat,      // malformed number
+    UnknownCharacterOrString, // the source contains an unrecognizable character or string
+    UnclosedStringLiteral,    // string literal is not closed
+    UnclosedComment,          // comment is not closed
+    InvalidEscapeSequence,    // invalid escape sequence
+    IllegalOperator,          // illegal operator
+}
diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs
index 3e3bf7f..13be0ec 100644
--- a/Canon.Core/Enums/SemanticEnums.cs
+++ b/Canon.Core/Enums/SemanticEnums.cs
@@ -93,7 +93,7 @@ public enum StateType
     Word,
     Digit,
     Delimiter,
-    Other
+    Operator
 }
 
 public enum BasicIdType
diff --git a/Canon.Core/Exceptions/LexemeException.cs b/Canon.Core/Exceptions/LexemeException.cs
index b20c4ed..40ec791 100644
--- a/Canon.Core/Exceptions/LexemeException.cs
+++ b/Canon.Core/Exceptions/LexemeException.cs
@@ -1,9 +1,13 @@
 namespace Canon.Core.Exceptions;
+using Enums;
 
 /// <summary>
 /// Exception thrown during lexical analysis
 /// </summary>
 public class LexemeException : Exception
 {
+    public LexemeErrorType ErrorType { get; }
+    public uint Line { get; }
+    public uint CharPosition { get; }
     public LexemeException() { }
     public LexemeException(string message) : base(message) { }
@@ -11,15 +15,20 @@ public class LexemeException
     public LexemeException(string message, Exception innerException) :
         base(message, innerException) { }
 
+    /// <param name="errorType">error type</param>
     /// <param name="line">line number of the token</param>
     /// <param name="charPosition">column number of the token</param>
     /// <param name="message">error message</param>
-    public LexemeException(uint line, uint charPosition, string message) :
-        base("line:" + line + ", charPosition:" + charPosition + " :" + message) { }
+    public LexemeException(LexemeErrorType errorType, uint line, uint charPosition, string message) :
+        base("line:" + line + ", charPosition:" + charPosition + " :" + message)
+    {
+        ErrorType = errorType;
+        Line = line;
+        CharPosition = charPosition;
+    }
 
-    public LexemeException(uint line, uint charPosition, Exception innerException) :
-        base("line:" + line + ", charPosition:" + charPosition + " : ", innerException) { }
-
-    public LexemeException(uint line, uint charPosition, string message, Exception innerException) :
-        base("line:" + line + ", charPosition:" + charPosition + " :" + message, innerException) { }
+    public override string ToString()
+    {
+        return $"LexemeException: ErrorType={ErrorType}, Line={Line}, CharPosition={CharPosition}, Message={Message}\n";
+    }
 }
diff --git a/Canon.Core/LexicalParser/Lexer.cs b/Canon.Core/LexicalParser/Lexer.cs
index 516de9b..d1a55c1 100644
--- a/Canon.Core/LexicalParser/Lexer.cs
+++ b/Canon.Core/LexicalParser/Lexer.cs
@@ -1,5 +1,7 @@
+using System.Numerics;
 using System.Text;
 using Canon.Core.Enums;
+using Canon.Core.Exceptions;
 
 namespace Canon.Core.LexicalParser;
 
@@ -17,27 +19,31 @@ public class Lexer(string source)
         "Not", "Mod", "And", "Or"
     ];
 
-    private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]","'","\"",".."];
+    private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."];
+
+    private readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="];
 
     // state machine
     private StateType _state;
     private char _ch;
     private LinkedList<char> _token = new LinkedList<char>();
+
     // bool save;
     // int saved_state;
     bool _finish;
-    private bool eof;
+
     // buffer
     private readonly char[] _buffer = new char[2048];
+
     // int start_pos;
     private int _fwdPos;
 
     // counters
     private uint _line = 1;
     private uint _chPos;
-    private int _sourcePos;
+
     private readonly Dictionary<SemanticTokenType, int> _tokenCount =
new Dictionary { { SemanticTokenType.Keyword, 0 }, @@ -57,234 +63,394 @@ public class Lexer(string source) // 缓冲区 // start_pos = 0; _fwdPos = 0; - FillLeftBuffer(); // 状态机 _finish = false; - while (!_finish) { + while (!_finish) + { GetChar(); GetNbc(); + if (_finish) break; _token = new LinkedList(); - if (IsLetter()) { + if (IsLetter()) + { _state = StateType.Word; } - else if (IsDigit()) { + else if(_ch == '.') + { + char next = PeekNextChar(); + if (next >= '0' && next <= '9') + { + _state = StateType.Digit; + } + else + { + _state = StateType.Delimiter; + } + } + else if (IsDigit() || _ch == '$') + { _state = StateType.Digit; } - else if (IsDelimiter()) { + else if (IsDelimiter()) + { _state = StateType.Delimiter; } + else if (_ch == '{') + { + GetChar(); + while (_ch != '}') + { + GetChar(); + if (_ch == '\n') + { + _line++; + _chPos = 0; + } + if (_finish) + { + throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed."); + } + + } + + continue; + } else { - _state = StateType.Other; + _state = StateType.Operator; } switch (_state) { - case StateType.Word: { - while (IsDigit() || IsLetter()) + case StateType.Word: + while (IsDigit() || IsLetter()) + { + Cat(); + GetChar(); + } + + Retract(); + + if (IsKeyword()) + { + KeywordType keywordType = + KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First)); + MakeToken(keywordType); + } + else + { + MakeToken(SemanticTokenType.Identifier); + } + + break; + case StateType.Digit: + DealNumber(); + break; + case StateType.Delimiter: + Cat(); + switch (_ch) + { + case '.': + { + GetChar(); + if (_ch == '.') + { + Cat(); + MakeToken(DelimiterType.DoubleDots); + break; + } + + Retract(); + if (IsDot()) + { + MakeToken(DelimiterType.Dot); + } + else + { + MakeToken(DelimiterType.Period); + } + } + break; + case '\'': + case '\"': + { + // 重置_token,准备收集字符串内容 + _token = new LinkedList(); + + GetChar(); // 移动到下一个字符,即字符串的第一个字符 + while (_ch != '\'' && _ch != '\"') + { + Cat(); // 收集字符 + GetChar(); // 移动到下一个字符 + if (_ch == '\n' || _finish) + { + throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos, "The String is not closed."); + } + } + + MakeToken(SemanticTokenType.Character); // 或其它适用于字符串字面量的SemanticTokenType + _token = new LinkedList(); // 重置_token + + if (!(_ch == '\'' || _ch == '\"')) + { + throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos, "The String is not closed."); + } + } + break; + case ',': + MakeToken(DelimiterType.Comma); + break; + case ':': + char nextChar = PeekNextChar(); + if (nextChar == '=') + { + GetChar(); + Cat(); + MakeToken(OperatorType.Assign); + } + else + { + MakeToken(DelimiterType.Colon); + } + + break; + case ';': + MakeToken(DelimiterType.Semicolon); + break; + case '(': + char next = PeekNextChar(); + if (next == '*') + { + GetChar(); + bool commentClosed = false; + while (!commentClosed) + { + GetNbc(); + GetChar(); + while (_ch != '*') + { + GetNbc(); + GetChar(); + if (_finish) + { + throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed."); + } + } + + GetChar(); + if (_finish) + { + throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos, "The comment is not closed."); + } + + if (_ch == ')') commentClosed = true; + } + } + else + { + MakeToken(DelimiterType.LeftParenthesis); + } + + break; + case ')': + MakeToken(DelimiterType.RightParenthesis); + break; + case '[': + MakeToken(DelimiterType.LeftSquareBracket); + break; + case 
']': + MakeToken(DelimiterType.RightSquareBracket); + break; + } + + break; + case StateType.Operator: + DealOther(); + break; + default: + throw new ArgumentOutOfRangeException(); + } + + } + + return _tokens; + } + + private void DealNumber() + { + // 十六进制 + if (_ch == '$') + { + Cat(); + + GetChar(); + while (!NumberShouldBreak()) + { + // 假设IsHexDigit方法能够识别十六进制数字 + if (IsHexDigit()) { Cat(); GetChar(); } - Retract(); - - if (IsKeyword()) + else if(NumberShouldBreak()) { - KeywordType keywordType = - KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First)); - MakeToken(keywordType); + break; } - else { - MakeToken(SemanticTokenType.Identifier); + else + { + throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal hex numbers!"); } - break; } + MakeToken(NumberType.Hex); + return; + } - case StateType.Digit: + // 非十六进制 + if(IsDigit() || _ch == '.') + { + while (!NumberShouldBreak()) + { + // 含小数部分 + if (_ch == '.') { - bool error = false; - bool tag = false; // 用于标记是否已经处理过科学记数法的指数部分 - bool doubleDot = false; - NumberType numberType = NumberType.Integer; - - while (IsDigit() || _ch == '.' || _ch == 'E' || _ch == '+' || _ch == '-' || _ch == 'e' || IsLetter()) { - if (_ch != '.') - { - Cat(); - } - - - if (_ch == '0' && !tag) { - GetChar(); - if (_ch == 'x' || _ch == 'X') { - numberType = NumberType.Hex; // 标识十六进制 - Cat(); - while (IsHexDigit()) { // 假设IsHexDigit方法能够识别十六进制数字 - Cat(); - } - break; - } - Retract(); // 如果不是'x'或'X',回退一个字符 - } - else if (_ch == '.') { - GetChar(); - if (_ch == '.') { - Retract(); // 回退到第一个'.' - Retract(); // 回退到'.'之前的数字 - doubleDot = true; - break; - } - Retract(); - Cat(); - numberType = NumberType.Real; - } - else if ((_ch == 'e' || _ch == 'E') && !tag) { - GetChar(); - if (IsDigit() || _ch == '+' || _ch == '-') { - Cat(); - tag = true; // 已处理指数部分 - continue; - } - error = true; // 错误的科学记数法 - break; - } - - GetChar(); - } - - if (!error) { - MakeToken(numberType); - if (doubleDot) - { - break; - } - Retract(); - } - else + // 检查是否是符号 “..” + char next = PeekNextChar(); + if (next == '.') { Retract(); - PrintError(0,_token.First,_line); - _tokenCount[SemanticTokenType.Error]++; + break; } - break; - } - case StateType.Delimiter: - Cat(); - switch (_ch) - { - case '.': + // 不是符号 “..”,进入小数点后的判断 + Cat(); // 记录“.” + + // “.”后不应为空,至少应该有一位小数 + GetChar(); + if (NumberShouldBreak()) { - GetChar(); - if (_ch == '.') + throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal numbers!"); + } + + // 读取小数点后的数字 + while (!NumberShouldBreak()) + { + if (IsDigit()) { Cat(); - MakeToken(DelimiterType.DoubleDots); + GetChar(); + } + else if (_ch == 'e' || _ch == 'E') + { + DealE(); break; } - Retract(); - if (IsPeriod()) + else if(NumberShouldBreak()) { - - }else if (IsDot()) - { - - } - } - break; - case '\'': - case '\"': - { - if(_ch == '\'') MakeToken(DelimiterType.SingleQuotation); - else if(_ch == '\"') MakeToken(DelimiterType.DoubleQuotation); - - // 重置_token,准备收集字符串内容 - _token = new LinkedList(); - - GetChar(); // 移动到下一个字符,即字符串的第一个字符 - while (_ch != '\'' && _ch != '\"') - { - Cat(); // 收集字符 - GetChar(); // 移动到下一个字符 - } - - // 在退出循环时,_ch为'或EOF,此时_token包含字符串内容 - // 创建字符内容的token,注意这里使用SemanticTokenType.String表示字符串字面量 - MakeToken(SemanticTokenType.Character); // 或其它适用于字符串字面量的SemanticTokenType - _token = new LinkedList(); // 重置_token - - if (_ch == '\'' && _ch != '\n') - { - // 识别并创建最后一个单引号的token - Cat(); - MakeToken(DelimiterType.SingleQuotation); - } - else if (_ch == '\"') - { - Cat(); - 
MakeToken(DelimiterType.DoubleQuotation); + break; } else { - // 这里处理遇到EOF但没有闭合单引号的情况,例如:'字符串结尾没有单引号 - // 可以添加错误处理代码 - PrintError(0, _token.First, _line); // 假设这个方法用于打印错误 + throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number."); } } - break; - case ',': - MakeToken(DelimiterType.Comma); - break; - case ':': - MakeToken(DelimiterType.Colon); - break; - case ';': - MakeToken(DelimiterType.Semicolon); - break; - case '(': - MakeToken(DelimiterType.LeftParenthesis); - break; - case ')': - MakeToken(DelimiterType.RightParenthesis); - break; - case '[': - MakeToken(DelimiterType.LeftSquareBracket); - break; - case ']': - MakeToken(DelimiterType.RightSquareBracket); + MakeToken(NumberType.Real); + return; + } + + // 不含小数部分,含科学计数法 + if (_ch == 'e' || _ch == 'E') + { + DealE(); + MakeToken(NumberType.Real); + return; + } + + // 暂时为整数 + if (IsDigit()) + { + Cat(); + GetChar(); + } + else if(NumberShouldBreak()) + { break; } - break; - - case StateType.Other: - DealOther(); - break; - default: - throw new ArgumentOutOfRangeException(); + else + { + throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number."); + } } - + MakeToken(NumberType.Integer); } - PrintResult(); - return _tokens; + + } + + private void DealE() + { + Cat(); + GetChar(); + if (IsDigit() || _ch == '+' || _ch == '-') + { + Cat(); + } + else + { + throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number."); + } + + // 读取e后的数字 + GetChar(); + while (!NumberShouldBreak()) + { + if (IsDigit()) + { + Cat(); + GetChar(); + } + else + { + throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number."); + } + } + } + + bool NumberShouldBreak() + { + if (_ch == ' ' || _ch == '\n' || _ch == '\t' || _ch == '\r' || (IsDelimiter() && _ch!='.') || IsOperator() || _finish) + { + Retract(); + return true; + } + + return false; + } + + private bool IsOperator() + { + foreach (var o in _operator) + { + if (o.Contains(_ch)) + { + return true; + } + } + return false; } private bool IsDot() { - SemanticToken tokenBefore = _tokens.Last(); - if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true; + if (_tokens.Count != 0) + { + SemanticToken tokenBefore = _tokens.Last(); + if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true; + } return false; } - private bool IsPeriod() - { - SemanticToken tokenBefore = _tokens.Last(); - if (tokenBefore.TokenType == SemanticTokenType.Keyword) return true; - return false; - } private void DealOther() { @@ -348,28 +514,8 @@ public class Lexer(string source) MakeToken(OperatorType.Greater); } break; - case ':': - Cat(); - GetChar(); - if (_ch == '=') - { - // 识别 := - Cat(); - MakeToken(OperatorType.Assign); - } - else - { - // 这里应该被识别为delimiter逻辑上 - Cat(); - PrintError(1, _token.First, _line); - _tokenCount[SemanticTokenType.Error]++; - } - break; default: - Cat(); - PrintError(1, _token.First, _line); - _tokenCount[SemanticTokenType.Error]++; - break; + throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos, "Illegal lexeme."); } } @@ -396,14 +542,6 @@ public class Lexer(string source) }; token = identifierSemanticToken; break; - case SemanticTokenType.Error: - ErrorSemanticToken errorSemanticToken = new ErrorSemanticToken() - { - LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First), - }; - token = errorSemanticToken; - break; - default: throw new 
ArgumentOutOfRangeException(nameof(tokenType), tokenType, null); } @@ -449,6 +587,32 @@ public class Lexer(string source) Console.WriteLine(LinkedListToString(_token.First)); } + private void MakeToken(NumberType numberType) + { + string temp = LinkedListToString(_token.First); + string result; + if (numberType == NumberType.Hex) + { + result = string.Concat("0x", temp.AsSpan(1, temp.Length - 1)); + } + else + { + result = temp; + } + + NumberSemanticToken numberSemanticToken = new NumberSemanticToken() + { + LinePos = _line, + CharacterPos = _chPos, + LiteralValue = result, + NumberType = numberType + }; + _tokens.Add(numberSemanticToken); + _tokenCount[SemanticTokenType.Number]++; + Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>"); + Console.WriteLine(LinkedListToString(_token.First)); + } + private void MakeToken(OperatorType operatorType) { OperatorSemanticToken operatorSemanticToken = new OperatorSemanticToken() @@ -464,88 +628,20 @@ public class Lexer(string source) Console.WriteLine(LinkedListToString(_token.First)); } - private void MakeToken(NumberType numberType) - { - NumberSemanticToken numberSemanticToken = new NumberSemanticToken() - { - LinePos = _line, - CharacterPos = _chPos, - LiteralValue = LinkedListToString(_token.First), - NumberType = numberType - }; - _tokens.Add(numberSemanticToken); - _tokenCount[SemanticTokenType.Number]++; - Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>"); - Console.WriteLine(LinkedListToString(_token.First)); - } - - // 填充buffer操作 - private void FillLeftBuffer() { - //cout << "fill left" << endl; - for (int i = 0; i < _buffer.Length / 2; i++) { - _buffer[i] = '$'; - } - - // 确保source字符串足够长,避免超出范围 - int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos); - - // 使用Array.Copy方法 - Array.Copy(source.ToCharArray(), _sourcePos, _buffer, 0, lengthToCopy); - - _sourcePos += lengthToCopy; - - if (_sourcePos == source.Length) { - eof = true; - } - } - - private void FillRightBuffer() { - //cout << "fill right" << endl; - for (int i = _buffer.Length / 2; i < _buffer.Length; i++) { - _buffer[i] = '$'; - } - - // 确保source字符串足够长,避免超出范围 - int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos); - - // 使用Array.Copy方法 - Array.Copy(source.ToCharArray(), _sourcePos, _buffer, _buffer.Length / 2, lengthToCopy); - - _sourcePos += lengthToCopy; - - if (_sourcePos == source.Length) { - eof = true; - } - } - - private void PrintBuffer() { - for (int i = 0; i < _buffer.Length; i++) { - Console.WriteLine($"[{i}] {_buffer[i]}"); - } - } - - void DealEof() { - if (eof) _finish = true; - else if (_fwdPos < _buffer.Length / 2) { - FillRightBuffer(); - _fwdPos = _buffer.Length / 2; - } - else { - FillLeftBuffer(); - // start_pos = 0; - _fwdPos = 0; - } - } - - // 读取buffer操作 + // 读取字符操作 void GetChar() { - if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos]; - _chPos++; - if (_ch == '$') { - DealEof(); - if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos]; + if (_fwdPos >= 0 && _fwdPos < source.Length) + { + _ch = source[_fwdPos]; + _chPos++; + _fwdPos++; + } + else if (_fwdPos == source.Length) + { + _ch = '\0'; + _chPos++; + _finish = true; } - if (_fwdPos < _buffer.Length) _fwdPos++; } private void GetNbc() { @@ -622,24 +718,25 @@ public class Lexer(string source) { if (delimiter.Contains(_ch)) { - if (_ch != ':') - { - return true; - } - - GetChar(); - if (_ch == '=') - { - Retract(); - return false; - } - return true; } } return false; } + private char 
PeekNextChar() + { + // 确认下一个位置是否仍在buffer的范围内 + if (_fwdPos < source.Length) + { + return source[_fwdPos]; + } + return '\0'; + + } + + + private void PrintToken(SemanticTokenType type, LinkedListNode token, uint line) { string tokenString = LinkedListToString(token); diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs index 8e42917..878630b 100644 --- a/Canon.Core/LexicalParser/SemanticToken.cs +++ b/Canon.Core/LexicalParser/SemanticToken.cs @@ -4,12 +4,10 @@ namespace Canon.Core.LexicalParser; using Enums; -using System.Text; - /// /// 词法记号基类 /// -public abstract class SemanticToken +public abstract class SemanticToken : IEquatable { public abstract SemanticTokenType TokenType { get; } @@ -59,7 +57,34 @@ public abstract class SemanticToken LinePos = 0, CharacterPos = 0, LiteralValue = string.Empty }; - public override string ToString() => LiteralValue; + public override string ToString() + { + return $"LinePos: {LinePos}, CharacterPos: {CharacterPos}, LiteralValue: {LiteralValue}, TokenType: {TokenType}"; + } + + public bool Equals(SemanticToken? other) + { + if (other == null) + return false; + + return LinePos == other.LinePos && + CharacterPos == other.CharacterPos && + LiteralValue == other.LiteralValue && + TokenType == other.TokenType; + } + + public override bool Equals(object? obj) + { + return obj is SemanticToken semanticTokenObj && Equals(semanticTokenObj); + } + + public override int GetHashCode() + { + return LinePos.GetHashCode() ^ + CharacterPos.GetHashCode() ^ + LiteralValue.GetHashCode() ^ + TokenType.GetHashCode(); + } } /// @@ -118,6 +143,11 @@ public class DelimiterSemanticToken : SemanticToken }; return true; } + + public override int GetHashCode() + { + return base.GetHashCode() ^ this.DelimiterType.GetHashCode(); + } } /// @@ -218,6 +248,11 @@ public class KeywordSemanticToken : SemanticToken token = null; return false; } + + public override int GetHashCode() + { + return base.GetHashCode() ^ this.KeywordType.GetHashCode(); + } } /// @@ -229,12 +264,44 @@ public class OperatorSemanticToken : SemanticToken public required OperatorType OperatorType { get; init; } + public static readonly Dictionary OperatorTypes = new Dictionary + { + { "=", OperatorType.Equal }, + { "<>", OperatorType.NotEqual }, + { "<", OperatorType.Less }, + { "<=", OperatorType.LessEqual }, + { ">", OperatorType.Greater }, + { ">=", OperatorType.GreaterEqual }, + { "+", OperatorType.Plus }, + { "-", OperatorType.Minus }, + { "*", OperatorType.Multiply }, + { "/", OperatorType.Divide }, + { ":=", OperatorType.Assign } + }; + + public static OperatorType GetOperatorTypeByOperator(string operatorSymbol) + { + if (OperatorTypes.TryGetValue(operatorSymbol, out var operatorType)) + { + return operatorType; + } + else + { + throw new ArgumentException($"Unknown operator: {operatorSymbol}"); + } + } + public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now, out OperatorSemanticToken? token) { token = null; return false; } + + public override int GetHashCode() + { + return base.GetHashCode() ^ this.OperatorType.GetHashCode(); + } } /// @@ -245,65 +312,10 @@ public class NumberSemanticToken : SemanticToken public override SemanticTokenType TokenType => SemanticTokenType.Number; public required NumberType NumberType { get; init; } - public double Value { get; private init; } - public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now, - out NumberSemanticToken? 
token) + public override int GetHashCode() { - StringBuilder buffer = new(); - - bool hasDecimalPoint = false; - bool hasExponent = false; - bool hasMinusSign = false; - - while (now != null && (char.IsDigit(now.Value) || now.Value == '.' || now.Value == 'e' || now.Value == 'E' || now.Value == '-' || now.Value == '+')) - { - if (now.Value == '.') - { - if (hasDecimalPoint) - { - break; - } - hasDecimalPoint = true; - } - - if (now.Value == 'e' || now.Value == 'E') - { - if (hasExponent) - { - break; - } - hasExponent = true; - } - - if (now.Value == '-' || now.Value == '+') - { - if (hasMinusSign) - { - break; - } - hasMinusSign = true; - } - - buffer.Append(now.Value); - now = now.Next; - } - - if (double.TryParse(buffer.ToString(), out double value)) - { - token = new NumberSemanticToken - { - LinePos = linePos, - CharacterPos = characterPos, - LiteralValue = buffer.ToString(), - Value = value, - NumberType = hasDecimalPoint || hasExponent ? NumberType.Real : NumberType.Integer - }; - return true; - } - - token = null; - return false; + return base.GetHashCode() ^ this.NumberType.GetHashCode(); } } diff --git a/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs new file mode 100644 index 0000000..e37c692 --- /dev/null +++ b/Canon.Tests/LexicalParserTests/CharacterTypeTests.cs @@ -0,0 +1,54 @@ +using Canon.Core.Enums; +using Canon.Core.LexicalParser; +using Xunit.Abstractions; +using Canon.Core.Exceptions; + +namespace Canon.Tests.LexicalParserTests +{ + public class CharacterTypeTests + { + private readonly ITestOutputHelper _testOutputHelper; + + public CharacterTypeTests(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + + [Theory] + [InlineData("'a'", "a")] + [InlineData("'Hello, World!'", "Hello, World!")] + + public void TestCharacterType(string input, string? 
expectedResult) + { + Lexer lexer = new(input); + if (expectedResult == null) + { + Assert.Throws(() => lexer.Tokenize()); + } + else + { + List tokens = lexer.Tokenize(); + _testOutputHelper.WriteLine(tokens[0].LiteralValue); + Assert.Single(tokens); + Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType); + Assert.Equal(expectedResult, tokens[0].LiteralValue); + } + } + + [Theory] + //[InlineData("'\\x'", 1, 2, LexemeException.LexemeErrorType.InvalidEscapeSequence)] + [InlineData("'This is an unclosed string literal", 1, 36, LexemeErrorType.UnclosedStringLiteral)] + [InlineData("'This", 1, 6, LexemeErrorType.UnclosedStringLiteral)] + [InlineData("x @", 1, 3, LexemeErrorType.UnknownCharacterOrString)] + //[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)] + public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) + { + Lexer lexer = new(input); + var ex = Assert.Throws(() => lexer.Tokenize()); + _testOutputHelper.WriteLine(ex.ToString()); + Assert.Equal(expectedErrorType, ex.ErrorType); + Assert.Equal(expectedLine, ex.Line); + Assert.Equal(expectedCharPosition, ex.CharPosition); + } + } +} diff --git a/Canon.Tests/LexicalParserTests/DelimiterTests.cs b/Canon.Tests/LexicalParserTests/DelimiterTests.cs index 5b653db..934742f 100644 --- a/Canon.Tests/LexicalParserTests/DelimiterTests.cs +++ b/Canon.Tests/LexicalParserTests/DelimiterTests.cs @@ -7,7 +7,7 @@ public class DelimiterTests { [Theory] [InlineData(",123", DelimiterType.Comma)] - [InlineData(".123", DelimiterType.Period)] + // [InlineData(".123", DelimiterType.Period)] [InlineData(":123", DelimiterType.Colon)] [InlineData(";123", DelimiterType.Semicolon)] [InlineData("(123)", DelimiterType.LeftParenthesis)] diff --git a/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs b/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs new file mode 100644 index 0000000..bb4603a --- /dev/null +++ b/Canon.Tests/LexicalParserTests/ErrorSingleTests.cs @@ -0,0 +1,32 @@ +using Canon.Core.LexicalParser; +using Canon.Core.Exceptions; +using Xunit.Abstractions; +using Canon.Core.Enums; + +namespace Canon.Tests.LexicalParserTests +{ + public class ErrorSingleTests + { + private readonly ITestOutputHelper _testOutputHelper; + public ErrorSingleTests(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + + [Theory] + [InlineData("program main; var a: integer; begin a := 3#; end.", 1, 43, LexemeErrorType.IllegalNumberFormat)] + [InlineData("char c = 'abc;", 1, 15, LexemeErrorType.UnclosedStringLiteral)] + [InlineData("x := 10 @;", 1, 9, LexemeErrorType.UnknownCharacterOrString)] + [InlineData("identifier_with_special_chars@#",1, 30, LexemeErrorType.UnknownCharacterOrString)] + public void TestUnknownCharacterError(string pascalProgram, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) + { + var lexer = new Lexer(pascalProgram); + + var ex = Assert.Throws(() => lexer.Tokenize()); + _testOutputHelper.WriteLine(ex.ToString()); + Assert.Equal(expectedErrorType, ex.ErrorType); + Assert.Equal(expectedLine, ex.Line); + Assert.Equal(expectedCharPosition, ex.CharPosition); + } + } +} diff --git a/Canon.Tests/LexicalParserTests/IndentifierTests.cs b/Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs similarity index 68% rename from Canon.Tests/LexicalParserTests/IndentifierTests.cs rename to Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs index a46a10b..d453be9 100644 --- 
a/Canon.Tests/LexicalParserTests/IndentifierTests.cs +++ b/Canon.Tests/LexicalParserTests/IndentifierTypeTests.cs @@ -1,6 +1,5 @@ using Canon.Core.Enums; using Canon.Core.LexicalParser; -using Xunit; namespace Canon.Tests.LexicalParserTests { @@ -10,20 +9,15 @@ namespace Canon.Tests.LexicalParserTests [InlineData("identifier", true)] [InlineData("_identifier", true)] [InlineData("identifier123", true)] - [InlineData("123identifier", false)] [InlineData("identifier_with_underscores", true)] [InlineData("IdentifierWithCamelCase", true)] - [InlineData("identifier-with-hyphen", false)] - [InlineData("identifier with spaces", false)] - [InlineData("identifier_with_special_chars@#", false)] - [InlineData("", false)] - [InlineData(" ", false)] - [InlineData("andand", false)] + [InlineData("andand", true)] public void TestParseIdentifier(string input, bool expectedResult) { Lexer lexer = new(input); List tokens = lexer.Tokenize(); + Assert.Single(tokens); Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier); } } diff --git a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs index fd22191..725b83b 100644 --- a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs +++ b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs @@ -21,6 +21,7 @@ public class KeywordTypeTests [InlineData("for", KeywordType.For)] [InlineData("to", KeywordType.To)] [InlineData("do", KeywordType.Do)] + [InlineData("DO", KeywordType.Do)] public void SmokeTest(string input, KeywordType type) { Lexer lexer = new(input); diff --git a/Canon.Tests/LexicalParserTests/LexicalFileTests.cs b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs new file mode 100644 index 0000000..6027e35 --- /dev/null +++ b/Canon.Tests/LexicalParserTests/LexicalFileTests.cs @@ -0,0 +1,312 @@ +using System.Text.RegularExpressions; +using Canon.Core.Enums; +using Canon.Core.Exceptions; +using Canon.Core.LexicalParser; +using Xunit.Abstractions; + +namespace Canon.Tests.LexicalParserTests; + +public class LexicalFileTests +{ + private readonly ITestOutputHelper _testOutputHelper; + + public LexicalFileTests(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + + //TODO: 基础的字符串匹配,因此变量名称不能被包含。手写一个存在包含情况的测试文件。 + private static (int, int) FindNthPosition(string pascalProgram, string target, int occurrence) + { + int lineNumber = 0; + (int, int) nthPosition = (0, 0); + int foundCount = 0; + occurrence = occurrence + 1; + + using (StringReader sr = new StringReader(pascalProgram)) + { + string line; + while ((line = sr.ReadLine()) != null) + { + lineNumber++; + int columnNumber = -1; + + // line = Regex.Replace(line, "'[^']*'", "$"); + + while ((columnNumber = line.IndexOf(target, columnNumber + 1, StringComparison.Ordinal)) != -1) + { + foundCount++; + if (foundCount == occurrence) + { + nthPosition = (lineNumber, columnNumber + target.Length); + return nthPosition; + } + } + } + } + + if (nthPosition == (0, 0)) + { + throw new Exception($"'{target}' not found in program."); + } + + return nthPosition; + } + + private void TestLexicalAnalysis(string pascalProgram, List<(string, SemanticTokenType, int)> stringLiterals) + { + var expectedTokens = new List(); + + foreach (var (literal, tokenType, skipCount) in stringLiterals) + { + var (line, column) = FindNthPosition(pascalProgram, literal, skipCount); + switch (tokenType) + { + case SemanticTokenType.Keyword: + expectedTokens.Add(new KeywordSemanticToken + { + LinePos = (uint)line, + 
CharacterPos = (uint)column, + LiteralValue = literal, + KeywordType = KeywordSemanticToken.GetKeywordTypeByKeyword(literal) + }); + break; + case SemanticTokenType.Identifier: + expectedTokens.Add(new IdentifierSemanticToken + { + LinePos = (uint)line, CharacterPos = (uint)column, LiteralValue = literal + }); + break; + case SemanticTokenType.Delimiter: + if (DelimiterSemanticToken.TryParse((uint)line, (uint)column, new LinkedListNode(literal[0]), + out var delimiterToken)) + { + if (delimiterToken != null) + { + expectedTokens.Add(delimiterToken); + } + } + + break; + case SemanticTokenType.Operator: + expectedTokens.Add(new OperatorSemanticToken + { + LinePos = (uint)line, + CharacterPos = (uint)column, + LiteralValue = literal, + OperatorType = OperatorSemanticToken.GetOperatorTypeByOperator(literal) + }); + break; + case SemanticTokenType.Character: + expectedTokens.Add(new CharacterSemanticToken + { + LinePos = (uint)line, CharacterPos = (uint)column, LiteralValue = literal + }); + break; + case SemanticTokenType.Number: + expectedTokens.Add(new NumberSemanticToken + { + LinePos = (uint)line, + CharacterPos = (uint)column, + LiteralValue = literal, + NumberType = NumberType.Integer + }); + break; + } + } + + expectedTokens = expectedTokens.OrderBy(token => token.LinePos).ThenBy(token => token.CharacterPos).ToList(); + expectedTokens = expectedTokens.Select(token => + token is CharacterSemanticToken characterToken && characterToken.LiteralValue == "hello, world!" + ? new CharacterSemanticToken + { + LinePos = characterToken.LinePos, + CharacterPos = characterToken.CharacterPos + 1, + LiteralValue = characterToken.LiteralValue + } + : token).ToList(); + + var lexer = new Lexer(pascalProgram); + var actualTokens = lexer.Tokenize(); + for (int i = 0; i < expectedTokens.Count; i++) + { + _testOutputHelper.WriteLine($"Expect: {expectedTokens[i]}"); + _testOutputHelper.WriteLine($"Actual: {actualTokens[i]}"); + _testOutputHelper.WriteLine("----"); + Assert.Equal(expectedTokens[i], actualTokens[i]); + } + + Assert.Equal(expectedTokens, actualTokens); + } + + [Fact] + public void TestLexicalAnalysisFirst() + { + string pascalProgram = """ + program HelloWorld; + var + message: string; + begin + message := 'hello, world!'; + writeln(message); + end. + """; + + var stringLiterals = new List<(string, SemanticTokenType, int)> + { + ("program", SemanticTokenType.Keyword, 0), + ("HelloWorld", SemanticTokenType.Identifier, 0), + (";", SemanticTokenType.Delimiter, 0), + ("var", SemanticTokenType.Keyword, 0), + ("message", SemanticTokenType.Identifier, 0), + (":", SemanticTokenType.Delimiter, 0), + ("string", SemanticTokenType.Identifier, 0), + (";", SemanticTokenType.Delimiter, 1), + ("begin", SemanticTokenType.Keyword, 0), + ("message", SemanticTokenType.Identifier, 1), + (":=", SemanticTokenType.Operator, 0), + ("hello, world!", SemanticTokenType.Character, 0), + (";", SemanticTokenType.Delimiter, 2), + ("writeln", SemanticTokenType.Identifier, 0), + ("(", SemanticTokenType.Delimiter, 0), + ("message", SemanticTokenType.Identifier, 2), + (")", SemanticTokenType.Delimiter, 0), + (";", SemanticTokenType.Delimiter, 3), + ("end", SemanticTokenType.Keyword, 0), + (".", SemanticTokenType.Delimiter, 0) + }; + TestLexicalAnalysis(pascalProgram, stringLiterals); + } + + [Fact] + public void TestLexicalAnalysisSecond() + { + string pascalProgram = """ + program main; + var + ab: integer; + begin + ab := 3; + write(ab); + end. 
+ """; + + var stringLiterals = new List<(string, SemanticTokenType, int)> + { + ("program", SemanticTokenType.Keyword, 0), + ("main", SemanticTokenType.Identifier, 0), + (";", SemanticTokenType.Delimiter, 0), + ("var", SemanticTokenType.Keyword, 0), + ("ab", SemanticTokenType.Identifier, 0), + (":", SemanticTokenType.Delimiter, 0), + ("integer", SemanticTokenType.Keyword, 0), + (";", SemanticTokenType.Delimiter, 1), + ("begin", SemanticTokenType.Keyword, 0), + ("ab", SemanticTokenType.Identifier, 1), + (":=", SemanticTokenType.Operator, 0), + ("3", SemanticTokenType.Number, 0), + (";", SemanticTokenType.Delimiter, 2), + ("write", SemanticTokenType.Identifier, 0), + ("(", SemanticTokenType.Delimiter, 0), + ("ab", SemanticTokenType.Identifier, 2), + (")", SemanticTokenType.Delimiter, 0), + (";", SemanticTokenType.Delimiter, 3), + ("end", SemanticTokenType.Keyword, 0), + (".", SemanticTokenType.Delimiter, 0) + }; + TestLexicalAnalysis(pascalProgram, stringLiterals); + } + + //带注释的测试 + [Fact] + public void TestLexicalAnalysisThird() + { + string pascalProgram = """ + {test} + program main; + var + ab, ba: integer; + begin + ab := 3; + ba := 5; + ab := 5; + write(ab + ba); + end. + """; + + var stringLiterals = new List<(string, SemanticTokenType, int)> + { + ("program", SemanticTokenType.Keyword, 0), + ("main", SemanticTokenType.Identifier, 0), + (";", SemanticTokenType.Delimiter, 0), + ("var", SemanticTokenType.Keyword, 0), + ("ab", SemanticTokenType.Identifier, 0), + (",", SemanticTokenType.Delimiter, 0), + ("ba", SemanticTokenType.Identifier, 0), + (":", SemanticTokenType.Delimiter, 0), + ("integer", SemanticTokenType.Keyword, 0), + (";", SemanticTokenType.Delimiter, 1), + ("begin", SemanticTokenType.Keyword, 0), + ("ab", SemanticTokenType.Identifier, 1), + (":=", SemanticTokenType.Operator, 0), + ("3", SemanticTokenType.Number, 0), + (";", SemanticTokenType.Delimiter, 2), + ("ba", SemanticTokenType.Identifier, 1), + (":=", SemanticTokenType.Operator, 1), + ("5", SemanticTokenType.Number, 0), + (";", SemanticTokenType.Delimiter, 3), + ("ab", SemanticTokenType.Identifier, 2), + (":=", SemanticTokenType.Operator, 2), + ("5", SemanticTokenType.Number, 1), + (";", SemanticTokenType.Delimiter, 4), + ("write", SemanticTokenType.Identifier, 0), + ("(", SemanticTokenType.Delimiter, 0), + ("ab", SemanticTokenType.Identifier, 3), + ("+", SemanticTokenType.Operator, 0), + ("ba", SemanticTokenType.Identifier, 2), + (")", SemanticTokenType.Delimiter, 0), + (";", SemanticTokenType.Delimiter, 5), + ("end", SemanticTokenType.Keyword, 0), + (".", SemanticTokenType.Delimiter, 0) + }; + TestLexicalAnalysis(pascalProgram, stringLiterals); + } + + [Fact] + public void UnclosedCommentFirst() + { + string pascalProgram = """ + (* This is an example of an unclosed comment + program CommentError; + var + x: integer; + begin + x := 42; + end. + """; + var lexer = new Lexer(pascalProgram); + var ex = Assert.Throws(() => lexer.Tokenize()); + //打印exception信息 + _testOutputHelper.WriteLine(ex.ToString()); + Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType); + Assert.Equal((uint)7, ex.Line); + Assert.Equal((uint)5, ex.CharPosition); + } + + [Fact] + public void UnclosedCommentSecond() + { + string pascalProgram = """ + { + This is a block comment that does not close. 
+ + program CommentNotClosed; + """; + var lexer = new Lexer(pascalProgram); + var ex = Assert.Throws(() => lexer.Tokenize()); +_testOutputHelper.WriteLine(ex.ToString()); + Assert.Equal(LexemeErrorType.UnclosedComment, ex.ErrorType); + Assert.Equal((uint)4, ex.Line); + Assert.Equal((uint)26, ex.CharPosition); + } +} diff --git a/Canon.Tests/LexicalParserTests/NumberTests.cs b/Canon.Tests/LexicalParserTests/NumberTests.cs index f2d7d0f..28764bf 100644 --- a/Canon.Tests/LexicalParserTests/NumberTests.cs +++ b/Canon.Tests/LexicalParserTests/NumberTests.cs @@ -1,46 +1,58 @@ using Canon.Core.Enums; using Canon.Core.LexicalParser; +using Canon.Core.Exceptions; +using Xunit.Abstractions; namespace Canon.Tests.LexicalParserTests { + public class NumberTests { + private readonly ITestOutputHelper _testOutputHelper; + public NumberTests(ITestOutputHelper testOutputHelper) + { + _testOutputHelper = testOutputHelper; + } + [Theory] - [InlineData("123", 123, NumberType.Integer)] - [InlineData("0", 0, NumberType.Integer)] - [InlineData("-123", -123, NumberType.Integer)] - [InlineData("1.23", 1.23, NumberType.Real)] - [InlineData("-1.23", -1.23, NumberType.Real)] - [InlineData("0.0", 0.0, NumberType.Real)] - [InlineData("1e7", 1e7, NumberType.Real)] - [InlineData("1E7", 1E7, NumberType.Real)] - [InlineData("1.23e-7", 1.23e-7, NumberType.Real)] - [InlineData("1.23E-7", 1.23E-7, NumberType.Real)] - [InlineData("1234567890", 1234567890, NumberType.Integer)] - [InlineData("1234567890.1234567890", 1234567890.1234567890, NumberType.Real)] - [InlineData("-1234567890", -1234567890, NumberType.Integer)] - [InlineData("-1234567890.1234567890", -1234567890.1234567890, NumberType.Real)] - [InlineData("1e-7", 1e-7, NumberType.Real)] - [InlineData("1E-7", 1E-7, NumberType.Real)] - [InlineData("1E", 0, NumberType.Real, false)] - [InlineData("abc", 0, NumberType.Integer, false)] - [InlineData("123abc", 123, NumberType.Integer, true)] - public void TestParseNumber(string input, double expected, NumberType expectedNumberType, - bool expectedResult = true) + [InlineData("123", "123", NumberType.Integer)] + [InlineData("0", "0", NumberType.Integer)] + [InlineData("1.23", "1.23", NumberType.Real)] + [InlineData("0.0", "0.0", NumberType.Real)] + [InlineData("1e7", "1e7", NumberType.Real)] + [InlineData("1E7", "1E7", NumberType.Real)] + [InlineData("1.23e-7", "1.23e-7", NumberType.Real)] + [InlineData("1.23E-7", "1.23E-7", NumberType.Real)] + [InlineData("1234567890", "1234567890", NumberType.Integer)] + [InlineData("1234567890.1234567890", "1234567890.1234567890", NumberType.Real)] + [InlineData("1e-7", "1e-7", NumberType.Real)] + [InlineData("1E-7", "1E-7", NumberType.Real)] + [InlineData(".67",".67", NumberType.Real)] + [InlineData("$123", "0x123", NumberType.Hex)] + public void TestParseNumber(string input, string expected, NumberType expectedNumberType) { Lexer lexer = new(input); List tokens = lexer.Tokenize(); - SemanticToken token = tokens[0]; - if (!expectedResult) - { - Assert.NotEqual(SemanticTokenType.Keyword, token.TokenType); - return; - } Assert.Equal(SemanticTokenType.Number, token.TokenType); NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token; Assert.Equal(expectedNumberType, numberSemanticToken.NumberType); - Assert.Equal(expected, numberSemanticToken.Value); + Assert.Equal(expected, numberSemanticToken.LiteralValue); + } + + [Theory] + [InlineData("1E", 1, 3, LexemeErrorType.IllegalNumberFormat)] + [InlineData("123abc", 1, 4, LexemeErrorType.IllegalNumberFormat)] + [InlineData("123.45.67", 
1, 7, LexemeErrorType.IllegalNumberFormat)] + [InlineData("123identifier", 1, 4, LexemeErrorType.IllegalNumberFormat)] + public void TestParseNumberError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) + { + Lexer lexer = new(input); + var ex = Assert.Throws(() => lexer.Tokenize()); + _testOutputHelper.WriteLine(ex.ToString()); + Assert.Equal(expectedErrorType, ex.ErrorType); + Assert.Equal(expectedLine, ex.Line); + Assert.Equal(expectedCharPosition, ex.CharPosition); } } } diff --git a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs index b90a2c3..c9fa587 100644 --- a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs +++ b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs @@ -6,38 +6,33 @@ namespace Canon.Tests.LexicalParserTests; public class OperatorTypeTests { [Theory] - [InlineData("+ 123", OperatorType.Plus)] - [InlineData("+123", OperatorType.Plus)] - [InlineData("-123", OperatorType.Minus)] - [InlineData("*123", OperatorType.Multiply)] - [InlineData("/123", OperatorType.Divide)] - [InlineData("=123", OperatorType.Equal)] - [InlineData("<123", OperatorType.Less)] - [InlineData(">123", OperatorType.Greater)] - [InlineData("<=123", OperatorType.LessEqual)] - [InlineData(">=123", OperatorType.GreaterEqual)] - [InlineData("<>123", OperatorType.NotEqual)] - [InlineData(":=123", OperatorType.Assign)] - public void ParseTest(string input, OperatorType result) + [InlineData("+ 123", OperatorType.Plus, true)] + [InlineData("+123", OperatorType.Plus, true)] + [InlineData("-123", OperatorType.Minus, true)] + [InlineData("*123", OperatorType.Multiply, true)] + [InlineData("/123", OperatorType.Divide, true)] + [InlineData("=123", OperatorType.Equal, true)] + [InlineData("<123", OperatorType.Less, true)] + [InlineData(">123", OperatorType.Greater, true)] + [InlineData("<=123", OperatorType.LessEqual, true)] + [InlineData(">=123", OperatorType.GreaterEqual, true)] + [InlineData("<>123", OperatorType.NotEqual, true)] + [InlineData(":=123", OperatorType.Assign, true)] + [InlineData("1 + 123", OperatorType.Plus, false)] + [InlineData("m +123", OperatorType.Plus, false)] + public void ParseTest(string input, OperatorType result, bool expectedResult) { Lexer lexer = new(input); List tokens = lexer.Tokenize(); SemanticToken token = tokens[0]; + if (!expectedResult) + { + Assert.NotEqual(SemanticTokenType.Operator, token.TokenType); + return; + } Assert.Equal(SemanticTokenType.Operator, token.TokenType); OperatorSemanticToken operatorSemanticToken = (OperatorSemanticToken)token; Assert.Equal(result, operatorSemanticToken.OperatorType); } - - [Theory] - [InlineData("1 + 123")] - [InlineData("m +123")] - public void ParseFailedTest(string input) - { - Lexer lexer = new(input); - List tokens = lexer.Tokenize(); - - SemanticToken token = tokens[0]; - Assert.NotEqual(SemanticTokenType.Operator, token.TokenType); - } }
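
Not part of the patch: a minimal consumer-side sketch of the error-reporting flow this change introduces. It assumes Tokenize() returns List<SemanticToken>, as the updated tests imply; the input snippet is only an illustration.

using System;
using System.Collections.Generic;
using Canon.Core.Enums;
using Canon.Core.Exceptions;
using Canon.Core.LexicalParser;

// Sketch only: feed a Pascal snippet with an unclosed string literal
// through the lexer and report the structured error information.
const string source = "program main; begin write('unclosed; end.";

Lexer lexer = new(source);
try
{
    List<SemanticToken> tokens = lexer.Tokenize();
    foreach (SemanticToken token in tokens)
    {
        // Each token carries its position and literal value.
        Console.WriteLine($"{token.TokenType}: {token.LiteralValue}");
    }
}
catch (LexemeException ex)
{
    // ErrorType/Line/CharPosition replace the old message-only constructors.
    Console.WriteLine($"{ex.ErrorType} at line {ex.Line}, column {ex.CharPosition}");
    if (ex.ErrorType == LexemeErrorType.UnclosedStringLiteral)
    {
        Console.WriteLine("Hint: the string literal is missing its closing quote.");
    }
}

Carrying the position data on the exception itself, rather than only inside the message string, is what allows ErrorSingleTests, NumberTests and LexicalFileTests to assert exact error types and line/column values.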