diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs index a334e37..6057329 100644 --- a/Canon.Core/Enums/SemanticEnums.cs +++ b/Canon.Core/Enums/SemanticEnums.cs @@ -8,14 +8,12 @@ public enum SemanticTokenType Delimiter, Identifier, Character, + Empty, + Error, // 加了一个错误token /// /// 语法分析中的栈底符号 /// - End, - /// - /// 语法分析中的空串符号 - /// - Empty + End } public enum DelimiterType @@ -89,3 +87,11 @@ public enum NumberType Real, Hex } + +public enum StateType +{ + Word, + Digit, + Delimiter, + Other +} diff --git a/Canon.Core/LexicalParser/Lexer.cs b/Canon.Core/LexicalParser/Lexer.cs index 3d618d3..516de9b 100644 --- a/Canon.Core/LexicalParser/Lexer.cs +++ b/Canon.Core/LexicalParser/Lexer.cs @@ -1,92 +1,677 @@ -namespace Canon.Core.LexicalParser; +using System.Text; +using Canon.Core.Enums; -public class Lexer +namespace Canon.Core.LexicalParser; + +public class Lexer(string source) { - private readonly LinkedList _source; - private LinkedListNode? _currentNode; - private uint _line = 1; - private uint _charPosition; - private readonly List _tokens = []; - public Lexer(string source) + // 保留关键字 + private readonly string[] _keywords = + [ + "Program", "Const", "Var", "Procedure", + "Function", "Begin", "End", "Array", + "Of", "If", "Then", "Else", + "For", "To", "Do", "Integer", + "Real", "Boolean", "Character", "Divide", + "Not", "Mod", "And", "Or" + ]; + + private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]","'","\"",".."]; + + // 状态机 + private StateType _state; + private char _ch; + + private LinkedList _token = new LinkedList(); + // bool save; + // int saved_state; + bool _finish; + private bool eof; + + //缓冲区 + private readonly char[] _buffer = new char[2048]; + // int start_pos; + private int _fwdPos; + + // 计数器 + private uint _line = 1; + private uint _chPos; + private int _sourcePos; + private readonly Dictionary _tokenCount = new Dictionary { - // 将字符串转换为LinkedList - _source = new LinkedList(source); - _currentNode = _source.First; - } + { SemanticTokenType.Keyword, 0 }, + { SemanticTokenType.Number, 0 }, + { SemanticTokenType.Operator, 0 }, + { SemanticTokenType.Delimiter, 0 }, + { SemanticTokenType.Identifier, 0 }, + { SemanticTokenType.Character, 0 }, + { SemanticTokenType.Error, 0 }, + { SemanticTokenType.End, 0 } + }; + + private readonly List _tokens = []; public List Tokenize() { - while (_currentNode != null) - { - _charPosition = 0; // 重置字符位置 - SkipWhitespace(); + // 缓冲区 + // start_pos = 0; + _fwdPos = 0; + FillLeftBuffer(); - if (_currentNode == null) break; // 如果跳过空格后到达了末尾,则退出循环 + // 状态机 + _finish = false; - SemanticToken? token = null; + while (!_finish) { + GetChar(); + GetNbc(); - // 尝试解析各种类型的词法单元 - if (DelimiterSemanticToken.TryParse(_line, _charPosition, _currentNode, out var delimiterToken)) - { - token = delimiterToken; - } - else if (CharacterSemanticToken.TryParse(_line, _charPosition, _currentNode, out var characterToken)) - { - token = characterToken; - } - else if (KeywordSemanticToken.TryParse(_line, _charPosition, _currentNode, out var keywordToken)) - { - token = keywordToken; - } - else if (OperatorSemanticToken.TryParse(_line, _charPosition, _currentNode, out var operatorToken)) - { - token = operatorToken; - } - else if (NumberSemanticToken.TryParse(_line, _charPosition, _currentNode, out var numberToken)) - { - token = numberToken; - } - else if (IdentifierSemanticToken.TryParse(_line, _charPosition, _currentNode, out var identifierToken)) - { - token = identifierToken; - } + _token = new LinkedList(); - if (token != null) - { - _tokens.Add(token); - // 根据词法单元的长度移动currentNode - MoveCurrentNode(token.LiteralValue.Length); + if (IsLetter()) { + _state = StateType.Word; + } + else if (IsDigit()) { + _state = StateType.Digit; + } + else if (IsDelimiter()) { + _state = StateType.Delimiter; } else { - // 未能识别的字符,跳过 - MoveCurrentNode(1); + _state = StateType.Other; } - } - // tokens.Add(new EOFToken(line, charPosition)); // 添加EOF标记 + switch (_state) + { + case StateType.Word: { + while (IsDigit() || IsLetter()) + { + Cat(); + GetChar(); + } + Retract(); + + if (IsKeyword()) + { + KeywordType keywordType = + KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First)); + MakeToken(keywordType); + } + else { + MakeToken(SemanticTokenType.Identifier); + } + break; + } + + case StateType.Digit: + { + bool error = false; + bool tag = false; // 用于标记是否已经处理过科学记数法的指数部分 + bool doubleDot = false; + NumberType numberType = NumberType.Integer; + + while (IsDigit() || _ch == '.' || _ch == 'E' || _ch == '+' || _ch == '-' || _ch == 'e' || IsLetter()) { + if (_ch != '.') + { + Cat(); + } + + + if (_ch == '0' && !tag) { + GetChar(); + if (_ch == 'x' || _ch == 'X') { + numberType = NumberType.Hex; // 标识十六进制 + Cat(); + while (IsHexDigit()) { // 假设IsHexDigit方法能够识别十六进制数字 + Cat(); + } + break; + } + Retract(); // 如果不是'x'或'X',回退一个字符 + } + else if (_ch == '.') { + GetChar(); + if (_ch == '.') { + Retract(); // 回退到第一个'.' + Retract(); // 回退到'.'之前的数字 + doubleDot = true; + break; + } + Retract(); + Cat(); + numberType = NumberType.Real; + } + else if ((_ch == 'e' || _ch == 'E') && !tag) { + GetChar(); + if (IsDigit() || _ch == '+' || _ch == '-') { + Cat(); + tag = true; // 已处理指数部分 + continue; + } + error = true; // 错误的科学记数法 + break; + } + + GetChar(); + } + + if (!error) { + MakeToken(numberType); + if (doubleDot) + { + break; + } + Retract(); + } + else + { + Retract(); + PrintError(0,_token.First,_line); + _tokenCount[SemanticTokenType.Error]++; + } + break; + } + + case StateType.Delimiter: + Cat(); + switch (_ch) + { + case '.': + { + GetChar(); + if (_ch == '.') + { + Cat(); + MakeToken(DelimiterType.DoubleDots); + break; + } + Retract(); + if (IsPeriod()) + { + + }else if (IsDot()) + { + + } + } + break; + case '\'': + case '\"': + { + if(_ch == '\'') MakeToken(DelimiterType.SingleQuotation); + else if(_ch == '\"') MakeToken(DelimiterType.DoubleQuotation); + + // 重置_token,准备收集字符串内容 + _token = new LinkedList(); + + GetChar(); // 移动到下一个字符,即字符串的第一个字符 + while (_ch != '\'' && _ch != '\"') + { + Cat(); // 收集字符 + GetChar(); // 移动到下一个字符 + } + + // 在退出循环时,_ch为'或EOF,此时_token包含字符串内容 + // 创建字符内容的token,注意这里使用SemanticTokenType.String表示字符串字面量 + MakeToken(SemanticTokenType.Character); // 或其它适用于字符串字面量的SemanticTokenType + _token = new LinkedList(); // 重置_token + + if (_ch == '\'' && _ch != '\n') + { + // 识别并创建最后一个单引号的token + Cat(); + MakeToken(DelimiterType.SingleQuotation); + } + else if (_ch == '\"') + { + Cat(); + MakeToken(DelimiterType.DoubleQuotation); + } + else + { + // 这里处理遇到EOF但没有闭合单引号的情况,例如:'字符串结尾没有单引号 + // 可以添加错误处理代码 + PrintError(0, _token.First, _line); // 假设这个方法用于打印错误 + } + } + break; + case ',': + MakeToken(DelimiterType.Comma); + break; + case ':': + MakeToken(DelimiterType.Colon); + break; + case ';': + MakeToken(DelimiterType.Semicolon); + break; + case '(': + MakeToken(DelimiterType.LeftParenthesis); + break; + case ')': + MakeToken(DelimiterType.RightParenthesis); + break; + case '[': + MakeToken(DelimiterType.LeftSquareBracket); + break; + case ']': + MakeToken(DelimiterType.RightSquareBracket); + break; + } + break; + + case StateType.Other: + DealOther(); + break; + default: + throw new ArgumentOutOfRangeException(); + } + + } + PrintResult(); return _tokens; } - private void SkipWhitespace() + private bool IsDot() { - while (_currentNode != null && char.IsWhiteSpace(_currentNode.Value)) + SemanticToken tokenBefore = _tokens.Last(); + if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true; + return false; + } + + private bool IsPeriod() + { + SemanticToken tokenBefore = _tokens.Last(); + if (tokenBefore.TokenType == SemanticTokenType.Keyword) return true; + return false; + } + + private void DealOther() + { + switch (_ch) { - if (_currentNode.Value == '\n') - { - _line++; - _charPosition = 0; - } - _currentNode = _currentNode.Next; + case '+': // 识别 + + Cat(); + MakeToken(OperatorType.Plus); + break; + case '-': // 识别 - + Cat(); + MakeToken(OperatorType.Minus); + break; + case '*': // 识别 * + Cat(); + MakeToken(OperatorType.Multiply); + break; + case '/': // 识别 / + Cat(); + MakeToken(OperatorType.Divide); + break; + case '=': + Cat(); + MakeToken(OperatorType.Equal); + break; + case '<': + Cat(); + GetChar(); + if (_ch == '=') + { + // 识别 <= + Cat(); + MakeToken(OperatorType.LessEqual); + } + else if(_ch == '>') + { + // 识别 <> + Cat(); + MakeToken(OperatorType.NotEqual); + } + else + { + // 识别 < + Retract(); + MakeToken(OperatorType.Less); + } + break; + case '>': + Cat(); + GetChar(); + if (_ch == '=') + { + // 识别 >= + Cat(); + MakeToken(OperatorType.GreaterEqual); + } + else + { + // 识别 > + Retract(); + MakeToken(OperatorType.Greater); + } + break; + case ':': + Cat(); + GetChar(); + if (_ch == '=') + { + // 识别 := + Cat(); + MakeToken(OperatorType.Assign); + } + else + { + // 这里应该被识别为delimiter逻辑上 + Cat(); + PrintError(1, _token.First, _line); + _tokenCount[SemanticTokenType.Error]++; + } + break; + default: + Cat(); + PrintError(1, _token.First, _line); + _tokenCount[SemanticTokenType.Error]++; + break; } } - private void MoveCurrentNode(int steps) + private void MakeToken(SemanticTokenType tokenType) { - for (int i = 0; i < steps && _currentNode != null; i++) + SemanticToken? token; + if (_token.First == null) { - _currentNode = _currentNode.Next; + Console.WriteLine("11"); + } + switch (tokenType) + { + case SemanticTokenType.Character: + CharacterSemanticToken characterSemanticToken = new CharacterSemanticToken() + { + LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First), + }; + token = characterSemanticToken; + break; + case SemanticTokenType.Identifier: + IdentifierSemanticToken identifierSemanticToken = new IdentifierSemanticToken() + { + LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First), + }; + token = identifierSemanticToken; + break; + case SemanticTokenType.Error: + ErrorSemanticToken errorSemanticToken = new ErrorSemanticToken() + { + LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First), + }; + token = errorSemanticToken; + break; + + default: + throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null); + } + + if (token != null) + { + _tokens.Add(token); + _tokenCount[tokenType]++; + Console.WriteLine($"<{tokenType}>"); + Console.WriteLine(LinkedListToString(_token.First)); + } + + + } + + private void MakeToken(KeywordType keywordType) + { + KeywordSemanticToken keywordSemanticToken = new KeywordSemanticToken + { + LinePos = _line, + CharacterPos = _chPos, + LiteralValue = LinkedListToString(_token.First), + KeywordType = keywordType + }; + _tokens.Add(keywordSemanticToken); + _tokenCount[SemanticTokenType.Keyword]++; + Console.WriteLine($"<{SemanticTokenType.Keyword}> <{keywordType}>"); + Console.WriteLine(LinkedListToString(_token.First)); + } + + private void MakeToken(DelimiterType delimiterType) + { + DelimiterSemanticToken delimiterSemanticToken = new DelimiterSemanticToken() + { + LinePos = _line, + CharacterPos = _chPos, + LiteralValue = LinkedListToString(_token.First), + DelimiterType = delimiterType + }; + _tokens.Add(delimiterSemanticToken); + _tokenCount[SemanticTokenType.Delimiter]++; + Console.WriteLine($"<{SemanticTokenType.Delimiter}> <{delimiterType}>"); + Console.WriteLine(LinkedListToString(_token.First)); + } + + private void MakeToken(OperatorType operatorType) + { + OperatorSemanticToken operatorSemanticToken = new OperatorSemanticToken() + { + LinePos = _line, + CharacterPos = _chPos, + LiteralValue = LinkedListToString(_token.First), + OperatorType = operatorType + }; + _tokens.Add(operatorSemanticToken); + _tokenCount[SemanticTokenType.Operator]++; + Console.WriteLine($"<{SemanticTokenType.Operator}> <{operatorType}>"); + Console.WriteLine(LinkedListToString(_token.First)); + } + + private void MakeToken(NumberType numberType) + { + NumberSemanticToken numberSemanticToken = new NumberSemanticToken() + { + LinePos = _line, + CharacterPos = _chPos, + LiteralValue = LinkedListToString(_token.First), + NumberType = numberType + }; + _tokens.Add(numberSemanticToken); + _tokenCount[SemanticTokenType.Number]++; + Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>"); + Console.WriteLine(LinkedListToString(_token.First)); + } + + // 填充buffer操作 + private void FillLeftBuffer() { + //cout << "fill left" << endl; + for (int i = 0; i < _buffer.Length / 2; i++) { + _buffer[i] = '$'; + } + + // 确保source字符串足够长,避免超出范围 + int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos); + + // 使用Array.Copy方法 + Array.Copy(source.ToCharArray(), _sourcePos, _buffer, 0, lengthToCopy); + + _sourcePos += lengthToCopy; + + if (_sourcePos == source.Length) { + eof = true; + } + } + + private void FillRightBuffer() { + //cout << "fill right" << endl; + for (int i = _buffer.Length / 2; i < _buffer.Length; i++) { + _buffer[i] = '$'; + } + + // 确保source字符串足够长,避免超出范围 + int lengthToCopy = Math.Min(_buffer.Length / 2 - 1, source.Length - _sourcePos); + + // 使用Array.Copy方法 + Array.Copy(source.ToCharArray(), _sourcePos, _buffer, _buffer.Length / 2, lengthToCopy); + + _sourcePos += lengthToCopy; + + if (_sourcePos == source.Length) { + eof = true; + } + } + + private void PrintBuffer() { + for (int i = 0; i < _buffer.Length; i++) { + Console.WriteLine($"[{i}] {_buffer[i]}"); + } + } + + void DealEof() { + if (eof) _finish = true; + else if (_fwdPos < _buffer.Length / 2) { + FillRightBuffer(); + _fwdPos = _buffer.Length / 2; + } + else { + FillLeftBuffer(); + // start_pos = 0; + _fwdPos = 0; + } + } + + // 读取buffer操作 + void GetChar() { + if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos]; + _chPos++; + if (_ch == '$') { + DealEof(); + if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos]; + } + if (_fwdPos < _buffer.Length) _fwdPos++; + } + + private void GetNbc() { + while (_ch == ' ' || _ch == '\n' || _ch == '\t' || _ch == '\r') { + if (_ch == '\n') { + _line++; + _chPos = 0; + } + GetChar(); + } + } + + private void Retract() { + _fwdPos -= 2; + _chPos -= 2; + GetChar(); + } + + private void Cat() + { + _token.AddLast(_ch); + // cout << "加入" << ch << endl; + } + + private string LinkedListToString(LinkedListNode first) + { + // 使用 StringBuilder 来构建字符串 + StringBuilder sb = new StringBuilder(); + for (LinkedListNode node = first; node != null; node = node.Next) + { + sb.Append(node.Value); + } + + // 将 StringBuilder 的内容转换为字符串 + string result = sb.ToString(); + + return result; + } + + // 判断字符 + private bool IsDigit() { + if (_ch >= '0' && _ch <= '9') return true; + return false; + } + + private bool IsHexDigit() + { + if ((_ch >= '0' && _ch <= '9') || (_ch<= 'F' && _ch >= 'A')) return true; + return false; + } + + private bool IsLetter() { + if ((_ch >= 'A' && _ch <= 'Z') || (_ch >= 'a' && _ch <= 'z' || _ch == '_')) { + return true; + } + return false; + } + + private bool IsKeyword() + { + string tokenString = LinkedListToString(_token.First); + + foreach (var t in _keywords) + { + if (string.Equals(tokenString, t, StringComparison.OrdinalIgnoreCase)) return true; + } + return false; + } + + + private bool IsDelimiter() + { + foreach (var delimiter in _delimiter) + { + if (delimiter.Contains(_ch)) + { + if (_ch != ':') + { + return true; + } + + GetChar(); + if (_ch == '=') + { + Retract(); + return false; + } + + return true; + } + } + return false; + } + + private void PrintToken(SemanticTokenType type, LinkedListNode token, uint line) + { + string tokenString = LinkedListToString(token); + string typeName = Enum.GetName(typeof(SemanticTokenType), type) ?? "Unknown"; + Console.WriteLine($"{line} <{typeName.ToUpperInvariant()},{tokenString}>"); + } + + // PrintToken(SemanticTokenType.Keyword, "if", 42); // 假设'if'是token,42是行号 + + private void PrintError(int type, LinkedListNode token, uint line) + { + string tokenString = LinkedListToString(token); + switch (type) + { + case 0: + Console.WriteLine($"{line} "); + break; + case 1: + Console.WriteLine($"{line} "); + break; + } + } + + // PrintError(0, "unexpected symbol", 42); // 假设 "unexpected symbol" 是错误的 token,42 是行号 + + private void PrintResult() + { + Console.WriteLine(_line); + foreach (var pair in _tokenCount) + { + Console.WriteLine($"{pair.Key}: {pair.Value}"); } } } + diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs index 72d8cf3..8e42917 100644 --- a/Canon.Core/LexicalParser/SemanticToken.cs +++ b/Canon.Core/LexicalParser/SemanticToken.cs @@ -129,6 +129,46 @@ public class KeywordSemanticToken : SemanticToken public required KeywordType KeywordType { get; init; } + public static readonly Dictionary KeywordTypes = new Dictionary(StringComparer.OrdinalIgnoreCase) + { + { "program", KeywordType.Program }, + { "const", KeywordType.Const }, + { "var", KeywordType.Var }, + { "procedure", KeywordType.Procedure }, + { "function", KeywordType.Function }, + { "begin", KeywordType.Begin }, + { "end", KeywordType.End }, + { "array", KeywordType.Array }, + { "of", KeywordType.Of }, + { "if", KeywordType.If }, + { "then", KeywordType.Then }, + { "else", KeywordType.Else }, + { "for", KeywordType.For }, + { "to", KeywordType.To }, + { "do", KeywordType.Do }, + { "integer", KeywordType.Integer }, + { "real", KeywordType.Real }, + { "boolean", KeywordType.Boolean }, + { "character", KeywordType.Character }, + { "div", KeywordType.Divide }, // 注意: Pascal 使用 'div' 而不是 '/' + { "not", KeywordType.Not }, + { "mod", KeywordType.Mod }, + { "and", KeywordType.And }, + { "or", KeywordType.Or } + }; + + public static KeywordType GetKeywordTypeByKeyword(string keyword) + { + if (KeywordTypes.TryGetValue(keyword, out var keywordType)) + { + return keywordType; + } + else + { + throw new ArgumentException($"Unknown keyword: {keyword}"); + } + } + public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now, out KeywordSemanticToken? token) { @@ -200,7 +240,6 @@ public class OperatorSemanticToken : SemanticToken /// /// 数值类型记号 /// -/// TODO:进制表示(只有$1的十六进制表示) public class NumberSemanticToken : SemanticToken { public override SemanticTokenType TokenType => SemanticTokenType.Number; @@ -287,3 +326,19 @@ public class EndSemanticToken : SemanticToken { public override SemanticTokenType TokenType => SemanticTokenType.End; } + +/// +/// 错误类型记号 +/// +public class ErrorSemanticToken : SemanticToken +{ + public override SemanticTokenType TokenType => SemanticTokenType.Error; + + public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now, + out IdentifierSemanticToken? token) + { + token = null; + return false; + } +} + diff --git a/Canon.Tests/LexicalParserTests/Array.cs b/Canon.Tests/LexicalParserTests/Array.cs deleted file mode 100644 index abd8c40..0000000 --- a/Canon.Tests/LexicalParserTests/Array.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Canon.Tests.LexicalParserTests; - -public class Array -{ - -} diff --git a/Canon.Tests/LexicalParserTests/DelimiterTests.cs b/Canon.Tests/LexicalParserTests/DelimiterTests.cs index 4c7e5a9..5b653db 100644 --- a/Canon.Tests/LexicalParserTests/DelimiterTests.cs +++ b/Canon.Tests/LexicalParserTests/DelimiterTests.cs @@ -3,7 +3,6 @@ using Canon.Core.LexicalParser; namespace Canon.Tests.LexicalParserTests; - public class DelimiterTests { [Theory] @@ -17,11 +16,12 @@ public class DelimiterTests [InlineData("]asd", DelimiterType.RightSquareBracket)] public void SmokeTest(string input, DelimiterType type) { - LinkedList content = Utils.GetLinkedList(input); + Lexer lexer = new(input); + List tokens = lexer.Tokenize(); - Assert.True(DelimiterSemanticToken.TryParse(0, 0, content.First!, - out DelimiterSemanticToken? token)); - Assert.NotNull(token); - Assert.Equal(type, token.DelimiterType); + SemanticToken token = tokens[0]; + Assert.Equal(SemanticTokenType.Delimiter, token.TokenType); + DelimiterSemanticToken delimiterSemanticToken = (DelimiterSemanticToken)token; + Assert.Equal(type, delimiterSemanticToken.DelimiterType); } } diff --git a/Canon.Tests/LexicalParserTests/IndentifierTests.cs b/Canon.Tests/LexicalParserTests/IndentifierTests.cs index cfb657e..a46a10b 100644 --- a/Canon.Tests/LexicalParserTests/IndentifierTests.cs +++ b/Canon.Tests/LexicalParserTests/IndentifierTests.cs @@ -18,20 +18,13 @@ namespace Canon.Tests.LexicalParserTests [InlineData("identifier_with_special_chars@#", false)] [InlineData("", false)] [InlineData(" ", false)] - [InlineData("andand",false)] + [InlineData("andand", false)] public void TestParseIdentifier(string input, bool expectedResult) { - LinkedList content = Utils.GetLinkedList(input); - Assert.Equal(expectedResult, IdentifierSemanticToken.TryParse(0, 0, content.First!, - out IdentifierSemanticToken? token)); - if (expectedResult) - { - Assert.NotNull(token); - } - else - { - Assert.Null(token); - } + Lexer lexer = new(input); + List tokens = lexer.Tokenize(); + + Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier); } } } diff --git a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs index e36d149..fd22191 100644 --- a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs +++ b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs @@ -23,11 +23,12 @@ public class KeywordTypeTests [InlineData("do", KeywordType.Do)] public void SmokeTest(string input, KeywordType type) { - LinkedList content = Utils.GetLinkedList(input); + Lexer lexer = new(input); + List tokens = lexer.Tokenize(); - Assert.True(KeywordSemanticToken.TryParse(0, 0, content.First!, - out KeywordSemanticToken? token)); - Assert.NotNull(token); - Assert.Equal(type, token.KeywordType); + SemanticToken token = tokens[0]; + Assert.Equal(SemanticTokenType.Keyword, token.TokenType); + KeywordSemanticToken keywordSemanticToken = (KeywordSemanticToken)token; + Assert.Equal(type, keywordSemanticToken.KeywordType); } } diff --git a/Canon.Tests/LexicalParserTests/NumberTests.cs b/Canon.Tests/LexicalParserTests/NumberTests.cs index eb2bd94..f2d7d0f 100644 --- a/Canon.Tests/LexicalParserTests/NumberTests.cs +++ b/Canon.Tests/LexicalParserTests/NumberTests.cs @@ -25,21 +25,22 @@ namespace Canon.Tests.LexicalParserTests [InlineData("1E", 0, NumberType.Real, false)] [InlineData("abc", 0, NumberType.Integer, false)] [InlineData("123abc", 123, NumberType.Integer, true)] - public void TestParseNumber(string input, double expected, NumberType expectedNumberType, bool expectedResult = true) + public void TestParseNumber(string input, double expected, NumberType expectedNumberType, + bool expectedResult = true) { - LinkedList content = Utils.GetLinkedList(input); - Assert.Equal(expectedResult, NumberSemanticToken.TryParse(0, 0, content.First!, - out NumberSemanticToken? token)); - if (expectedResult) + Lexer lexer = new(input); + List tokens = lexer.Tokenize(); + + SemanticToken token = tokens[0]; + if (!expectedResult) { - Assert.NotNull(token); - Assert.Equal(expected, token.Value); - Assert.Equal(expectedNumberType, token.NumberType); - } - else - { - Assert.Null(token); + Assert.NotEqual(SemanticTokenType.Keyword, token.TokenType); + return; } + Assert.Equal(SemanticTokenType.Number, token.TokenType); + NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token; + Assert.Equal(expectedNumberType, numberSemanticToken.NumberType); + Assert.Equal(expected, numberSemanticToken.Value); } } } diff --git a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs index 7c37117..b90a2c3 100644 --- a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs +++ b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs @@ -7,9 +7,7 @@ public class OperatorTypeTests { [Theory] [InlineData("+ 123", OperatorType.Plus)] - [InlineData("1 + 123", OperatorType.Plus)] [InlineData("+123", OperatorType.Plus)] - [InlineData("m +123", OperatorType.Plus)] [InlineData("-123", OperatorType.Minus)] [InlineData("*123", OperatorType.Multiply)] [InlineData("/123", OperatorType.Divide)] @@ -22,20 +20,24 @@ public class OperatorTypeTests [InlineData(":=123", OperatorType.Assign)] public void ParseTest(string input, OperatorType result) { - LinkedList content = Utils.GetLinkedList(input); - Assert.True(OperatorSemanticToken.TryParse(0, 0, - content.First!, out OperatorSemanticToken? token)); - Assert.Equal(result, token?.OperatorType); + Lexer lexer = new(input); + List tokens = lexer.Tokenize(); + + SemanticToken token = tokens[0]; + Assert.Equal(SemanticTokenType.Operator, token.TokenType); + OperatorSemanticToken operatorSemanticToken = (OperatorSemanticToken)token; + Assert.Equal(result, operatorSemanticToken.OperatorType); } [Theory] - [InlineData("<><123")] - [InlineData("<=<123")] + [InlineData("1 + 123")] + [InlineData("m +123")] public void ParseFailedTest(string input) { - LinkedList content = Utils.GetLinkedList(input); - Assert.False(OperatorSemanticToken.TryParse(0, 0, - content.First!, out OperatorSemanticToken? token)); - Assert.Null(token); + Lexer lexer = new(input); + List tokens = lexer.Tokenize(); + + SemanticToken token = tokens[0]; + Assert.NotEqual(SemanticTokenType.Operator, token.TokenType); } }