diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs
index a334e37..6057329 100644
--- a/Canon.Core/Enums/SemanticEnums.cs
+++ b/Canon.Core/Enums/SemanticEnums.cs
@@ -8,14 +8,12 @@ public enum SemanticTokenType
Delimiter,
Identifier,
Character,
+ Empty,
+ Error, // 加了一个错误token
///
/// 语法分析中的栈底符号
///
- End,
- ///
- /// 语法分析中的空串符号
- ///
- Empty
+ End
}
public enum DelimiterType
@@ -89,3 +87,11 @@ public enum NumberType
Real,
Hex
}
+
+/// <summary>
+/// Category of the token being scanned; Lexer.Tokenize picks one from the
+/// first character of each token and dispatches on it.
+/// </summary>
+public enum StateType
+{
+ Word, // first character satisfies Lexer.IsLetter (a letter or '_')
+ Digit, // first character satisfies Lexer.IsDigit ('0'-'9')
+ Delimiter, // first character satisfies Lexer.IsDelimiter
+ Other // anything else; handled by Lexer.DealOther
+}
diff --git a/Canon.Core/LexicalParser/Lexer.cs b/Canon.Core/LexicalParser/Lexer.cs
index 3d618d3..516de9b 100644
--- a/Canon.Core/LexicalParser/Lexer.cs
+++ b/Canon.Core/LexicalParser/Lexer.cs
@@ -1,92 +1,677 @@
-namespace Canon.Core.LexicalParser;
+using System.Text;
+using Canon.Core.Enums;
-public class Lexer
+namespace Canon.Core.LexicalParser;
+
+public class Lexer(string source)
{
- private readonly LinkedList _source;
- private LinkedListNode? _currentNode;
- private uint _line = 1;
- private uint _charPosition;
- private readonly List _tokens = [];
- public Lexer(string source)
+ // Reserved words, compared case-insensitively by IsKeyword. Every entry must
+ // also be a key of KeywordSemanticToken.KeywordTypes: Tokenize passes each
+ // word accepted by IsKeyword to GetKeywordTypeByKeyword, which throws for a
+ // word it does not know. The integer-division keyword is therefore spelled
+ // "Div" (the table key is "div"), not "Divide" (the enum member name).
+ private readonly string[] _keywords =
+ [
+     "Program", "Const", "Var", "Procedure",
+     "Function", "Begin", "End", "Array",
+     "Of", "If", "Then", "Else",
+     "For", "To", "Do", "Integer",
+     "Real", "Boolean", "Character", "Div",
+     "Not", "Mod", "And", "Or"
+ ];
+
+ // Delimiter spellings; IsDelimiter reports a match when any entry contains the current character.
+ private readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]","'","\"",".."];
+
+ // State machine: category of the token being scanned and the character most recently read by GetChar.
+ private StateType _state;
+ private char _ch;
+
+ // Characters collected for the token under construction (Cat appends to it).
+ private LinkedList _token = new LinkedList();
+ // bool save;
+ // int saved_state;
+ // _finish is set by DealEof and ends the Tokenize loop.
+ bool _finish;
+ // eof is set by FillLeftBuffer/FillRightBuffer once _sourcePos reaches source.Length.
+ private bool eof;
+
+ // Buffer: two halves of 1024 characters that are refilled separately; unused slots hold the '$' sentinel.
+ private readonly char[] _buffer = new char[2048];
+ // int start_pos;
+ // Index in _buffer of the next character GetChar will read.
+ private int _fwdPos;
+
+ // Counters: current line (incremented by GetNbc on '\n'), character position (incremented by
+ // GetChar, reset to 0 on '\n'), and the number of source characters already copied into _buffer.
+ private uint _line = 1;
+ private uint _chPos;
+ private int _sourcePos;
+ // Per-type counters printed by PrintResult. NOTE(review): SemanticTokenType.Empty has no entry here.
+ private readonly Dictionary _tokenCount = new Dictionary
{
- // 将字符串转换为LinkedList
- _source = new LinkedList(source);
- _currentNode = _source.First;
- }
+ { SemanticTokenType.Keyword, 0 },
+ { SemanticTokenType.Number, 0 },
+ { SemanticTokenType.Operator, 0 },
+ { SemanticTokenType.Delimiter, 0 },
+ { SemanticTokenType.Identifier, 0 },
+ { SemanticTokenType.Character, 0 },
+ { SemanticTokenType.Error, 0 },
+ { SemanticTokenType.End, 0 }
+ };
+
+ // Tokens produced so far; this is the list Tokenize returns.
+ private readonly List _tokens = [];
+ /// <summary>
+ /// Scans the whole source text and returns the recognized tokens.
+ /// </summary>
+ /// <remarks>
+ /// Each iteration reads one character, skips whitespace, classifies the
+ /// character into a StateType and lets the matching case consume the rest of
+ /// the token. Cases that read one character of lookahead call Retract so the
+ /// next iteration starts on the first unconsumed character. Besides filling
+ /// the token list, this prints every token, every error and the final
+ /// statistics to the console.
+ /// </remarks>
+ /// <returns>The token list owned by this lexer, in source order.</returns>
public List Tokenize()
{
- while (_currentNode != null)
- {
- _charPosition = 0; // 重置字符位置
- SkipWhitespace();
+ // Buffer: start reading at the beginning of the left half.
+ _fwdPos = 0;
+ FillLeftBuffer();
- if (_currentNode == null) break; // 如果跳过空格后到达了末尾,则退出循环
+ // State machine: runs until DealEof reports the end of the source.
+ _finish = false;
- SemanticToken? token = null;
+ while (!_finish) {
+ GetChar();
+ GetNbc();
+ // NOTE(review): when the end of the source is reached here, _ch is the '$'
+ // sentinel and falls through to DealOther, which reports it as an error.
- // 尝试解析各种类型的词法单元
- if (DelimiterSemanticToken.TryParse(_line, _charPosition, _currentNode, out var delimiterToken))
- {
- token = delimiterToken;
- }
- else if (CharacterSemanticToken.TryParse(_line, _charPosition, _currentNode, out var characterToken))
- {
- token = characterToken;
- }
- else if (KeywordSemanticToken.TryParse(_line, _charPosition, _currentNode, out var keywordToken))
- {
- token = keywordToken;
- }
- else if (OperatorSemanticToken.TryParse(_line, _charPosition, _currentNode, out var operatorToken))
- {
- token = operatorToken;
- }
- else if (NumberSemanticToken.TryParse(_line, _charPosition, _currentNode, out var numberToken))
- {
- token = numberToken;
- }
- else if (IdentifierSemanticToken.TryParse(_line, _charPosition, _currentNode, out var identifierToken))
- {
- token = identifierToken;
- }
+ _token = new LinkedList();
- if (token != null)
- {
- _tokens.Add(token);
- // 根据词法单元的长度移动currentNode
- MoveCurrentNode(token.LiteralValue.Length);
+ if (IsLetter()) {
+ _state = StateType.Word;
+ }
+ else if (IsDigit()) {
+ _state = StateType.Digit;
+ }
+ else if (IsDelimiter()) {
+ _state = StateType.Delimiter;
}
else
{
- // 未能识别的字符,跳过
- MoveCurrentNode(1);
+ _state = StateType.Other;
}
- }
- // tokens.Add(new EOFToken(line, charPosition)); // 添加EOF标记
+ switch (_state)
+ {
+ case StateType.Word: {
+ // Collect letters, digits and '_' , then put back the character that ended the word.
+ while (IsDigit() || IsLetter())
+ {
+ Cat();
+ GetChar();
+ }
+ Retract();
+
+ if (IsKeyword())
+ {
+ KeywordType keywordType =
+ KeywordSemanticToken.GetKeywordTypeByKeyword(LinkedListToString(_token.First));
+ MakeToken(keywordType);
+ }
+ else {
+ MakeToken(SemanticTokenType.Identifier);
+ }
+ break;
+ }
+
+ case StateType.Digit:
+ {
+ bool error = false;
+ bool tag = false; // set once the exponent part of scientific notation has been consumed
+ bool doubleDot = false;
+ NumberType numberType = NumberType.Integer;
+
+ // '+' and '-' are deliberately not part of this condition: a sign belongs to
+ // a number only directly after the exponent marker, which is handled below.
+ // Otherwise "1+2" would be swallowed as a single number token.
+ while (IsDigit() || _ch == '.' || _ch == 'E' || _ch == 'e' || IsLetter()) {
+ if (_ch != '.')
+ {
+ Cat();
+ }
+
+
+ if (_ch == '0' && !tag) {
+ GetChar();
+ if (_ch == 'x' || _ch == 'X') {
+ numberType = NumberType.Hex; // hexadecimal literal
+ Cat();
+ // Advance past the marker and consume the hex digits one by one. The
+ // character that ends the run is put back by the Retract after MakeToken.
+ GetChar();
+ while (IsHexDigit()) {
+ Cat();
+ GetChar();
+ }
+ break;
+ }
+ Retract(); // not 'x'/'X': put the lookahead character back
+ }
+ else if (_ch == '.') {
+ GetChar();
+ if (_ch == '.') {
+ Retract(); // step back onto the first '.'
+ Retract(); // step back onto the digit before the '.'
+ doubleDot = true;
+ break;
+ }
+ Retract();
+ Cat();
+ numberType = NumberType.Real;
+ }
+ else if ((_ch == 'e' || _ch == 'E') && !tag) {
+ GetChar();
+ if (_ch == '+' || _ch == '-') {
+ // Optional exponent sign: append it here and look at the next character.
+ Cat();
+ GetChar();
+ }
+ if (IsDigit()) {
+ // The head of the loop appends this digit; appending it here as well
+ // would store it twice ("1e5" would become "1e55").
+ tag = true; // exponent part handled
+ continue;
+ }
+ error = true; // malformed scientific notation
+ break;
+ }
+
+ GetChar();
+ }
+
+ if (!error) {
+ MakeToken(numberType);
+ if (doubleDot)
+ {
+ // The cursor was already moved back in front of the "..".
+ break;
+ }
+ Retract();
+ }
+ else
+ {
+ Retract();
+ PrintError(0,_token.First,_line);
+ _tokenCount[SemanticTokenType.Error]++;
+ }
+ break;
+ }
+
+ case StateType.Delimiter:
+ Cat();
+ switch (_ch)
+ {
+ case '.':
+ {
+ GetChar();
+ if (_ch == '.')
+ {
+ Cat();
+ MakeToken(DelimiterType.DoubleDots);
+ break;
+ }
+ Retract();
+ // NOTE(review): both branches are empty, so a single '.' currently
+ // produces no token at all - confirm which DelimiterType members
+ // are meant to be emitted here.
+ if (IsPeriod())
+ {
+
+ }else if (IsDot())
+ {
+
+ }
+ }
+ break;
+ case '\'':
+ case '\"':
+ {
+ if(_ch == '\'') MakeToken(DelimiterType.SingleQuotation);
+ else if(_ch == '\"') MakeToken(DelimiterType.DoubleQuotation);
+
+ // Reset _token before collecting the literal's content.
+ _token = new LinkedList();
+
+ GetChar(); // move onto the first character of the literal
+ // Stop at a quote or at the end of the source; without the _finish
+ // check an unterminated literal would loop forever on the sentinel.
+ while (!_finish && _ch != '\'' && _ch != '\"')
+ {
+ Cat(); // collect the character
+ GetChar(); // move to the next character
+ }
+
+ // On exit _ch is a quote or the end of the source was reached;
+ // _token holds the literal's content, emitted as a Character token.
+ MakeToken(SemanticTokenType.Character);
+ _token = new LinkedList(); // reset _token
+
+ if (_ch == '\'' && _ch != '\n')
+ {
+ // Emit the closing single quote as its own token.
+ Cat();
+ MakeToken(DelimiterType.SingleQuotation);
+ }
+ else if (_ch == '\"')
+ {
+ Cat();
+ MakeToken(DelimiterType.DoubleQuotation);
+ }
+ else
+ {
+ // The source ended before a closing quote was found.
+ PrintError(0, _token.First, _line);
+ }
+ }
+ break;
+ case ',':
+ MakeToken(DelimiterType.Comma);
+ break;
+ case ':':
+ MakeToken(DelimiterType.Colon);
+ break;
+ case ';':
+ MakeToken(DelimiterType.Semicolon);
+ break;
+ case '(':
+ MakeToken(DelimiterType.LeftParenthesis);
+ break;
+ case ')':
+ MakeToken(DelimiterType.RightParenthesis);
+ break;
+ case '[':
+ MakeToken(DelimiterType.LeftSquareBracket);
+ break;
+ case ']':
+ MakeToken(DelimiterType.RightSquareBracket);
+ break;
+ }
+ break;
+
+ case StateType.Other:
+ DealOther();
+ break;
+ default:
+ throw new ArgumentOutOfRangeException();
+ }
+
+ }
+ PrintResult();
return _tokens;
}
- private void SkipWhitespace()
+ // True when the most recently produced token is an identifier. Returns false
+ // when no token has been produced yet (for example when the source starts
+ // with '.'), instead of letting Last() throw on the empty list.
+ private bool IsDot()
{
- while (_currentNode != null && char.IsWhiteSpace(_currentNode.Value))
+     return _tokens.Count > 0 && _tokens[^1].TokenType == SemanticTokenType.Identifier;
+ }
+
+ // True when the most recently produced token is a keyword. Returns false when
+ // no token has been produced yet (for example when the source starts with
+ // '.'), instead of letting Last() throw on the empty list.
+ private bool IsPeriod()
+ {
+     return _tokens.Count > 0 && _tokens[^1].TokenType == SemanticTokenType.Keyword;
+ }
+
+ // Handles a token whose first character is neither a letter, a digit nor a
+ // delimiter: the operators, plus any unrecognized character, which is
+ // reported through PrintError and counted as an error. Two-character
+ // operators are recognized with one character of lookahead that is put back
+ // with Retract when it does not belong to the operator.
+ private void DealOther()
+ {
+ switch (_ch)
{
- if (_currentNode.Value == '\n')
- {
- _line++;
- _charPosition = 0;
- }
- _currentNode = _currentNode.Next;
+ case '+': // recognize +
+ Cat();
+ MakeToken(OperatorType.Plus);
+ break;
+ case '-': // recognize -
+ Cat();
+ MakeToken(OperatorType.Minus);
+ break;
+ case '*': // recognize *
+ Cat();
+ MakeToken(OperatorType.Multiply);
+ break;
+ case '/': // recognize /
+ Cat();
+ MakeToken(OperatorType.Divide);
+ break;
+ case '=':
+ Cat();
+ MakeToken(OperatorType.Equal);
+ break;
+ case '<':
+ Cat();
+ GetChar();
+ if (_ch == '=')
+ {
+ // recognize <=
+ Cat();
+ MakeToken(OperatorType.LessEqual);
+ }
+ else if(_ch == '>')
+ {
+ // recognize <>
+ Cat();
+ MakeToken(OperatorType.NotEqual);
+ }
+ else
+ {
+ // recognize <
+ Retract();
+ MakeToken(OperatorType.Less);
+ }
+ break;
+ case '>':
+ Cat();
+ GetChar();
+ if (_ch == '=')
+ {
+ // recognize >=
+ Cat();
+ MakeToken(OperatorType.GreaterEqual);
+ }
+ else
+ {
+ // recognize >
+ Retract();
+ MakeToken(OperatorType.Greater);
+ }
+ break;
+ case ':':
+ Cat();
+ GetChar();
+ if (_ch == '=')
+ {
+ // recognize :=
+ Cat();
+ MakeToken(OperatorType.Assign);
+ }
+ else
+ {
+ // Logically a lone ':' should have been recognized as a delimiter.
+ // NOTE(review): the lookahead character is appended and not retracted here.
+ Cat();
+ PrintError(1, _token.First, _line);
+ _tokenCount[SemanticTokenType.Error]++;
+ }
+ break;
+ default:
+ // Unrecognized character: report it; no token is added to _tokens.
+ Cat();
+ PrintError(1, _token.First, _line);
+ _tokenCount[SemanticTokenType.Error]++;
+ break;
}
}
- private void MoveCurrentNode(int steps)
+ // Builds a token of the given type from the characters collected in _token,
+ // appends it to _tokens, increments the counter for that type and prints the
+ // type and the literal to the console. Only Character, Identifier and Error
+ // are supported; any other type throws ArgumentOutOfRangeException.
+ private void MakeToken(SemanticTokenType tokenType)
{
- for (int i = 0; i < steps && _currentNode != null; i++)
+ SemanticToken? token;
+ // NOTE(review): prints "11" when _token is empty - looks like a debugging leftover.
+ if (_token.First == null)
{
- _currentNode = _currentNode.Next;
+ Console.WriteLine("11");
+ }
+ switch (tokenType)
+ {
+ case SemanticTokenType.Character:
+ CharacterSemanticToken characterSemanticToken = new CharacterSemanticToken()
+ {
+ LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First),
+ };
+ token = characterSemanticToken;
+ break;
+ case SemanticTokenType.Identifier:
+ IdentifierSemanticToken identifierSemanticToken = new IdentifierSemanticToken()
+ {
+ LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First),
+ };
+ token = identifierSemanticToken;
+ break;
+ case SemanticTokenType.Error:
+ ErrorSemanticToken errorSemanticToken = new ErrorSemanticToken()
+ {
+ LinePos = _line, CharacterPos = _chPos, LiteralValue = LinkedListToString(_token.First),
+ };
+ token = errorSemanticToken;
+ break;
+
+ default:
+ throw new ArgumentOutOfRangeException(nameof(tokenType), tokenType, null);
+ }
+
+ // Every case above assigns a token or throws, so token is never null here.
+ if (token != null)
+ {
+ _tokens.Add(token);
+ _tokenCount[tokenType]++;
+ Console.WriteLine($"<{tokenType}>");
+ Console.WriteLine(LinkedListToString(_token.First));
+ }
+
+
+ }
+
+ // Emits a keyword token built from the characters collected in _token:
+ // appends it to _tokens, bumps the keyword counter and prints it.
+ private void MakeToken(KeywordType keywordType)
+ {
+     string literal = LinkedListToString(_token.First);
+     var keywordToken = new KeywordSemanticToken
+     {
+         LinePos = _line,
+         CharacterPos = _chPos,
+         LiteralValue = literal,
+         KeywordType = keywordType
+     };
+
+     _tokens.Add(keywordToken);
+     _tokenCount[SemanticTokenType.Keyword] += 1;
+
+     Console.WriteLine($"<{SemanticTokenType.Keyword}> <{keywordType}>");
+     Console.WriteLine(literal);
+ }
+
+ // Emits a delimiter token built from the characters collected in _token:
+ // appends it to _tokens, bumps the delimiter counter and prints it.
+ private void MakeToken(DelimiterType delimiterType)
+ {
+     string literal = LinkedListToString(_token.First);
+     var delimiterToken = new DelimiterSemanticToken
+     {
+         LinePos = _line,
+         CharacterPos = _chPos,
+         LiteralValue = literal,
+         DelimiterType = delimiterType
+     };
+
+     _tokens.Add(delimiterToken);
+     _tokenCount[SemanticTokenType.Delimiter] += 1;
+
+     Console.WriteLine($"<{SemanticTokenType.Delimiter}> <{delimiterType}>");
+     Console.WriteLine(literal);
+ }
+
+ // Emits an operator token built from the characters collected in _token:
+ // appends it to _tokens, bumps the operator counter and prints it.
+ private void MakeToken(OperatorType operatorType)
+ {
+     string literal = LinkedListToString(_token.First);
+     var operatorToken = new OperatorSemanticToken
+     {
+         LinePos = _line,
+         CharacterPos = _chPos,
+         LiteralValue = literal,
+         OperatorType = operatorType
+     };
+
+     _tokens.Add(operatorToken);
+     _tokenCount[SemanticTokenType.Operator] += 1;
+
+     Console.WriteLine($"<{SemanticTokenType.Operator}> <{operatorType}>");
+     Console.WriteLine(literal);
+ }
+
+ // Emits a number token built from the characters collected in _token:
+ // appends it to _tokens, bumps the number counter and prints it.
+ private void MakeToken(NumberType numberType)
+ {
+     string literal = LinkedListToString(_token.First);
+     var numberToken = new NumberSemanticToken
+     {
+         LinePos = _line,
+         CharacterPos = _chPos,
+         LiteralValue = literal,
+         NumberType = numberType
+     };
+
+     _tokens.Add(numberToken);
+     _tokenCount[SemanticTokenType.Number] += 1;
+
+     Console.WriteLine($"<{SemanticTokenType.Number}> <{numberType}>");
+     Console.WriteLine(literal);
+ }
+
+ // Buffer refill operations.
+ //
+ // Loads the next chunk of the source into the left half of _buffer. The half
+ // is first overwritten with the '$' sentinel, and at most (half size - 1)
+ // characters are copied, so at least one sentinel always follows the chunk.
+ // Sets eof once every source character has been handed to the buffer.
+ private void FillLeftBuffer() {
+     int half = _buffer.Length / 2;
+     Array.Fill(_buffer, '$', 0, half);
+
+     // Never copy past the end of the source.
+     int lengthToCopy = Math.Min(half - 1, source.Length - _sourcePos);
+
+     // Copy straight out of the string: source.ToCharArray() would duplicate the
+     // entire source text on every refill.
+     source.CopyTo(_sourcePos, _buffer, 0, lengthToCopy);
+
+     _sourcePos += lengthToCopy;
+
+     if (_sourcePos == source.Length) {
+         eof = true;
+     }
+ }
+
+ // Loads the next chunk of the source into the right half of _buffer. The half
+ // is first overwritten with the '$' sentinel, and at most (half size - 1)
+ // characters are copied, so at least one sentinel always follows the chunk.
+ // Sets eof once every source character has been handed to the buffer.
+ private void FillRightBuffer() {
+     int half = _buffer.Length / 2;
+     Array.Fill(_buffer, '$', half, _buffer.Length - half);
+
+     // Never copy past the end of the source.
+     int lengthToCopy = Math.Min(half - 1, source.Length - _sourcePos);
+
+     // Copy straight out of the string: source.ToCharArray() would duplicate the
+     // entire source text on every refill.
+     source.CopyTo(_sourcePos, _buffer, half, lengthToCopy);
+
+     _sourcePos += lengthToCopy;
+
+     if (_sourcePos == source.Length) {
+         eof = true;
+     }
+ }
+
+ // Debug helper: writes every buffer slot as "[index] character", one per line.
+ private void PrintBuffer() {
+     int slot = 0;
+     foreach (char content in _buffer) {
+         Console.WriteLine($"[{slot}] {content}");
+         slot++;
+     }
+ }
+
+ // Called by GetChar when it reads the '$' sentinel. If the whole source has
+ // already been loaded, scanning is finished; otherwise the other half of the
+ // buffer is refilled and the read position jumps to its start.
+ void DealEof() {
+     if (eof) {
+         _finish = true;
+         return;
+     }
+
+     int half = _buffer.Length / 2;
+     if (_fwdPos >= half) {
+         // Currently in the right half: wrap around to the left one.
+         FillLeftBuffer();
+         _fwdPos = 0;
+         return;
+     }
+
+     // Currently in the left half: continue in the right one.
+     FillRightBuffer();
+     _fwdPos = half;
+ }
+
+ // Buffer read operations.
+ //
+ // Reads the character at _fwdPos into _ch and advances _fwdPos and _chPos by
+ // one. When the character read is the '$' sentinel, DealEof either marks the
+ // scan as finished or refills the other buffer half and moves _fwdPos, after
+ // which the character at the (possibly new) position is read again.
+ void GetChar() {
+ // The range check leaves _ch unchanged when _fwdPos is outside the buffer.
+ if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos];
+ _chPos++;
+ if (_ch == '$') {
+ DealEof();
+ if (_fwdPos >= 0 && _fwdPos < _buffer.Length) _ch = _buffer[_fwdPos];
+ }
+ if (_fwdPos < _buffer.Length) _fwdPos++;
+ }
+
+ // Skips blanks: while the current character is a space, tab, carriage return
+ // or line feed, reads the next one. A line feed advances the line counter and
+ // resets the character position.
+ private void GetNbc() {
+     while (_ch is ' ' or '\n' or '\t' or '\r') {
+         bool lineBreak = _ch == '\n';
+         if (lineBreak) {
+             _line++;
+             _chPos = 0;
+         }
+         GetChar();
+     }
+ }
+
+ // Un-reads the most recent character: moves both positions back by two and
+ // re-reads, so _ch becomes the previous character and the net movement is one
+ // step back.
+ // NOTE(review): _chPos is unsigned, so this relies on wrap-around when it is
+ // below 2 - confirm arithmetic is unchecked. Stepping back across a buffer
+ // refill is not handled - verify.
+ private void Retract() {
+ _fwdPos -= 2;
+ _chPos -= 2;
+ GetChar();
+ }
+
+ // Appends the current character to the token under construction.
+ private void Cat() => _token.AddLast(_ch);
+
+ // Concatenates the values of the given node and of every node after it into
+ // one string. A null node yields the empty string.
+ private string LinkedListToString(LinkedListNode first)
+ {
+     var text = new StringBuilder();
+     var current = first;
+     while (current != null)
+     {
+         text.Append(current.Value);
+         current = current.Next;
+     }
+
+     return text.ToString();
+ }
+
+ // Character classification helpers (each one inspects the current character _ch).
+ //
+ // True when _ch is one of '0'..'9'.
+ private bool IsDigit() => _ch is >= '0' and <= '9';
+
+ // True when _ch is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
+ // Both letter cases are accepted so that literals such as 0x1f are not cut
+ // short at the first lower-case digit.
+ private bool IsHexDigit()
+ {
+     return (_ch >= '0' && _ch <= '9')
+            || (_ch >= 'A' && _ch <= 'F')
+            || (_ch >= 'a' && _ch <= 'f');
+ }
+
+ // True when _ch is an ASCII letter or an underscore.
+ private bool IsLetter() => _ch is (>= 'A' and <= 'Z') or (>= 'a' and <= 'z') or '_';
+
+ // True when the characters collected in _token spell a reserved word,
+ // ignoring letter case.
+ private bool IsKeyword()
+ {
+     // Ask the same table that GetKeywordTypeByKeyword resolves against (it is
+     // built with an ordinal, case-insensitive comparer). A word accepted here
+     // can then always be mapped to a KeywordType, and the two keyword lists
+     // cannot drift apart.
+     string tokenString = LinkedListToString(_token.First);
+     return KeywordSemanticToken.KeywordTypes.ContainsKey(tokenString);
+ }
+
+
+ // True when the current character starts a delimiter token. ':' is a
+ // delimiter only when it is not the first half of the ":=" operator, which
+ // is decided with one character of lookahead. The lookahead is always put
+ // back, so on return _ch is the character this method was called on.
+ private bool IsDelimiter()
+ {
+     // Same membership rule as before: some delimiter spelling contains _ch.
+     if (!Array.Exists(_delimiter, delimiter => delimiter.Contains(_ch)))
+     {
+         return false;
+     }
+
+     if (_ch != ':')
+     {
+         return true;
+     }
+
+     GetChar();
+     bool startsAssign = _ch == '=';
+     // Retract in both cases. Without it a lone ':' left _ch on the following
+     // character, so the colon was lost and that character was consumed.
+     Retract();
+     return !startsAssign;
+ }
+
+ // Prints one token as "line <TYPE,text>", with the type name in upper case
+ // ("UNKNOWN" when the value has no name in SemanticTokenType).
+ private void PrintToken(SemanticTokenType type, LinkedListNode token, uint line)
+ {
+     string text = LinkedListToString(token);
+     string label = (Enum.GetName(typeof(SemanticTokenType), type) ?? "Unknown").ToUpperInvariant();
+
+     Console.WriteLine($"{line} <{label},{text}>");
+ }
+
+ // Example: PrintToken(SemanticTokenType.Keyword, "if", 42); // assuming "if" is the token and 42 is the line number
+
+ // Writes an error line for the given line number to the console. type 0 and
+ // type 1 are the two supported error kinds; any other value prints nothing.
+ // NOTE(review): both messages consist only of the line number and a space,
+ // and tokenString is computed but never used - the message text may have
+ // been lost; confirm the intended format.
+ private void PrintError(int type, LinkedListNode token, uint line)
+ {
+ string tokenString = LinkedListToString(token);
+ switch (type)
+ {
+ case 0:
+ Console.WriteLine($"{line} ");
+ break;
+ case 1:
+ Console.WriteLine($"{line} ");
+ break;
+ }
+ }
+
+ // Example: PrintError(0, "unexpected symbol", 42); // assuming "unexpected symbol" is the offending token and 42 is the line number
+
+ // Prints the final statistics: the current line number, then one
+ // "type: count" line per entry of _tokenCount.
+ private void PrintResult()
+ {
+ Console.WriteLine(_line);
+ foreach (var (tokenType, count) in _tokenCount)
+ {
+ Console.WriteLine($"{tokenType}: {count}");
}
}
}
+
diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs
index 72d8cf3..8e42917 100644
--- a/Canon.Core/LexicalParser/SemanticToken.cs
+++ b/Canon.Core/LexicalParser/SemanticToken.cs
@@ -129,6 +129,46 @@ public class KeywordSemanticToken : SemanticToken
public required KeywordType KeywordType { get; init; }
+ /// <summary>
+ /// Maps keyword text to its KeywordType. Lookups ignore letter case because
+ /// the dictionary is built with StringComparer.OrdinalIgnoreCase.
+ /// </summary>
+ public static readonly Dictionary KeywordTypes = new Dictionary(StringComparer.OrdinalIgnoreCase)
+ {
+ { "program", KeywordType.Program },
+ { "const", KeywordType.Const },
+ { "var", KeywordType.Var },
+ { "procedure", KeywordType.Procedure },
+ { "function", KeywordType.Function },
+ { "begin", KeywordType.Begin },
+ { "end", KeywordType.End },
+ { "array", KeywordType.Array },
+ { "of", KeywordType.Of },
+ { "if", KeywordType.If },
+ { "then", KeywordType.Then },
+ { "else", KeywordType.Else },
+ { "for", KeywordType.For },
+ { "to", KeywordType.To },
+ { "do", KeywordType.Do },
+ { "integer", KeywordType.Integer },
+ { "real", KeywordType.Real },
+ { "boolean", KeywordType.Boolean },
+ { "character", KeywordType.Character },
+ { "div", KeywordType.Divide }, // note: Pascal uses 'div' rather than '/'
+ { "not", KeywordType.Not },
+ { "mod", KeywordType.Mod },
+ { "and", KeywordType.And },
+ { "or", KeywordType.Or }
+ };
+
+ /// <summary>
+ /// Resolves keyword text (in any letter case) to its KeywordType.
+ /// </summary>
+ /// <param name="keyword">The keyword text to look up in KeywordTypes.</param>
+ /// <returns>The KeywordType registered for the text.</returns>
+ /// <exception cref="ArgumentException">The text is not a key of KeywordTypes.</exception>
+ public static KeywordType GetKeywordTypeByKeyword(string keyword) =>
+     KeywordTypes.TryGetValue(keyword, out var resolved)
+         ? resolved
+         : throw new ArgumentException($"Unknown keyword: {keyword}");
+
public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now,
out KeywordSemanticToken? token)
{
@@ -200,7 +240,6 @@ public class OperatorSemanticToken : SemanticToken
///
/// 数值类型记号
///
-/// TODO:进制表示(只有$1的十六进制表示)
public class NumberSemanticToken : SemanticToken
{
public override SemanticTokenType TokenType => SemanticTokenType.Number;
@@ -287,3 +326,19 @@ public class EndSemanticToken : SemanticToken
{
public override SemanticTokenType TokenType => SemanticTokenType.End;
}
+
+/// <summary>
+/// Token type that represents a lexical error.
+/// </summary>
+public class ErrorSemanticToken : SemanticToken
+{
+ public override SemanticTokenType TokenType => SemanticTokenType.Error;
+
+ // Placeholder: never parses anything; always sets token to null and returns false.
+ // NOTE(review): the out parameter is typed IdentifierSemanticToken rather than
+ // ErrorSemanticToken - looks copied from the identifier token; confirm.
+ public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now,
+ out IdentifierSemanticToken? token)
+ {
+ token = null;
+ return false;
+ }
+}
+
diff --git a/Canon.Tests/LexicalParserTests/Array.cs b/Canon.Tests/LexicalParserTests/Array.cs
deleted file mode 100644
index abd8c40..0000000
--- a/Canon.Tests/LexicalParserTests/Array.cs
+++ /dev/null
@@ -1,6 +0,0 @@
-namespace Canon.Tests.LexicalParserTests;
-
-public class Array
-{
-
-}
diff --git a/Canon.Tests/LexicalParserTests/DelimiterTests.cs b/Canon.Tests/LexicalParserTests/DelimiterTests.cs
index 4c7e5a9..5b653db 100644
--- a/Canon.Tests/LexicalParserTests/DelimiterTests.cs
+++ b/Canon.Tests/LexicalParserTests/DelimiterTests.cs
@@ -3,7 +3,6 @@ using Canon.Core.LexicalParser;
namespace Canon.Tests.LexicalParserTests;
-
public class DelimiterTests
{
[Theory]
@@ -17,11 +16,12 @@ public class DelimiterTests
[InlineData("]asd", DelimiterType.RightSquareBracket)]
public void SmokeTest(string input, DelimiterType type)
{
- LinkedList content = Utils.GetLinkedList(input);
+ // Tokenize the input with the Lexer; only the first token is checked.
+ Lexer lexer = new(input);
+ List tokens = lexer.Tokenize();
- Assert.True(DelimiterSemanticToken.TryParse(0, 0, content.First!,
- out DelimiterSemanticToken? token));
- Assert.NotNull(token);
- Assert.Equal(type, token.DelimiterType);
+ // The first token must be a delimiter of the expected kind.
+ SemanticToken token = tokens[0];
+ Assert.Equal(SemanticTokenType.Delimiter, token.TokenType);
+ DelimiterSemanticToken delimiterSemanticToken = (DelimiterSemanticToken)token;
+ Assert.Equal(type, delimiterSemanticToken.DelimiterType);
}
}
diff --git a/Canon.Tests/LexicalParserTests/IndentifierTests.cs b/Canon.Tests/LexicalParserTests/IndentifierTests.cs
index cfb657e..a46a10b 100644
--- a/Canon.Tests/LexicalParserTests/IndentifierTests.cs
+++ b/Canon.Tests/LexicalParserTests/IndentifierTests.cs
@@ -18,20 +18,13 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("identifier_with_special_chars@#", false)]
[InlineData("", false)]
[InlineData(" ", false)]
- [InlineData("andand",false)]
+ [InlineData("andand", false)]
public void TestParseIdentifier(string input, bool expectedResult)
{
- LinkedList content = Utils.GetLinkedList(input);
- Assert.Equal(expectedResult, IdentifierSemanticToken.TryParse(0, 0, content.First!,
- out IdentifierSemanticToken? token));
- if (expectedResult)
- {
- Assert.NotNull(token);
- }
- else
- {
- Assert.Null(token);
- }
+ // Tokenize the input with the Lexer.
+ Lexer lexer = new(input);
+ List tokens = lexer.Tokenize();
+
+ // expectedResult states whether the first token is an identifier; an empty
+ // token list counts as "not an identifier".
+ Assert.Equal(expectedResult, tokens.FirstOrDefault()?.TokenType == SemanticTokenType.Identifier);
}
}
}
diff --git a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
index e36d149..fd22191 100644
--- a/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/KeywordTypeTests.cs
@@ -23,11 +23,12 @@ public class KeywordTypeTests
[InlineData("do", KeywordType.Do)]
public void SmokeTest(string input, KeywordType type)
{
- LinkedList content = Utils.GetLinkedList(input);
+ // Tokenize the input with the Lexer; only the first token is checked.
+ Lexer lexer = new(input);
+ List tokens = lexer.Tokenize();
- Assert.True(KeywordSemanticToken.TryParse(0, 0, content.First!,
- out KeywordSemanticToken? token));
- Assert.NotNull(token);
- Assert.Equal(type, token.KeywordType);
+ // The first token must be a keyword of the expected kind.
+ SemanticToken token = tokens[0];
+ Assert.Equal(SemanticTokenType.Keyword, token.TokenType);
+ KeywordSemanticToken keywordSemanticToken = (KeywordSemanticToken)token;
+ Assert.Equal(type, keywordSemanticToken.KeywordType);
}
}
diff --git a/Canon.Tests/LexicalParserTests/NumberTests.cs b/Canon.Tests/LexicalParserTests/NumberTests.cs
index eb2bd94..f2d7d0f 100644
--- a/Canon.Tests/LexicalParserTests/NumberTests.cs
+++ b/Canon.Tests/LexicalParserTests/NumberTests.cs
@@ -25,21 +25,22 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("1E", 0, NumberType.Real, false)]
[InlineData("abc", 0, NumberType.Integer, false)]
[InlineData("123abc", 123, NumberType.Integer, true)]
- public void TestParseNumber(string input, double expected, NumberType expectedNumberType, bool expectedResult = true)
+ public void TestParseNumber(string input, double expected, NumberType expectedNumberType,
+ bool expectedResult = true)
{
- LinkedList content = Utils.GetLinkedList(input);
- Assert.Equal(expectedResult, NumberSemanticToken.TryParse(0, 0, content.First!,
- out NumberSemanticToken? token));
- if (expectedResult)
+ // Tokenize the input with the Lexer; only the first token is checked.
+ Lexer lexer = new(input);
+ List tokens = lexer.Tokenize();
+
+ // NOTE(review): tokens[0] throws when the lexer produced no token at all.
+ SemanticToken token = tokens[0];
+ if (!expectedResult)
{
- Assert.NotNull(token);
- Assert.Equal(expected, token.Value);
- Assert.Equal(expectedNumberType, token.NumberType);
- }
- else
- {
- Assert.Null(token);
+ // NOTE(review): the negative case only rules out Keyword; presumably
+ // SemanticTokenType.Number was meant - confirm.
+ Assert.NotEqual(SemanticTokenType.Keyword, token.TokenType);
+ return;
}
+ // Positive case: the first token is a number with the expected kind and value.
+ Assert.Equal(SemanticTokenType.Number, token.TokenType);
+ NumberSemanticToken numberSemanticToken = (NumberSemanticToken)token;
+ Assert.Equal(expectedNumberType, numberSemanticToken.NumberType);
+ Assert.Equal(expected, numberSemanticToken.Value);
}
}
}
diff --git a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
index 7c37117..b90a2c3 100644
--- a/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
+++ b/Canon.Tests/LexicalParserTests/OperatorTypeTests.cs
@@ -7,9 +7,7 @@ public class OperatorTypeTests
{
[Theory]
[InlineData("+ 123", OperatorType.Plus)]
- [InlineData("1 + 123", OperatorType.Plus)]
[InlineData("+123", OperatorType.Plus)]
- [InlineData("m +123", OperatorType.Plus)]
[InlineData("-123", OperatorType.Minus)]
[InlineData("*123", OperatorType.Multiply)]
[InlineData("/123", OperatorType.Divide)]
@@ -22,20 +20,24 @@ public class OperatorTypeTests
[InlineData(":=123", OperatorType.Assign)]
public void ParseTest(string input, OperatorType result)
{
- LinkedList content = Utils.GetLinkedList(input);
- Assert.True(OperatorSemanticToken.TryParse(0, 0,
- content.First!, out OperatorSemanticToken? token));
- Assert.Equal(result, token?.OperatorType);
+ // Tokenize the input with the Lexer; only the first token is checked.
+ Lexer lexer = new(input);
+ List tokens = lexer.Tokenize();
+
+ // The first token must be an operator of the expected kind.
+ SemanticToken token = tokens[0];
+ Assert.Equal(SemanticTokenType.Operator, token.TokenType);
+ OperatorSemanticToken operatorSemanticToken = (OperatorSemanticToken)token;
+ Assert.Equal(result, operatorSemanticToken.OperatorType);
}
[Theory]
- [InlineData("<><123")]
- [InlineData("<=<123")]
+ [InlineData("1 + 123")]
+ [InlineData("m +123")]
public void ParseFailedTest(string input)
{
- LinkedList content = Utils.GetLinkedList(input);
- Assert.False(OperatorSemanticToken.TryParse(0, 0,
- content.First!, out OperatorSemanticToken? token));
- Assert.Null(token);
+ // Tokenize the input with the Lexer.
+ Lexer lexer = new(input);
+ List tokens = lexer.Tokenize();
+
+ // These inputs do not start with an operator, so the first token must be
+ // of some other type.
+ SemanticToken token = tokens[0];
+ Assert.NotEqual(SemanticTokenType.Operator, token.TokenType);
}
}