Canon/Canon.Core/LexicalParser/Lexer.cs

96 lines
3.2 KiB
C#
Raw Normal View History

2024-03-10 19:48:34 +08:00

namespace Canon.Core.LexicalParser
{
/// <summary>
/// Splits source text into a list of <see cref="SemanticToken"/> instances.
/// </summary>
/// <remarks>
/// The lexer walks the source one character at a time through a linked list and
/// keeps track of the current line (starting at 1) and the zero-based character
/// offset within that line. Both values are handed to each token parser.
/// </remarks>
public class Lexer
{
    private readonly LinkedList<char> source;
    private LinkedListNode<char>? currentNode;
    private uint line = 1;
    private uint charPosition = 0;
    private readonly List<SemanticToken> tokens = new List<SemanticToken>();

    /// <summary>
    /// Creates a lexer positioned at the first character of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The text to tokenize.</param>
    public Lexer(string source)
    {
        // Store the text as a LinkedList<char> so parsers can walk forward from any node.
        this.source = new LinkedList<char>(source);
        currentNode = this.source.First;
    }

    /// <summary>
    /// Consumes the remaining source text and returns every token recognized in it.
    /// </summary>
    /// <returns>
    /// The lexer's token list. The same list instance is returned on every call;
    /// once the input is exhausted, further calls add nothing to it.
    /// </returns>
    public List<SemanticToken> Tokenize()
    {
        while (currentNode != null)
        {
            SkipWhitespace();
            if (currentNode == null)
            {
                // Only trailing whitespace was left.
                break;
            }

            SemanticToken? token = null;

            // Try each token category in a fixed priority order; the first parser that succeeds wins.
            if (DelimiterSemanticToken.TryParse(line, charPosition, currentNode, out var delimiterToken))
            {
                token = delimiterToken;
            }
            else if (CharacterSemanticToken.TryParse(line, charPosition, currentNode, out var characterToken))
            {
                token = characterToken;
            }
            else if (KeywordSemanticToken.TryParse(line, charPosition, currentNode, out var keywordToken))
            {
                token = keywordToken;
            }
            else if (OperatorSemanticToken.TryParse(line, charPosition, currentNode, out var operatorToken))
            {
                token = operatorToken;
            }
            else if (NumberSemanticToken.TryParse(line, charPosition, currentNode, out var numberToken))
            {
                token = numberToken;
            }
            else if (IdentifierSemanticToken.TryParse(line, charPosition, currentNode, out var identifierToken))
            {
                token = identifierToken;
            }

            if (token != null)
            {
                tokens.Add(token);

                // Step over the token's text. Always advance at least one character so
                // that a token with an empty literal cannot stall the loop forever.
                // NOTE(review): assumes LiteralValue.Length equals the number of source
                // characters the parser matched - confirm against the token classes.
                int consumed = token.LiteralValue.Length;
                MoveCurrentNode(consumed > 0 ? consumed : 1);
            }
            else
            {
                // Unrecognized character: skip it and keep going.
                MoveCurrentNode(1);
            }
        }

        // tokens.Add(new EOFToken(line, charPosition)); // append an EOF marker
        return tokens;
    }

    /// <summary>
    /// Advances past consecutive whitespace characters, updating line and column as it goes.
    /// </summary>
    private void SkipWhitespace()
    {
        while (currentNode != null && char.IsWhiteSpace(currentNode.Value))
        {
            Advance();
        }
    }

    /// <summary>
    /// Advances the cursor by up to <paramref name="steps"/> characters, stopping early at end of input.
    /// </summary>
    /// <param name="steps">Maximum number of characters to consume.</param>
    private void MoveCurrentNode(int steps)
    {
        for (int i = 0; i < steps && currentNode != null; i++)
        {
            Advance();
        }
    }

    /// <summary>
    /// Consumes exactly one character and keeps the line/column counters in sync with it.
    /// </summary>
    private void Advance()
    {
        if (currentNode == null)
        {
            return;
        }

        if (currentNode.Value == '\n')
        {
            // A newline starts the next line at column 0.
            line++;
            charPosition = 0;
        }
        else
        {
            charPosition++;
        }

        currentNode = currentNode.Next;
    }
}
}