Canon/Canon.Core/LexicalParser/Lexer.cs

93 lines
2.9 KiB
C#
Raw Normal View History

2024-03-11 11:36:39 +08:00
namespace Canon.Core.LexicalParser;
2024-03-10 19:48:34 +08:00
2024-03-11 11:36:39 +08:00
/// <summary>
/// Splits source text into a list of <see cref="SemanticToken"/> objects.
/// </summary>
/// <remarks>
/// The source is stored as a <see cref="LinkedList{T}"/> of characters so that each token
/// type's <c>TryParse</c> can be handed the node at which recognition should start.
/// The lexer keeps a 1-based line number and a 0-based column for the current node and
/// passes both to every <c>TryParse</c> call (presumably recorded as the token's start
/// position; confirm against the token classes).
/// </remarks>
public class Lexer
{
    // Source characters; the list itself is never modified after construction.
    private readonly LinkedList<char> _source;

    // Node of the next unread character, or null once the input is exhausted.
    private LinkedListNode<char>? _currentNode;

    // 1-based line number of _currentNode.
    private uint _line = 1;

    // 0-based offset of _currentNode within its line; reset after each '\n'.
    private uint _charPosition;

    // Tokens recognised so far; this same instance is returned by Tokenize().
    private readonly List<SemanticToken> _tokens = [];

    /// <summary>
    /// Creates a lexer positioned at the first character of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">The text to tokenize. An empty string yields no tokens.</param>
    /// <exception cref="System.ArgumentNullException">
    /// Thrown by the <see cref="LinkedList{T}"/> constructor when <paramref name="source"/> is null.
    /// </exception>
    public Lexer(string source)
    {
        // Copy the string into a linked list so recognisers can walk it node by node.
        _source = new LinkedList<char>(source);
        _currentNode = _source.First;
    }

    /// <summary>
    /// Consumes the remaining input and returns every token recognised.
    /// </summary>
    /// <returns>
    /// The lexer's internal token list. Once the input is exhausted, further calls
    /// return the same list without adding to it.
    /// </returns>
    /// <remarks>
    /// Whitespace between tokens is discarded. A character that no recogniser accepts
    /// is skipped silently. No end-of-file token is appended.
    /// </remarks>
    public List<SemanticToken> Tokenize()
    {
        while (_currentNode is not null)
        {
            // The column is deliberately NOT reset here: it is maintained by Advance()
            // so that each token is reported at its real offset within the line.
            SkipWhitespace();

            // Trailing whitespace may have carried us to the end of the input.
            if (_currentNode is null)
            {
                break;
            }

            SemanticToken? token = null;

            // Try each token category in a fixed priority order; the first match wins.
            if (DelimiterSemanticToken.TryParse(_line, _charPosition, _currentNode, out var delimiterToken))
            {
                token = delimiterToken;
            }
            else if (CharacterSemanticToken.TryParse(_line, _charPosition, _currentNode, out var characterToken))
            {
                token = characterToken;
            }
            else if (KeywordSemanticToken.TryParse(_line, _charPosition, _currentNode, out var keywordToken))
            {
                token = keywordToken;
            }
            else if (OperatorSemanticToken.TryParse(_line, _charPosition, _currentNode, out var operatorToken))
            {
                token = operatorToken;
            }
            else if (NumberSemanticToken.TryParse(_line, _charPosition, _currentNode, out var numberToken))
            {
                token = numberToken;
            }
            else if (IdentifierSemanticToken.TryParse(_line, _charPosition, _currentNode, out var identifierToken))
            {
                token = identifierToken;
            }

            if (token is not null)
            {
                _tokens.Add(token);

                // Step over the characters the token covers. Always move at least one
                // character so a token with an empty literal cannot stall the loop.
                int length = token.LiteralValue.Length;
                MoveCurrentNode(length > 0 ? length : 1);
            }
            else
            {
                // Unrecognised character: skip it and keep going (best-effort lexing).
                MoveCurrentNode(1);
            }
        }

        return _tokens;
    }

    /// <summary>
    /// Advances past consecutive whitespace characters, keeping line and column up to date.
    /// </summary>
    private void SkipWhitespace()
    {
        while (_currentNode is not null && char.IsWhiteSpace(_currentNode.Value))
        {
            Advance();
        }
    }

    /// <summary>
    /// Advances the cursor by up to <paramref name="steps"/> characters, stopping early
    /// at the end of the input.
    /// </summary>
    /// <param name="steps">Number of characters to consume; values of zero or less do nothing.</param>
    private void MoveCurrentNode(int steps)
    {
        for (int consumed = 0; consumed < steps && _currentNode is not null; consumed++)
        {
            Advance();
        }
    }

    /// <summary>
    /// Consumes the current character and updates the line/column bookkeeping.
    /// Does nothing when the input is already exhausted.
    /// </summary>
    private void Advance()
    {
        if (_currentNode is null)
        {
            return;
        }

        if (_currentNode.Value == '\n')
        {
            // A newline starts the next line at column 0.
            _line++;
            _charPosition = 0;
        }
        else
        {
            _charPosition++;
        }

        _currentNode = _currentNode.Next;
    }
}