// Source file Canon.Core/LexicalParser/Lexer.cs, as introduced by patch
// 95ff2233c12b13207b6754f04d59d00ba22d1c73.

namespace Canon.Core.LexicalParser
{
    /// <summary>
    /// Splits source text into <see cref="SemanticToken"/> instances by walking a
    /// linked list of the input characters.
    /// </summary>
    public class Lexer
    {
        private readonly LinkedList<char> source;

        // Cursor into the source; null once the whole input has been consumed.
        private LinkedListNode<char>? currentNode;

        // Line number of the cursor; starts at 1 and grows on every consumed '\n'.
        private uint line = 1;

        // Offset of the cursor within the current line; reset to 0 after every '\n'.
        // NOTE(review): kept zero-based to match the original initial value -
        // confirm which base the SemanticToken types expect.
        private uint charPosition = 0;

        private readonly List<SemanticToken> tokens = new List<SemanticToken>();

        /// <summary>
        /// Creates a lexer positioned at the first character of <paramref name="source"/>.
        /// </summary>
        /// <param name="source">The text to tokenize.</param>
        public Lexer(string source)
        {
            // Convert the string into a linked list of characters.
            this.source = new LinkedList<char>(source);
            currentNode = this.source.First;
        }

        /// <summary>
        /// Consumes the remaining input and collects every token that could be recognised.
        /// </summary>
        /// <remarks>
        /// Characters that no token type recognises are skipped silently. The same list
        /// instance is returned on every call; once the input is exhausted, later calls
        /// return it unchanged.
        /// </remarks>
        /// <returns>The tokens recognised so far, in source order.</returns>
        public List<SemanticToken> Tokenize()
        {
            while (currentNode != null)
            {
                SkipWhitespace();

                // Skipping whitespace may have reached the end of the input.
                if (currentNode == null)
                {
                    break;
                }

                SemanticToken? token = ParseToken(currentNode);

                if (token != null)
                {
                    tokens.Add(token);

                    // Move past the token. Always consume at least one character so that
                    // a token with an empty literal cannot stall the loop forever.
                    int length = token.LiteralValue.Length;
                    MoveCurrentNode(length > 0 ? length : 1);
                }
                else
                {
                    // Unrecognised character: skip it.
                    MoveCurrentNode(1);
                }
            }

            // TODO(review): the original left `tokens.Add(new EOFToken(line, charPosition));`
            // commented out here, so no end-of-file token is emitted.
            return tokens;
        }

        /// <summary>
        /// Tries each token type in turn at <paramref name="node"/>, using the current
        /// line and column as the token position.
        /// </summary>
        /// <param name="node">The node holding the first character of the candidate token.</param>
        /// <returns>The first token that parses successfully, or null when none does.</returns>
        private SemanticToken? ParseToken(LinkedListNode<char> node)
        {
            // The order below is the order of precedence between token types.
            if (DelimiterSemanticToken.TryParse(line, charPosition, node, out var delimiterToken))
            {
                return delimiterToken;
            }

            if (CharacterSemanticToken.TryParse(line, charPosition, node, out var characterToken))
            {
                return characterToken;
            }

            if (KeywordSemanticToken.TryParse(line, charPosition, node, out var keywordToken))
            {
                return keywordToken;
            }

            if (OperatorSemanticToken.TryParse(line, charPosition, node, out var operatorToken))
            {
                return operatorToken;
            }

            if (NumberSemanticToken.TryParse(line, charPosition, node, out var numberToken))
            {
                return numberToken;
            }

            if (IdentifierSemanticToken.TryParse(line, charPosition, node, out var identifierToken))
            {
                return identifierToken;
            }

            return null;
        }

        /// <summary>
        /// Advances the cursor past any run of whitespace characters.
        /// </summary>
        private void SkipWhitespace()
        {
            while (currentNode != null && char.IsWhiteSpace(currentNode.Value))
            {
                Advance();
            }
        }

        /// <summary>
        /// Advances the cursor by up to <paramref name="steps"/> characters, stopping
        /// early at the end of the input.
        /// </summary>
        /// <param name="steps">The number of characters to consume.</param>
        private void MoveCurrentNode(int steps)
        {
            for (int i = 0; i < steps && currentNode != null; i++)
            {
                Advance();
            }
        }

        /// <summary>
        /// Consumes the character under the cursor and updates the line and column
        /// counters. Does nothing at the end of the input.
        /// </summary>
        private void Advance()
        {
            if (currentNode == null)
            {
                return;
            }

            if (currentNode.Value == '\n')
            {
                line++;
                charPosition = 0;
            }
            else
            {
                charPosition++;
            }

            currentNode = currentNode.Next;
        }
    }
}