From 4353fb0c016976c719509c3845b0c5b35ae31f41 Mon Sep 17 00:00:00 2001 From: jackfiled Date: Sun, 21 Apr 2024 17:42:08 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E8=A1=A5=E5=85=85=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E7=BC=BA=E5=B0=91=E7=9A=84=E5=85=B3=E9=94=AE=E8=AF=8D=20(#49)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/49 --- Canon.Core/LexicalParser/LexRules.cs | 102 +++++++++++---------- Canon.Core/LexicalParser/Lexer.cs | 4 +- Canon.Core/LexicalParser/SemanticToken.cs | 106 +--------------------- 3 files changed, 57 insertions(+), 155 deletions(-) diff --git a/Canon.Core/LexicalParser/LexRules.cs b/Canon.Core/LexicalParser/LexRules.cs index 2746679..d6d607b 100644 --- a/Canon.Core/LexicalParser/LexRules.cs +++ b/Canon.Core/LexicalParser/LexRules.cs @@ -1,74 +1,76 @@ -namespace Canon.Core.LexicalParser; +using Canon.Core.Enums; + +namespace Canon.Core.LexicalParser; public static class LexRules { // 保留关键字 - private static readonly string[] _keywords = - [ - "Program", "Const", "Var", "Procedure", - "Function", "Begin", "End", "Array", - "Of", "If", "Then", "Else", - "For", "To", "Do", "Integer", - "Real", "Boolean", "Character", "Divide", - "Not", "Mod", "And", "Or" - ]; + private static readonly Dictionary s_keywordTypes = + new(StringComparer.OrdinalIgnoreCase) + { + { "program", KeywordType.Program }, + { "const", KeywordType.Const }, + { "var", KeywordType.Var }, + { "procedure", KeywordType.Procedure }, + { "function", KeywordType.Function }, + { "begin", KeywordType.Begin }, + { "end", KeywordType.End }, + { "array", KeywordType.Array }, + { "of", KeywordType.Of }, + { "if", KeywordType.If }, + { "then", KeywordType.Then }, + { "else", KeywordType.Else }, + { "for", KeywordType.For }, + { "to", KeywordType.To }, + { "do", KeywordType.Do }, + { "integer", KeywordType.Integer }, + { "real", KeywordType.Real }, + { "boolean", KeywordType.Boolean }, + { "char", KeywordType.Character }, + { "div", KeywordType.Divide }, // 注意: Pascal 使用 'div' 而不是 '/' + { "not", KeywordType.Not }, + { "mod", KeywordType.Mod }, + { "and", KeywordType.And }, + { "or", KeywordType.Or } + }; - private static readonly string[] _delimiter = [";", ",", ":", ".", "(", ")", "[", "]", "'", "\"", ".."]; + public static bool GetKeywordTypeByKeywprd(string keyword, out KeywordType type) + => s_keywordTypes.TryGetValue(keyword, out type); - private static readonly string[] _operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="]; + + private static readonly HashSet s_delimiter = [';', ',', ':', '.', '(', ')', '[', ']', '\'', '"']; + + private static readonly HashSet s_operator = ["=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", ":="]; // 判断字符 - public static bool IsDigit(char _ch) { - if (_ch >= '0' && _ch <= '9') return true; - return false; - } - - public static bool IsHexDigit(char _ch) + public static bool IsDigit(char ch) { - if ((_ch >= '0' && _ch <= '9') || (_ch<= 'F' && _ch >= 'A')) return true; + if (ch is >= '0' and <= '9') return true; return false; } - public static bool IsLetter(char _ch) { - if ((_ch >= 'A' && _ch <= 'Z') || (_ch >= 'a' && _ch <= 'z' || _ch == '_')) { + public static bool IsHexDigit(char ch) + { + if (ch is >= '0' and <= '9' || ch is <= 'F' and >= 'A') return true; + return false; + } + + public static bool IsLetter(char ch) + { + if (ch is >= 'A' and <= 'Z' || (ch is >= 'a' and <= 'z' || ch == '_')) + { return true; } + return false; } - public static bool IsKeyword(string tokenString) - { - - foreach (var t in _keywords) - { - if (string.Equals(tokenString, t, StringComparison.OrdinalIgnoreCase)) return true; - } - return false; - } - - public static bool IsDelimiter(char ch) - { - foreach (var delimiter in _delimiter) - { - if (delimiter.Contains(ch)) - { - return true; - } - } - return false; - } + => s_delimiter.Contains(ch); public static bool IsOperator(char ch) { - foreach (var o in _operator) - { - if (o.Contains(ch)) - { - return true; - } - } - return false; + return s_operator.Any(op => op.Contains(ch)); } public static bool IsBreakPoint(char ch) diff --git a/Canon.Core/LexicalParser/Lexer.cs b/Canon.Core/LexicalParser/Lexer.cs index 068c297..a19dc5b 100644 --- a/Canon.Core/LexicalParser/Lexer.cs +++ b/Canon.Core/LexicalParser/Lexer.cs @@ -231,10 +231,8 @@ public class Lexer : ILexer Retract(); string tokenString = GetCurrentTokenString(); - if (LexRules.IsKeyword(tokenString)) + if (LexRules.GetKeywordTypeByKeywprd(tokenString, out KeywordType keywordType)) { - KeywordType keywordType = - KeywordSemanticToken.GetKeywordTypeByKeyword(GetCurrentTokenString()); _semanticToken = LexemeFactory.MakeToken(keywordType, tokenString, _line, _chPos); } diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs index 7866b35..f1d6b82 100644 --- a/Canon.Core/LexicalParser/SemanticToken.cs +++ b/Canon.Core/LexicalParser/SemanticToken.cs @@ -105,39 +105,6 @@ public class DelimiterSemanticToken : SemanticToken public required DelimiterType DelimiterType { get; init; } - public static bool TryParse(uint linePos, uint characterPos, LinkedListNode now, - out DelimiterSemanticToken? token) - { - Dictionary delimiterMap = new() - { - { ',', DelimiterType.Comma }, - { '.', DelimiterType.Period }, - { ':', DelimiterType.Colon }, - { ';', DelimiterType.Semicolon }, - { '(', DelimiterType.LeftParenthesis }, - { ')', DelimiterType.RightParenthesis }, - { '[', DelimiterType.LeftSquareBracket }, - { ']', DelimiterType.RightSquareBracket }, - { '\'', DelimiterType.SingleQuotation }, - { '\"', DelimiterType.DoubleQuotation } - }; - - if (!delimiterMap.TryGetValue(now.Value, out DelimiterType value)) - { - token = null; - return false; - } - - token = new DelimiterSemanticToken - { - LinePos = linePos, - CharacterPos = characterPos, - LiteralValue = new string([now.Value]), - DelimiterType = value - }; - return true; - } - public override int GetHashCode() { return base.GetHashCode() ^ DelimiterType.GetHashCode(); @@ -153,50 +120,9 @@ public class KeywordSemanticToken : SemanticToken public required KeywordType KeywordType { get; init; } - public static readonly Dictionary KeywordTypes = - new Dictionary(StringComparer.OrdinalIgnoreCase) - { - { "program", KeywordType.Program }, - { "const", KeywordType.Const }, - { "var", KeywordType.Var }, - { "procedure", KeywordType.Procedure }, - { "function", KeywordType.Function }, - { "begin", KeywordType.Begin }, - { "end", KeywordType.End }, - { "array", KeywordType.Array }, - { "of", KeywordType.Of }, - { "if", KeywordType.If }, - { "then", KeywordType.Then }, - { "else", KeywordType.Else }, - { "for", KeywordType.For }, - { "to", KeywordType.To }, - { "do", KeywordType.Do }, - { "integer", KeywordType.Integer }, - { "real", KeywordType.Real }, - { "boolean", KeywordType.Boolean }, - { "character", KeywordType.Character }, - { "div", KeywordType.Divide }, // 注意: Pascal 使用 'div' 而不是 '/' - { "not", KeywordType.Not }, - { "mod", KeywordType.Mod }, - { "and", KeywordType.And }, - { "or", KeywordType.Or } - }; - - public static KeywordType GetKeywordTypeByKeyword(string keyword) - { - if (KeywordTypes.TryGetValue(keyword, out var keywordType)) - { - return keywordType; - } - else - { - throw new ArgumentException($"Unknown keyword: {keyword}"); - } - } - public override int GetHashCode() { - return base.GetHashCode() ^ this.KeywordType.GetHashCode(); + return base.GetHashCode() ^ KeywordType.GetHashCode(); } } @@ -209,33 +135,6 @@ public class OperatorSemanticToken : SemanticToken public required OperatorType OperatorType { get; init; } - public static readonly Dictionary OperatorTypes = new Dictionary - { - { "=", OperatorType.Equal }, - { "<>", OperatorType.NotEqual }, - { "<", OperatorType.Less }, - { "<=", OperatorType.LessEqual }, - { ">", OperatorType.Greater }, - { ">=", OperatorType.GreaterEqual }, - { "+", OperatorType.Plus }, - { "-", OperatorType.Minus }, - { "*", OperatorType.Multiply }, - { "/", OperatorType.Divide }, - { ":=", OperatorType.Assign } - }; - - public static OperatorType GetOperatorTypeByOperator(string operatorSymbol) - { - if (OperatorTypes.TryGetValue(operatorSymbol, out var operatorType)) - { - return operatorType; - } - else - { - throw new ArgumentException($"Unknown operator: {operatorSymbol}"); - } - } - public override int GetHashCode() { return base.GetHashCode() ^ OperatorType.GetHashCode(); @@ -270,6 +169,9 @@ public class IdentifierSemanticToken : SemanticToken public string IdentifierName => LiteralValue.ToLower(); } +/// +/// 终结符记号 +/// public class EndSemanticToken : SemanticToken { public override SemanticTokenType TokenType => SemanticTokenType.End;