From 9930dbc42c0f78f59a23ae3768f002e4ef045e0e Mon Sep 17 00:00:00 2001 From: jackfiled Date: Mon, 11 Mar 2024 19:33:02 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AF=B9=E8=BE=93=E5=85=A5=E7=9A=84?= =?UTF-8?q?=E8=AE=B0=E5=8F=B7=E6=B5=81=E8=BF=9B=E8=A1=8C=E5=88=86=E6=9E=90?= =?UTF-8?q?=20(#4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加了语法分析中各个重要状态类的`ToString`方法,方便调试 Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/4 --- .gitea/workflows/test.yaml | 1 + Canon.Core/GrammarParser/Expression.cs | 20 +++++ Canon.Core/GrammarParser/Grammar.cs | 73 ++++++++++++++++++- Canon.Core/GrammarParser/LrState.cs | 16 +++- Canon.Core/GrammarParser/Terminator.cs | 19 ++++- Canon.Core/LexicalParser/SemanticToken.cs | 42 ++++++++++- .../GrammarParserTests/SimpleGrammarTests.cs | 25 +++++++ 7 files changed, 192 insertions(+), 4 deletions(-) diff --git a/.gitea/workflows/test.yaml b/.gitea/workflows/test.yaml index b69a95c..d5accf2 100644 --- a/.gitea/workflows/test.yaml +++ b/.gitea/workflows/test.yaml @@ -12,5 +12,6 @@ jobs: with: path: ~/.nuget/packages key: ${{ runner.os }}-nuget + save-always: true - name: Run test code run: dotnet test diff --git a/Canon.Core/GrammarParser/Expression.cs b/Canon.Core/GrammarParser/Expression.cs index 980f0ef..6ccd9e0 100644 --- a/Canon.Core/GrammarParser/Expression.cs +++ b/Canon.Core/GrammarParser/Expression.cs @@ -78,6 +78,26 @@ public class Expression : IEquatable return hash; } + public override string ToString() + { + string result = $"{Left} -> "; + + for (int i = 0; i < Right.Count; i++) + { + if (i == Pos) + { + result += '~'; + } + + result += ' '; + result += Right[i].ToString(); + } + + result += $", {LookAhead}"; + + return result; + } + public static bool operator ==(Expression a, Expression b) { return a.Equals(b); diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs index d16a16f..8a55ae4 100644 --- a/Canon.Core/GrammarParser/Grammar.cs 
+++ b/Canon.Core/GrammarParser/Grammar.cs @@ -1,8 +1,79 @@ -namespace Canon.Core.GrammarParser; +using Canon.Core.LexicalParser; + +namespace Canon.Core.GrammarParser; public class Grammar { public required NonTerminator Begin { get; init; } public required LrState BeginState { get; init; } + + public void Analyse(IEnumerable tokens) + { + Stack stack = []; + stack.Push(BeginState); + + using IEnumerator enumerator = tokens.GetEnumerator(); + if (!enumerator.MoveNext()) + { + throw new InvalidOperationException("Input token list is empty"); + } + + while (true) + { + LrState top = stack.Peek(); + + // 尝试进行接受或归约 + bool acceptFlag = false, reduceFlag = false; + foreach (Expression e in top.Expressions) + { + if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current) + { + if (e.Left == Begin) + { + acceptFlag = true; + } + else + { + reduceFlag = true; + + for (int i = 0; i < e.Right.Count; i++) + { + stack.Pop(); + } + + stack.Push(stack.Peek().Transformer[e.Left]); + } + } + } + + if (acceptFlag) + { + // 接受文法 退出循环 + break; + } + + if (reduceFlag) + { + // 归约 + continue; + } + + // 尝试进行移进 + if (top.Transformer.TryGetValue(enumerator.Current, out LrState? 
next)) + { + stack.Push(next); + if (enumerator.MoveNext()) + { + continue; + } + else + { + throw new InvalidOperationException("Run out of token but not accept"); + } + } + + throw new InvalidOperationException("Failed to analyse input grammar"); + } + } } diff --git a/Canon.Core/GrammarParser/LrState.cs b/Canon.Core/GrammarParser/LrState.cs index 817a986..251f8c5 100644 --- a/Canon.Core/GrammarParser/LrState.cs +++ b/Canon.Core/GrammarParser/LrState.cs @@ -1,4 +1,6 @@ -namespace Canon.Core.GrammarParser; +using System.Text; + +namespace Canon.Core.GrammarParser; /// /// LR语法中的一个项目集规范族 @@ -76,6 +78,18 @@ public class LrState : IEquatable return hash; } + public override string ToString() + { + StringBuilder builder = new(); + + foreach (Expression e in Expressions) + { + builder.Append(e).Append('\n'); + } + + return builder.ToString(); + } + public static bool operator ==(LrState a, LrState b) { return a.Equals(b); diff --git a/Canon.Core/GrammarParser/Terminator.cs b/Canon.Core/GrammarParser/Terminator.cs index ca7b3b4..b12de71 100644 --- a/Canon.Core/GrammarParser/Terminator.cs +++ b/Canon.Core/GrammarParser/Terminator.cs @@ -66,7 +66,7 @@ public class Terminator : TerminatorBase, IEquatable /// /// 栈底的终结符 /// - public static Terminator EndTerminator => new(KeywordType.End); + public static Terminator EndTerminator => new(SemanticTokenType.End); public override int GetHashCode() { @@ -85,6 +85,21 @@ public class Terminator : TerminatorBase, IEquatable } } + public override string ToString() + { + switch (_terminatorType) + { + case SemanticTokenType.Keyword: + return _keywordType.ToString(); + case SemanticTokenType.Operator: + return _operatorType.ToString(); + case SemanticTokenType.Delimiter: + return _delimiterType.ToString(); + default: + return _keywordType.ToString(); + } + } + public bool Equals(Terminator? 
other) { if (other is null) @@ -190,6 +205,8 @@ public class NonTerminator : TerminatorBase, IEquatable return _type.GetHashCode(); } + public override string ToString() => _type.ToString(); + public bool Equals(NonTerminator? other) { if (other is null) diff --git a/Canon.Core/LexicalParser/SemanticToken.cs b/Canon.Core/LexicalParser/SemanticToken.cs index 66a57ea..2e87aea 100644 --- a/Canon.Core/LexicalParser/SemanticToken.cs +++ b/Canon.Core/LexicalParser/SemanticToken.cs @@ -1,4 +1,6 @@ -namespace Canon.Core.LexicalParser; +using Canon.Core.GrammarParser; + +namespace Canon.Core.LexicalParser; using Enums; @@ -23,6 +25,39 @@ public abstract class SemanticToken /// 记号的字面值 /// public required string LiteralValue { get; init; } + + public static implicit operator Terminator(SemanticToken token) + { + switch (token.TokenType) + { + case SemanticTokenType.Character: + return Terminator.CharacterTerminator; + case SemanticTokenType.Identifier: + return Terminator.IdentifierTerminator; + case SemanticTokenType.Number: + return Terminator.NumberTerminator; + case SemanticTokenType.End: + return Terminator.EndTerminator; + case SemanticTokenType.Delimiter: + return new Terminator(((DelimiterSemanticToken)token).DelimiterType); + case SemanticTokenType.Keyword: + return new Terminator(((KeywordSemanticToken)token).KeywordType); + case SemanticTokenType.Operator: + return new Terminator(((OperatorSemanticToken)token).OperatorType); + default: + throw new ArgumentException("Unknown token type"); + } + } + + /// + /// 栈底符号单例对象 + /// + public static EndSemanticToken End => new() + { + LinePos = 0, CharacterPos = 0, LiteralValue = string.Empty + }; + + public override string ToString() => LiteralValue; } /// @@ -189,3 +224,8 @@ public class IdentifierSemanticToken : SemanticToken return false; } } + +public class EndSemanticToken : SemanticToken +{ + public override SemanticTokenType TokenType => SemanticTokenType.End; +} diff --git 
a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs index 8b32d96..3d9b518 100644 --- a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs @@ -1,5 +1,6 @@ using Canon.Core.Enums; using Canon.Core.GrammarParser; +using Canon.Core.LexicalParser; namespace Canon.Tests.GrammarParserTests; @@ -107,4 +108,28 @@ public class SimpleGrammarTests Assert.Contains(new Terminator(DelimiterType.LeftParenthesis), grammar.BeginState.Transformer.Keys); Assert.Contains(Terminator.IdentifierTerminator, grammar.BeginState.Transformer.Keys); } + + [Fact] + public void AnalyseSingleSentenceTest() + { + GrammarBuilder builder = new() + { + Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator) + }; + + Grammar grammar = builder.Build(); + List tokens = + [ + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, + new OperatorSemanticToken + { + LinePos = 0, CharacterPos = 0, LiteralValue = "+", OperatorType = OperatorType.Plus + }, + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, + SemanticToken.End + ]; + + // 验证分析语句不会抛出错误 + grammar.Analyse(tokens); + } }