From 35aec34a8eb6882b52c6cb2d4b2a3cb45a8d27fa Mon Sep 17 00:00:00 2001 From: jackfiled Date: Tue, 12 Mar 2024 14:52:42 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=AD=A3=E7=A1=AE=E5=A4=84=E7=90=86?= =?UTF-8?q?=E5=90=AB=E6=9C=89=E7=A9=BA=E4=BA=A7=E7=94=9F=E5=BC=8F=E7=9A=84?= =?UTF-8?q?=E8=AF=AD=E6=B3=95=20(#6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/6 --- Canon.Core/Enums/SemanticEnums.cs | 6 +- Canon.Core/GrammarParser/Grammar.cs | 11 +++ Canon.Core/GrammarParser/GrammarBuilder.cs | 38 +++++++++- Canon.Core/GrammarParser/SyntaxNode.cs | 2 +- Canon.Core/GrammarParser/Terminator.cs | 5 ++ .../GrammarParserTests/SimpleGrammarTests.cs | 2 +- .../SimpleGrammarWithEmptyTests.cs | 76 +++++++++++++++---- 7 files changed, 119 insertions(+), 21 deletions(-) diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs index 79dd150..fdd9c86 100644 --- a/Canon.Core/Enums/SemanticEnums.cs +++ b/Canon.Core/Enums/SemanticEnums.cs @@ -11,7 +11,11 @@ public enum SemanticTokenType /// /// 语法分析中的栈底符号 /// - End + End, + /// + /// The empty-string (epsilon) symbol used in syntax analysis + /// + Empty } public enum DelimiterType diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs index 18788df..7404db8 100644 --- a/Canon.Core/GrammarParser/Grammar.cs +++ b/Canon.Core/GrammarParser/Grammar.cs @@ -48,6 +48,17 @@ public class Grammar } break; } + + if (e.Right.Count == 0 && e.LookAhead == enumerator.Current) + { + // Handle the reduction of an empty production + // Obviously an empty production can never be the accepting one + reduceFlag = true; + SyntaxNode newNode = new(e.Left.Type); + + stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left], + newNode)); + } } if (acceptFlag) diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs index ab305a9..6641835 100644 --- a/Canon.Core/GrammarParser/GrammarBuilder.cs +++ b/Canon.Core/GrammarParser/GrammarBuilder.cs @@ -34,7 +34,7 @@ public class 
GrammarBuilder { foreach (List expression in pair.Value) { - // 对于空产生式直接跳过处理是正确的吗? + // TODO: is it correct to simply skip empty productions? TerminatorBase? expressionHead = expression.FirstOrDefault(); if (expressionHead is null) { @@ -117,9 +117,23 @@ public class GrammarBuilder // 将该非终结符的FirstSet加入进来 NonTerminator nonTerminator = (NonTerminator)expressionHead; - if (FirstSet.TryGetValue(nonTerminator, out HashSet? firstSet)) + if (!FirstSet.TryGetValue(nonTerminator, out HashSet? firstSet)) { - result.UnionWith(firstSet); + throw new InvalidOperationException($"Failed to get first set for {nonTerminator}"); + } + + foreach (Terminator terminator in firstSet) + { + // If the First set contains the empty string + // recursively obtain the FirstSet of the expression that follows this symbol + if (terminator == Terminator.EmptyTerminator) + { + result.UnionWith(CalculateFirstSetOfExpression(expression[1..])); + } + else + { + result.Add(terminator); + } } } @@ -175,9 +189,11 @@ public class GrammarBuilder { foreach (Terminator lookAhead in lookAheadSet) { + // When creating the new Expression, the empty production no longer needs to be put into the right-hand side Expression newExpression = new() { - Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0 + Left = nonTerminator, Right = IsEmptyOnly(nextExpression) ? 
[] : nextExpression, + LookAhead = lookAhead, Pos = 0 }; if (!closure.Contains(newExpression)) @@ -207,6 +223,8 @@ public class GrammarBuilder Expression begin = new() { + // A possibly-empty right-hand side is not considered here + // since the grammar is augmented anyway Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0 }; @@ -284,4 +302,16 @@ public class GrammarBuilder return new Grammar { Begin = Begin, BeginState = beginState }; } + + private static bool IsEmptyOnly(List expression) + { + if (expression.Count != 1 || !expression[0].IsTerminated) + { + return false; + } + + Terminator terminator = (Terminator)expression[0]; + + return terminator == Terminator.EmptyTerminator; + } } diff --git a/Canon.Core/GrammarParser/SyntaxNode.cs b/Canon.Core/GrammarParser/SyntaxNode.cs index 8722147..899840a 100644 --- a/Canon.Core/GrammarParser/SyntaxNode.cs +++ b/Canon.Core/GrammarParser/SyntaxNode.cs @@ -89,7 +89,7 @@ public class SyntaxNode : IEquatable, IEnumerable } else { - // 在判等时是否需要判断子节点也相等? + // TODO: should equality also require the child nodes to be equal? return GetNonTerminatorType() == other.GetNonTerminatorType(); } } diff --git a/Canon.Core/GrammarParser/Terminator.cs b/Canon.Core/GrammarParser/Terminator.cs index f0f7569..4b7e996 100644 --- a/Canon.Core/GrammarParser/Terminator.cs +++ b/Canon.Core/GrammarParser/Terminator.cs @@ -68,6 +68,11 @@ public class Terminator : TerminatorBase, IEquatable /// public static Terminator EndTerminator => new(SemanticTokenType.End); + /// + /// The terminator representing the empty string + /// + public static Terminator EmptyTerminator => new(SemanticTokenType.Empty); + public override int GetHashCode() { int hash = _terminatorType.GetHashCode(); diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs index 549799b..6c14ae1 100644 --- a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs @@ -89,7 +89,7 @@ public class SimpleGrammarTests } [Fact] - public void StatsTest() + public void 
StatesTest() { GrammarBuilder builder = new() { diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs index bbf58a1..e711f88 100644 --- a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs @@ -1,5 +1,6 @@ using Canon.Core.Enums; using Canon.Core.GrammarParser; +using Canon.Core.LexicalParser; using Xunit.Abstractions; namespace Canon.Tests.GrammarParserTests; @@ -10,12 +11,11 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) /// 带有空产生式的简单语法(课后题4.18) /// S -> A /// A -> BA | ε - /// B -> aB | b + /// B -> aB | a /// 为了方便测试指定 /// A ProgramStruct /// B ProgramBody /// a Identifier - /// b Identifier /// // private readonly ITestOutputHelper _testOutputHelper; @@ -34,7 +34,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) new NonTerminator(NonTerminatorType.ProgramBody), new NonTerminator(NonTerminatorType.ProgramStruct) ], - [] + [Terminator.EmptyTerminator] ] }, { @@ -59,21 +59,45 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) builder.Build(); Assert.Contains(builder.FirstSet, pair => - pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator)); - Assert.Contains(builder.FirstSet, pair => - pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct)); - Assert.Contains(builder.FirstSet, pair => - pair.Key == new NonTerminator(NonTerminatorType.ProgramBody)); - - foreach (HashSet terminators in builder.FirstSet.Values) { - Assert.Single(terminators); - Assert.Contains(Terminator.IdentifierTerminator, terminators); - } + if (pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator)) + { + Assert.Equal(2, pair.Value.Count); + Assert.Contains(Terminator.IdentifierTerminator, pair.Value); + Assert.Contains(Terminator.EmptyTerminator, pair.Value); + return true; + } + + return false; + }); + + 
Assert.Contains(builder.FirstSet, pair => + { + if (pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct)) + { + Assert.Equal(2, pair.Value.Count); + Assert.Contains(Terminator.IdentifierTerminator, pair.Value); + Assert.Contains(Terminator.EmptyTerminator, pair.Value); + return true; + } + + return false; // a non-matching pair must not satisfy Assert.Contains, otherwise the checks above may never run + }); + Assert.Contains(builder.FirstSet, pair => + { + if (pair.Key == new NonTerminator(NonTerminatorType.ProgramBody)) + { + Assert.Single(pair.Value); + Assert.Contains(Terminator.IdentifierTerminator, pair.Value); + return true; + } + + return false; + }); } [Fact] - public void StatsTest() + public void StatesTest() { GrammarBuilder builder = new() { @@ -126,4 +150,28 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) _testOutputHelper.WriteLine("--- 5 ---"); _testOutputHelper.WriteLine(state5.ToString()); } + + [Fact] + public void AnalyseSingleSentenceTest() + { + GrammarBuilder builder = new() + { + Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator) + }; + + Grammar grammar = builder.Build(); + + List tokens = + [ + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "a" }, + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "a" }, + SemanticToken.End + ]; + + SyntaxNode root = grammar.Analyse(tokens); + + Assert.False(root.IsTerminated); + Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType()); + Assert.Equal(7, root.Count()); + } }