diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs
index 79dd150..fdd9c86 100644
--- a/Canon.Core/Enums/SemanticEnums.cs
+++ b/Canon.Core/Enums/SemanticEnums.cs
@@ -11,7 +11,11 @@ public enum SemanticTokenType
///
/// 语法分析中的栈底符号
///
- End
+ End,
+ /// <summary>
+ /// The empty-string (epsilon) symbol used during syntax analysis.
+ /// </summary>
+ Empty
}
public enum DelimiterType
diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs
index 18788df..7404db8 100644
--- a/Canon.Core/GrammarParser/Grammar.cs
+++ b/Canon.Core/GrammarParser/Grammar.cs
@@ -48,6 +48,17 @@ public class Grammar
}
break;
}
+
+ // Reduction by an empty production: the item's right-hand side has no symbols and its
+ // look-ahead equals enumerator.Current (presumably the current input token - confirm).
+ // NOTE(review): no break follows the push below, while the preceding code ends with
+ // `break;` - confirm that continuing with the modified stack is intended.
+ if (e.Right.Count == 0 && e.LookAhead == enumerator.Current)
+ {
+ // Handle the reduction of an empty production.
+ // An empty production can obviously never be the accepting one.
+ reduceFlag = true;
+ // Nothing is popped here; a fresh node typed after the left-hand side is created instead.
+ SyntaxNode newNode = new(e.Left.Type);
+
+ // Push the new node together with the top state's Transformer entry for e.Left.
+ stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
+ newNode));
+ }
}
if (acceptFlag)
diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs
index ab305a9..6641835 100644
--- a/Canon.Core/GrammarParser/GrammarBuilder.cs
+++ b/Canon.Core/GrammarParser/GrammarBuilder.cs
@@ -34,7 +34,7 @@ public class GrammarBuilder
{
foreach (List expression in pair.Value)
{
- // 对于空产生式直接跳过处理是正确的吗?
+ // TODO: 对于空产生式直接跳过处理是正确的吗
TerminatorBase? expressionHead = expression.FirstOrDefault();
if (expressionHead is null)
{
@@ -117,9 +117,23 @@ public class GrammarBuilder
// 将该非终结符的FirstSet加入进来
NonTerminator nonTerminator = (NonTerminator)expressionHead;
- if (FirstSet.TryGetValue(nonTerminator, out HashSet? firstSet))
+ if (!FirstSet.TryGetValue(nonTerminator, out HashSet? firstSet))
{
- result.UnionWith(firstSet);
+ throw new InvalidOperationException($"Failed to get first set for {nonTerminator}");
+ }
+
+ // Merge the non-terminator's FIRST set into the result, one terminator at a time.
+ foreach (Terminator terminator in firstSet)
+ {
+ // If the FIRST set contains the empty string,
+ // recursively obtain the FIRST set of the expression that follows this symbol.
+ if (terminator == Terminator.EmptyTerminator)
+ {
+ // NOTE(review): when expression[1..] is empty the result should arguably contain the
+ // empty terminator - confirm CalculateFirstSetOfExpression handles an empty expression.
+ result.UnionWith(CalculateFirstSetOfExpression(expression[1..]));
+ }
+ else
+ {
+ result.Add(terminator);
+ }
}
}
@@ -175,9 +189,11 @@ public class GrammarBuilder
{
foreach (Terminator lookAhead in lookAheadSet)
{
+ // When creating the new Expression, the empty production is not put into the right-hand side:
+ // if IsEmptyOnly(nextExpression) is true, Right becomes an empty list instead.
Expression newExpression = new()
{
- Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0
+ Left = nonTerminator, Right = IsEmptyOnly(nextExpression) ? [] : nextExpression,
+ LookAhead = lookAhead, Pos = 0
};
if (!closure.Contains(newExpression))
@@ -207,6 +223,8 @@ public class GrammarBuilder
Expression begin = new()
{
+ // The case where the right-hand side is an empty production is not considered here;
+ // after all, the grammar is augmented.
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
};
@@ -284,4 +302,16 @@ public class GrammarBuilder
return new Grammar { Begin = Begin, BeginState = beginState };
}
+
+ /// <summary>
+ /// Tells whether <paramref name="expression"/> is made of exactly one symbol and that
+ /// symbol is <see cref="Terminator.EmptyTerminator"/>.
+ /// </summary>
+ /// <param name="expression">The right-hand side of a production to inspect.</param>
+ /// <returns>
+ /// <c>true</c> when the expression holds a single terminated symbol equal to the empty
+ /// terminator; otherwise <c>false</c>.
+ /// </returns>
+ private static bool IsEmptyOnly(List expression)
+ {
+     // Evaluated left to right with short-circuiting: length check first, then the
+     // terminated check, and only then the cast and comparison.
+     return expression.Count == 1
+            && expression[0].IsTerminated
+            && (Terminator)expression[0] == Terminator.EmptyTerminator;
+ }
}
diff --git a/Canon.Core/GrammarParser/SyntaxNode.cs b/Canon.Core/GrammarParser/SyntaxNode.cs
index 8722147..899840a 100644
--- a/Canon.Core/GrammarParser/SyntaxNode.cs
+++ b/Canon.Core/GrammarParser/SyntaxNode.cs
@@ -89,7 +89,7 @@ public class SyntaxNode : IEquatable, IEnumerable
}
else
{
- // 在判等时是否需要判断子节点也相等?
+ // TODO: 在判等时是否需要判断子节点也相等
return GetNonTerminatorType() == other.GetNonTerminatorType();
}
}
diff --git a/Canon.Core/GrammarParser/Terminator.cs b/Canon.Core/GrammarParser/Terminator.cs
index f0f7569..4b7e996 100644
--- a/Canon.Core/GrammarParser/Terminator.cs
+++ b/Canon.Core/GrammarParser/Terminator.cs
@@ -68,6 +68,11 @@ public class Terminator : TerminatorBase, IEquatable
///
public static Terminator EndTerminator => new(SemanticTokenType.End);
+ /// <summary>
+ /// The terminator that stands for the empty string.
+ /// </summary>
+ /// <remarks>Every access constructs a new instance wrapping <c>SemanticTokenType.Empty</c>.</remarks>
+ public static Terminator EmptyTerminator => new(SemanticTokenType.Empty);
+
public override int GetHashCode()
{
int hash = _terminatorType.GetHashCode();
diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs
index 549799b..6c14ae1 100644
--- a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs
+++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs
@@ -89,7 +89,7 @@ public class SimpleGrammarTests
}
[Fact]
- public void StatsTest()
+ public void StatesTest()
{
GrammarBuilder builder = new()
{
diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs
index bbf58a1..e711f88 100644
--- a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs
+++ b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs
@@ -1,5 +1,6 @@
using Canon.Core.Enums;
using Canon.Core.GrammarParser;
+using Canon.Core.LexicalParser;
using Xunit.Abstractions;
namespace Canon.Tests.GrammarParserTests;
@@ -10,12 +11,11 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
/// 带有空产生式的简单语法(课后题4.18)
/// S -> A
/// A -> BA | ε
- /// B -> aB | b
+ /// B -> aB | a
/// 为了方便测试指定
/// A ProgramStruct
/// B ProgramBody
/// a Identifier
- /// b Identifier
///
// private readonly ITestOutputHelper _testOutputHelper;
@@ -34,7 +34,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
new NonTerminator(NonTerminatorType.ProgramBody),
new NonTerminator(NonTerminatorType.ProgramStruct)
],
- []
+ [Terminator.EmptyTerminator]
]
},
{
@@ -59,21 +59,45 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
builder.Build();
Assert.Contains(builder.FirstSet, pair =>
- pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator));
- Assert.Contains(builder.FirstSet, pair =>
- pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct));
- Assert.Contains(builder.FirstSet, pair =>
- pair.Key == new NonTerminator(NonTerminatorType.ProgramBody));
-
- foreach (HashSet terminators in builder.FirstSet.Values)
{
- Assert.Single(terminators);
- Assert.Contains(Terminator.IdentifierTerminator, terminators);
- }
+ if (pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator))
+ {
+ Assert.Equal(2, pair.Value.Count);
+ Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
+ Assert.Contains(Terminator.EmptyTerminator, pair.Value);
+ return true;
+ }
+
+ return false;
+ });
+
+ // FIRST(ProgramStruct) must consist of exactly the identifier terminator and the empty terminator.
+ Assert.Contains(builder.FirstSet, pair =>
+ {
+     if (pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct))
+     {
+         Assert.Equal(2, pair.Value.Count);
+         Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
+         Assert.Contains(Terminator.EmptyTerminator, pair.Value);
+         return true;
+     }
+
+     // Any other entry must not satisfy the predicate; returning true here would let
+     // Assert.Contains pass on an unrelated entry without checking ProgramStruct at all.
+     return false;
+ });
+ Assert.Contains(builder.FirstSet, pair =>
+ {
+ if (pair.Key == new NonTerminator(NonTerminatorType.ProgramBody))
+ {
+ Assert.Single(pair.Value);
+ Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
+ return true;
+ }
+
+ return false;
+ });
}
[Fact]
- public void StatsTest()
+ public void StatesTest()
{
GrammarBuilder builder = new()
{
@@ -126,4 +150,28 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
_testOutputHelper.WriteLine("--- 5 ---");
_testOutputHelper.WriteLine(state5.ToString());
}
+
+ /// <summary>
+ /// Builds the grammar from <c>s_simpleGrammar</c> with <c>StartNonTerminator</c> as the begin
+ /// symbol, analyses two identifier tokens followed by <c>SemanticToken.End</c>, and checks
+ /// the shape of the returned syntax tree.
+ /// </summary>
+ [Fact]
+ public void AnalyseSingleSentenceTest()
+ {
+ GrammarBuilder builder = new()
+ {
+ Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
+ };
+
+ Grammar grammar = builder.Build();
+
+ // Input sentence: two identifier tokens with literal "a", terminated by the End token.
+ List tokens =
+ [
+ new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "a" },
+ new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "a" },
+ SemanticToken.End
+ ];
+
+ SyntaxNode root = grammar.Analyse(tokens);
+
+ // The root is expected to be the ProgramStruct non-terminator, and enumerating it
+ // is expected to yield 7 items.
+ Assert.False(root.IsTerminated);
+ Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
+ Assert.Equal(7, root.Count());
+ }
}