fix: 正确处理含有空产生式的语法 (#6)

Reviewed-on: PostGuard/Canon#6
This commit is contained in:
jackfiled 2024-03-12 14:52:42 +08:00
parent 7de5ce8f28
commit 35aec34a8e
7 changed files with 119 additions and 21 deletions

View File

@ -11,7 +11,11 @@ public enum SemanticTokenType
/// <summary> /// <summary>
/// 语法分析中的栈底符号 /// 语法分析中的栈底符号
/// </summary> /// </summary>
End End,
/// <summary>
/// 语法分析中的空串符号
/// </summary>
Empty
} }
public enum DelimiterType public enum DelimiterType

View File

@ -48,6 +48,17 @@ public class Grammar
} }
break; break;
} }
// Reduce by an empty production: the item has no right-hand-side symbols and its
// look-ahead matches the current input token.
// NOTE(review): the code just above ends with a `break`, this branch does not —
// confirm that continuing after the push is intended.
if (e.Right.Count == 0 && e.LookAhead == enumerator.Current)
{
// Original author's note: an empty production can obviously never be the accepting one,
// so only the reduce flag is raised here.
reduceFlag = true;
// Build a new node typed after the production's left-hand side.
SyntaxNode newNode = new(e.Left.Type);
// Nothing is popped; the pushed state is looked up in the current top's Transformer by e.Left.
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
newNode));
}
} }
if (acceptFlag) if (acceptFlag)

View File

@ -34,7 +34,7 @@ public class GrammarBuilder
{ {
foreach (List<TerminatorBase> expression in pair.Value) foreach (List<TerminatorBase> expression in pair.Value)
{ {
// 对于空产生式直接跳过处理是正确的吗 // TODO: 对于空产生式直接跳过处理是正确的吗
TerminatorBase? expressionHead = expression.FirstOrDefault(); TerminatorBase? expressionHead = expression.FirstOrDefault();
if (expressionHead is null) if (expressionHead is null)
{ {
@ -117,9 +117,23 @@ public class GrammarBuilder
// 将该非终结符的FirstSet加入进来 // 将该非终结符的FirstSet加入进来
NonTerminator nonTerminator = (NonTerminator)expressionHead; NonTerminator nonTerminator = (NonTerminator)expressionHead;
if (FirstSet.TryGetValue(nonTerminator, out HashSet<Terminator>? firstSet)) if (!FirstSet.TryGetValue(nonTerminator, out HashSet<Terminator>? firstSet))
{ {
result.UnionWith(firstSet); throw new InvalidOperationException($"Failed to get first set for {nonTerminator}");
}
// Merge the head non-terminator's first set into the result, one terminator at a time.
foreach (Terminator terminator in firstSet)
{
// If the first set contains the empty-string terminator, the head can vanish:
// recurse to obtain the first set of the expression that follows the head.
// NOTE(review): how an empty remainder (expression[1..] with no elements) is handled
// depends on CalculateFirstSetOfExpression, which is not visible here — confirm.
if (terminator == Terminator.EmptyTerminator)
{
result.UnionWith(CalculateFirstSetOfExpression(expression[1..]));
}
else
{
// Any other terminator is added to the result directly.
result.Add(terminator);
}
}
} }
@ -175,9 +189,11 @@ public class GrammarBuilder
{ {
foreach (Terminator lookAhead in lookAheadSet) foreach (Terminator lookAhead in lookAheadSet)
{ {
// 在新建Expression的时候就不用把空产生式放进右部里面了
Expression newExpression = new() Expression newExpression = new()
{ {
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0 Left = nonTerminator, Right = IsEmptyOnly(nextExpression) ? [] : nextExpression,
LookAhead = lookAhead, Pos = 0
}; };
if (!closure.Contains(newExpression)) if (!closure.Contains(newExpression))
@ -207,6 +223,8 @@ public class GrammarBuilder
Expression begin = new() Expression begin = new()
{ {
// 这里就不考虑右部可能为空产生式的情况了
// 毕竟有拓广文法
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0 Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
}; };
@ -284,4 +302,16 @@ public class GrammarBuilder
return new Grammar { Begin = Begin, BeginState = beginState }; return new Grammar { Begin = Begin, BeginState = beginState };
} }
/// <summary>
/// Tells whether an expression consists of exactly one symbol and that symbol
/// is the empty-string terminator.
/// </summary>
/// <param name="expression">The right-hand side of a production.</param>
/// <returns>
/// <c>true</c> when the expression holds a single terminated symbol equal to
/// <see cref="Terminator.EmptyTerminator"/>; otherwise <c>false</c>.
/// </returns>
private static bool IsEmptyOnly(List<TerminatorBase> expression)
{
    // Only a lone terminated symbol can be the empty marker.
    if (expression.Count == 1 && expression[0].IsTerminated)
    {
        Terminator onlySymbol = (Terminator)expression[0];
        return onlySymbol == Terminator.EmptyTerminator;
    }

    return false;
}
} }

View File

@ -89,7 +89,7 @@ public class SyntaxNode : IEquatable<SyntaxNode>, IEnumerable<SyntaxNode>
} }
else else
{ {
// 在判等时是否需要判断子节点也相等 // TODO: 在判等时是否需要判断子节点也相等
return GetNonTerminatorType() == other.GetNonTerminatorType(); return GetNonTerminatorType() == other.GetNonTerminatorType();
} }
} }

View File

@ -68,6 +68,11 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
/// </summary> /// </summary>
public static Terminator EndTerminator => new(SemanticTokenType.End); public static Terminator EndTerminator => new(SemanticTokenType.End);
/// <summary>
/// Terminator that stands for the empty string; each access creates a new instance
/// from <see cref="SemanticTokenType.Empty"/>.
/// </summary>
public static Terminator EmptyTerminator
{
    get { return new Terminator(SemanticTokenType.Empty); }
}
public override int GetHashCode() public override int GetHashCode()
{ {
int hash = _terminatorType.GetHashCode(); int hash = _terminatorType.GetHashCode();

View File

@ -89,7 +89,7 @@ public class SimpleGrammarTests
} }
[Fact] [Fact]
public void StatsTest() public void StatesTest()
{ {
GrammarBuilder builder = new() GrammarBuilder builder = new()
{ {

View File

@ -1,5 +1,6 @@
using Canon.Core.Enums; using Canon.Core.Enums;
using Canon.Core.GrammarParser; using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
using Xunit.Abstractions; using Xunit.Abstractions;
namespace Canon.Tests.GrammarParserTests; namespace Canon.Tests.GrammarParserTests;
@ -10,12 +11,11 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
/// 带有空产生式的简单语法(课后题4.18) /// 带有空产生式的简单语法(课后题4.18)
/// S -> A /// S -> A
/// A -> BA | ε /// A -> BA | ε
/// B -> aB | b /// B -> aB | a
/// 为了方便测试指定 /// 为了方便测试指定
/// A ProgramStruct /// A ProgramStruct
/// B ProgramBody /// B ProgramBody
/// a Identifier /// a Identifier
/// b Identifier
/// </summary> /// </summary>
// private readonly ITestOutputHelper _testOutputHelper; // private readonly ITestOutputHelper _testOutputHelper;
@ -34,7 +34,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
new NonTerminator(NonTerminatorType.ProgramBody), new NonTerminator(NonTerminatorType.ProgramBody),
new NonTerminator(NonTerminatorType.ProgramStruct) new NonTerminator(NonTerminatorType.ProgramStruct)
], ],
[] [Terminator.EmptyTerminator]
] ]
}, },
{ {
@ -59,21 +59,45 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
builder.Build(); builder.Build();
Assert.Contains(builder.FirstSet, pair => Assert.Contains(builder.FirstSet, pair =>
pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator));
Assert.Contains(builder.FirstSet, pair =>
pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct));
Assert.Contains(builder.FirstSet, pair =>
pair.Key == new NonTerminator(NonTerminatorType.ProgramBody));
foreach (HashSet<Terminator> terminators in builder.FirstSet.Values)
{ {
Assert.Single(terminators); if (pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator))
Assert.Contains(Terminator.IdentifierTerminator, terminators); {
} Assert.Equal(2, pair.Value.Count);
Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
Assert.Contains(Terminator.EmptyTerminator, pair.Value);
return true;
}
return false;
});
// The first set of ProgramStruct must hold exactly two terminators: Identifier and Empty.
// The predicate has to reject every other entry. Returning true for a non-matching key
// would let Assert.Contains succeed on an unrelated entry without ever running the
// assertions below.
Assert.Contains(builder.FirstSet, pair =>
{
    if (pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct))
    {
        Assert.Equal(2, pair.Value.Count);
        Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
        Assert.Contains(Terminator.EmptyTerminator, pair.Value);
        return true;
    }

    return false;
});
Assert.Contains(builder.FirstSet, pair =>
{
if (pair.Key == new NonTerminator(NonTerminatorType.ProgramBody))
{
Assert.Single(pair.Value);
Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
return true;
}
return false;
});
} }
[Fact] [Fact]
public void StatsTest() public void StatesTest()
{ {
GrammarBuilder builder = new() GrammarBuilder builder = new()
{ {
@ -126,4 +150,28 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
_testOutputHelper.WriteLine("--- 5 ---"); _testOutputHelper.WriteLine("--- 5 ---");
_testOutputHelper.WriteLine(state5.ToString()); _testOutputHelper.WriteLine(state5.ToString());
} }
/// <summary>
/// Parses a sentence of two identifiers followed by the end token using the grammar
/// with an empty production, then checks the shape of the resulting syntax tree.
/// </summary>
[Fact]
public void AnalyseSingleSentenceTest()
{
    // Both identifier tokens in the sentence are built from the same values.
    static IdentifierSemanticToken IdentifierA() =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = "a" };

    Grammar grammar = new GrammarBuilder
    {
        Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    }.Build();

    List<SemanticToken> sentence = [IdentifierA(), IdentifierA(), SemanticToken.End];

    SyntaxNode tree = grammar.Analyse(sentence);

    // The root must be a ProgramStruct non-terminator, and enumerating the tree yields 7 nodes.
    Assert.False(tree.IsTerminated);
    Assert.Equal(NonTerminatorType.ProgramStruct, tree.GetNonTerminatorType());
    Assert.Equal(7, tree.Count());
}
} }