fix: 正确处理含有空产生式的语法 (#6)

Reviewed-on: PostGuard/Canon#6
This commit is contained in:
jackfiled 2024-03-12 14:52:42 +08:00
parent 7de5ce8f28
commit 35aec34a8e
7 changed files with 119 additions and 21 deletions

View File

@ -11,7 +11,11 @@ public enum SemanticTokenType
/// <summary>
/// 语法分析中的栈底符号
/// </summary>
End
End,
/// <summary>
/// 语法分析中的空串符号
/// </summary>
Empty
}
public enum DelimiterType

View File

@ -48,6 +48,17 @@ public class Grammar
}
break;
}
// Reduction by an empty production: the item has an empty right-hand side
// and its lookahead equals the current input token.
if (e.Right.Count == 0 && e.LookAhead == enumerator.Current)
{
// An empty production can never be the accepting item, so only the
// reduce flag is set here.
// NOTE(review): no break follows this reduction in the visible code —
// confirm the enclosing loop cannot apply a second action for the same token.
reduceFlag = true;
// Nothing is popped from the stack: the new non-terminal node is created
// without children taken from it.
SyntaxNode newNode = new(e.Left.Type);
// Push the state reached from the current stack top via the transition on
// the production's left-hand side, paired with the new node.
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
newNode));
}
}
if (acceptFlag)

View File

@ -34,7 +34,7 @@ public class GrammarBuilder
{
foreach (List<TerminatorBase> expression in pair.Value)
{
// 对于空产生式直接跳过处理是正确的吗
// TODO: 对于空产生式直接跳过处理是正确的吗
TerminatorBase? expressionHead = expression.FirstOrDefault();
if (expressionHead is null)
{
@ -117,9 +117,23 @@ public class GrammarBuilder
// 将该非终结符的FirstSet加入进来
NonTerminator nonTerminator = (NonTerminator)expressionHead;
if (FirstSet.TryGetValue(nonTerminator, out HashSet<Terminator>? firstSet))
if (!FirstSet.TryGetValue(nonTerminator, out HashSet<Terminator>? firstSet))
{
result.UnionWith(firstSet);
throw new InvalidOperationException($"Failed to get first set for {nonTerminator}");
}
foreach (Terminator terminator in firstSet)
{
// 如果First中包含空字符串
// 递归获得该字符之后的表达式的FirstSet
if (terminator == Terminator.EmptyTerminator)
{
result.UnionWith(CalculateFirstSetOfExpression(expression[1..]));
}
else
{
result.Add(terminator);
}
}
}
@ -175,9 +189,11 @@ public class GrammarBuilder
{
foreach (Terminator lookAhead in lookAheadSet)
{
// 在新建Expression的时候就不用把空产生式放进右部里面了
Expression newExpression = new()
{
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0
Left = nonTerminator, Right = IsEmptyOnly(nextExpression) ? [] : nextExpression,
LookAhead = lookAhead, Pos = 0
};
if (!closure.Contains(newExpression))
@ -207,6 +223,8 @@ public class GrammarBuilder
Expression begin = new()
{
// 这里就不考虑右部可能为空产生式的情况了
// 毕竟有拓广文法
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
};
@ -284,4 +302,16 @@ public class GrammarBuilder
return new Grammar { Begin = Begin, BeginState = beginState };
}
/// <summary>
/// Tells whether a production right-hand side consists of exactly one symbol
/// and that symbol is the empty-string terminator.
/// </summary>
/// <param name="expression">The right-hand side symbols of a production.</param>
/// <returns>
/// <c>true</c> when the list holds a single terminated symbol equal to
/// <see cref="Terminator.EmptyTerminator"/>; otherwise <c>false</c>.
/// </returns>
private static bool IsEmptyOnly(List<TerminatorBase> expression)
{
    // Short-circuiting keeps the indexer and the cast from running unless
    // the list has exactly one element and that element is terminated.
    bool isSingleTerminator = expression.Count == 1 && expression[0].IsTerminated;

    return isSingleTerminator && (Terminator)expression[0] == Terminator.EmptyTerminator;
}
}

View File

@ -89,7 +89,7 @@ public class SyntaxNode : IEquatable<SyntaxNode>, IEnumerable<SyntaxNode>
}
else
{
// 在判等时是否需要判断子节点也相等
// TODO: 在判等时是否需要判断子节点也相等
return GetNonTerminatorType() == other.GetNonTerminatorType();
}
}

View File

@ -68,6 +68,11 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
/// </summary>
public static Terminator EndTerminator => new(SemanticTokenType.End);
/// <summary>
/// The terminator that stands for the empty string.
/// Every access constructs a new <see cref="Terminator"/> instance.
/// </summary>
public static Terminator EmptyTerminator => new Terminator(SemanticTokenType.Empty);
public override int GetHashCode()
{
int hash = _terminatorType.GetHashCode();

View File

@ -89,7 +89,7 @@ public class SimpleGrammarTests
}
[Fact]
public void StatsTest()
public void StatesTest()
{
GrammarBuilder builder = new()
{

View File

@ -1,5 +1,6 @@
using Canon.Core.Enums;
using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
using Xunit.Abstractions;
namespace Canon.Tests.GrammarParserTests;
@ -10,12 +11,11 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
/// 带有空产生式的简单语法(课后题4.18)
/// S -> A
/// A -> BA | ε
/// B -> aB | b
/// B -> aB | a
/// 为了方便测试指定
/// A ProgramStruct
/// B ProgramBody
/// a Identifier
/// b Identifier
/// </summary>
// private readonly ITestOutputHelper _testOutputHelper;
@ -34,7 +34,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
new NonTerminator(NonTerminatorType.ProgramBody),
new NonTerminator(NonTerminatorType.ProgramStruct)
],
[]
[Terminator.EmptyTerminator]
]
},
{
@ -59,21 +59,45 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
builder.Build();
Assert.Contains(builder.FirstSet, pair =>
pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator));
Assert.Contains(builder.FirstSet, pair =>
pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct));
Assert.Contains(builder.FirstSet, pair =>
pair.Key == new NonTerminator(NonTerminatorType.ProgramBody));
foreach (HashSet<Terminator> terminators in builder.FirstSet.Values)
{
Assert.Single(terminators);
Assert.Contains(Terminator.IdentifierTerminator, terminators);
}
if (pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator))
{
Assert.Equal(2, pair.Value.Count);
Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
Assert.Contains(Terminator.EmptyTerminator, pair.Value);
return true;
}
return false;
});
// The first set of ProgramStruct (A in the grammar) must hold exactly the
// identifier terminator and the empty terminator.
Assert.Contains(builder.FirstSet, pair =>
{
    if (pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct))
    {
        Assert.Equal(2, pair.Value.Count);
        Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
        Assert.Contains(Terminator.EmptyTerminator, pair.Value);
        return true;
    }

    // Entries for other non-terminators must not satisfy the predicate.
    // Returning true here would let Assert.Contains succeed on the first
    // entry it sees, so the ProgramStruct assertions might never run.
    return false;
});
Assert.Contains(builder.FirstSet, pair =>
{
if (pair.Key == new NonTerminator(NonTerminatorType.ProgramBody))
{
Assert.Single(pair.Value);
Assert.Contains(Terminator.IdentifierTerminator, pair.Value);
return true;
}
return false;
});
}
[Fact]
public void StatsTest()
public void StatesTest()
{
GrammarBuilder builder = new()
{
@ -126,4 +150,28 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
_testOutputHelper.WriteLine("--- 5 ---");
_testOutputHelper.WriteLine(state5.ToString());
}
/// <summary>
/// Builds the grammar from <c>s_simpleGrammar</c>, analyses a sentence made of
/// two identifier tokens followed by the end token, and checks the resulting
/// syntax tree: the root is a non-terminal of type ProgramStruct and
/// enumerating it yields 7 nodes.
/// </summary>
[Fact]
public void AnalyseSingleSentenceTest()
{
    // Arrange: grammar under test and the token stream "a a $".
    GrammarBuilder grammarBuilder = new()
    {
        Generators = s_simpleGrammar,
        Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    };
    Grammar builtGrammar = grammarBuilder.Build();

    List<SemanticToken> sentence =
    [
        new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "a" },
        new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "a" },
        SemanticToken.End
    ];

    // Act
    SyntaxNode tree = builtGrammar.Analyse(sentence);

    // Assert
    Assert.False(tree.IsTerminated);
    Assert.Equal(NonTerminatorType.ProgramStruct, tree.GetNonTerminatorType());
    Assert.Equal(7, tree.Count());
}
}