feat: add a grammar parser base class abstraction (#8)

Add a grammar parser base class and an abstraction for state transitions, in preparation for generating the grammar parser directly. This also lets some large objects be released earlier, reducing memory consumption.

Reviewed-on: PostGuard/Canon#8
jackfiled 2024-03-13 16:41:44 +08:00
parent e191c1e077
commit bd3db1b7b7
8 changed files with 224 additions and 121 deletions
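
For orientation, usage after this change looks roughly as follows. This is a sketch assembled from the updated tests in this commit; the production list `generators` and the token sequence `tokens` are placeholders for whatever the caller supplies.

// Build the LR automaton from the grammar definition, as before.
GrammarBuilder builder = new()
{
    Generators = generators,
    Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
};
Grammar grammar = builder.Build();

// Convert the automaton into a table-driven parser. After this call the
// Grammar object and its LrState set are no longer needed and can be
// collected, which is the earlier release of large objects the commit
// message refers to.
GrammarParserBase parser = grammar.ToGrammarParser();
SyntaxNode root = parser.Analyse(tokens);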

View File

@@ -0,0 +1,70 @@
using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;

namespace Canon.Core.Abstractions;

/// <summary>
/// Base class for grammar parsers
/// </summary>
public abstract class GrammarParserBase
{
    public abstract ITransformer BeginTransformer { get; }

    public abstract NonTerminator Begin { get; }

    public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
    {
        Stack<AnalyseState> stack = [];
        stack.Push(new AnalyseState(BeginTransformer, new SyntaxNode(SemanticToken.End)));
        using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();

        if (!enumerator.MoveNext())
        {
            throw new InvalidOperationException("Input token list is empty");
        }

        while (true)
        {
            AnalyseState top = stack.Peek();

            // Try to reduce first
            if (top.State.ReduceTable.TryGetValue(enumerator.Current, out ReduceInformation? information))
            {
                if (information.Left == Begin)
                {
                    // Reduced to the start symbol:
                    // return directly instead of reducing further
                    return top.Node;
                }

                SyntaxNode newNode = new(information.Left.Type);
                for (int i = 0; i < information.Length; i++)
                {
                    newNode.Children.Add(stack.Pop().Node);
                }

                stack.Push(new AnalyseState(stack.Peek().State.ShiftTable[information.Left],
                    newNode));
                continue;
            }

            // If no reduction applies, try to shift
            if (top.State.ShiftTable.TryGetValue(enumerator.Current, out ITransformer? next))
            {
                stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));

                if (enumerator.MoveNext())
                {
                    continue;
                }
                else
                {
                    throw new InvalidOperationException("Run out of token but not accept");
                }
            }

            throw new InvalidOperationException("Failed to analyse input grammar");
        }
    }

    private record AnalyseState(ITransformer State, SyntaxNode Node);
}

View File

@@ -0,0 +1,26 @@
using Canon.Core.GrammarParser;

namespace Canon.Core.Abstractions;

/// <summary>
/// The information required to perform a reduction
/// </summary>
/// <param name="Length">The length of the reduction</param>
/// <param name="Left">The left-hand non-terminal produced by the reduction</param>
public record ReduceInformation(int Length, NonTerminator Left);

/// <summary>
/// The transition information of a state
/// </summary>
public interface ITransformer
{
    /// <summary>
    /// Shift transitions
    /// </summary>
    public IDictionary<TerminatorBase, ITransformer> ShiftTable { get; }

    /// <summary>
    /// Reduce transitions
    /// </summary>
    public IDictionary<Terminator, ReduceInformation> ReduceTable { get; }
}

View File

@@ -1,94 +1,90 @@
+using Canon.Core.Abstractions;
 using Canon.Core.LexicalParser;
 
 namespace Canon.Core.GrammarParser;
 
 /// <summary>
 /// A grammar constructed by the LR analysis method
 /// </summary>
 public class Grammar
 {
     /// <summary>
     /// The start symbol
     /// </summary>
     public required NonTerminator Begin { get; init; }
 
+    /// <summary>
+    /// The DFA of the grammar
+    /// </summary>
+    public required HashSet<LrState> Automation { get; init; }
+
     /// <summary>
     /// The start state
     /// </summary>
     public required LrState BeginState { get; init; }
 
-    public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
-    {
-        Stack<AnalyseState> stack = [];
-        stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
-        using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
-
-        if (!enumerator.MoveNext())
-        {
-            throw new InvalidOperationException("Input token list is empty");
-        }
-
-        while (true)
-        {
-            AnalyseState top = stack.Peek();
-
-            // Try to reduce or accept first
-            bool acceptFlag = false, reduceFlag = false;
-            foreach (Expression e in top.State.Expressions)
-            {
-                if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
-                {
-                    if (e.Left == Begin)
-                    {
-                        acceptFlag = true;
-                    }
-                    else
-                    {
-                        reduceFlag = true;
-                        SyntaxNode newNode = new(e.Left.Type);
-                        for (int i = 0; i < e.Right.Count; i++)
-                        {
-                            newNode.Children.Add(stack.Pop().Node);
-                        }
-                        stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
-                            newNode));
-                    }
-                    break;
-                }
-
-                if (e.Right.Count == 0 && e.LookAhead == enumerator.Current)
-                {
-                    // Handle the reduction of an empty production
-                    // An empty production can obviously not be accepted
-                    reduceFlag = true;
-                    SyntaxNode newNode = new(e.Left.Type);
-                    stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
-                        newNode));
-                }
-            }
-
-            if (acceptFlag)
-            {
-                // Accept the grammar and exit the loop
-                return top.Node;
-            }
-
-            if (reduceFlag)
-            {
-                // Reduce
-                continue;
-            }
-
-            // Try to shift
-            if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
-            {
-                stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
-                if (enumerator.MoveNext())
-                {
-                    continue;
-                }
-                else
-                {
-                    throw new InvalidOperationException("Run out of token but not accept");
-                }
-            }
-
-            throw new InvalidOperationException("Failed to analyse input grammar");
-        }
-    }
+    public GrammarParserBase ToGrammarParser()
+    {
+        Dictionary<LrState, Transformer> transformers = [];
+
+        foreach (LrState state in Automation)
+        {
+            ITransformer transformer;
+            if (transformers.TryGetValue(state, out Transformer? oldTransformer))
+            {
+                transformer = oldTransformer;
+            }
+            else
+            {
+                Transformer newTransformer = new();
+                transformers.Add(state, newTransformer);
+                transformer = newTransformer;
+            }
+
+            // Build the reduce table
+            foreach (Expression expression in state.Expressions)
+            {
+                if (expression.Pos == expression.Right.Count)
+                {
+                    transformer.ReduceTable.TryAdd(expression.LookAhead, new ReduceInformation(
+                        expression.Right.Count, expression.Left));
+                }
+            }
+
+            // Build the shift table
+            foreach (KeyValuePair<TerminatorBase, LrState> pair in state.Transformer)
+            {
+                ITransformer targetTransformer;
+                if (transformers.TryGetValue(pair.Value, out Transformer? oldTransformer2))
+                {
+                    targetTransformer = oldTransformer2;
+                }
+                else
+                {
+                    Transformer newTransformer = new();
+                    transformers.Add(pair.Value, newTransformer);
+                    targetTransformer = newTransformer;
+                }
+
+                transformer.ShiftTable.TryAdd(pair.Key, targetTransformer);
+            }
+        }
+
+        return new GrammarParser(transformers[BeginState], Begin);
+    }
+
+    private class GrammarParser(ITransformer beginTransformer, NonTerminator begin) : GrammarParserBase
+    {
+        public override ITransformer BeginTransformer { get; } = beginTransformer;
+
+        public override NonTerminator Begin { get; } = begin;
+    }
+
+    private class Transformer : ITransformer
+    {
+        public IDictionary<TerminatorBase, ITransformer> ShiftTable { get; }
+            = new Dictionary<TerminatorBase, ITransformer>();
+
+        public IDictionary<Terminator, ReduceInformation> ReduceTable { get; }
+            = new Dictionary<Terminator, ReduceInformation>();
+    }
 
     private record AnalyseState(LrState State, SyntaxNode Node);
 }

View File

@@ -300,7 +300,7 @@ public class GrammarBuilder
             Automation.UnionWith(addedStates);
         }
 
-        return new Grammar { Begin = Begin, BeginState = beginState };
+        return new Grammar { Begin = Begin, BeginState = beginState, Automation = Automation };
     }
 
     private static bool IsEmptyOnly(List<TerminatorBase> expression)

View File

@@ -1,4 +1,5 @@
+using Canon.Core.Abstractions;
 using Canon.Core.Enums;
 using Canon.Core.GrammarParser;
 using Canon.Core.LexicalParser;
 
@@ -118,6 +119,7 @@ public class SimpleGrammarTests
         };
 
         Grammar grammar = builder.Build();
+        GrammarParserBase parser = grammar.ToGrammarParser();
 
         // n + n
         List<SemanticToken> tokens =
@@ -142,7 +144,7 @@ public class SimpleGrammarTests
         // F n
         // |
         // n
-        SyntaxNode root = grammar.Analyse(tokens);
+        SyntaxNode root = parser.Analyse(tokens);
         Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
         Assert.Equal(3, root.Children.Count);
         Assert.Contains(root.Children, node =>
@@ -168,6 +170,8 @@ public class SimpleGrammarTests
         };
 
         Grammar grammar = builder.Build();
+        GrammarParserBase parser = grammar.ToGrammarParser();
+
         // (n + n) * n
        List<SemanticToken> tokens =
         [
@@ -193,7 +197,8 @@ public class SimpleGrammarTests
             SemanticToken.End
         ];
 
-        SyntaxNode root = grammar.Analyse(tokens);
+        SyntaxNode root = parser.Analyse(tokens);
+
         Assert.Equal(18, root.Count());
         Assert.False(root.IsTerminated);
         Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());

View File

@@ -1,4 +1,5 @@
+using Canon.Core.Abstractions;
 using Canon.Core.Enums;
 using Canon.Core.GrammarParser;
 using Canon.Core.LexicalParser;
 using Xunit.Abstractions;
@@ -151,6 +152,23 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
         _testOutputHelper.WriteLine(state5.ToString());
     }
 
+    [Fact]
+    public void ParserTest()
+    {
+        GrammarBuilder builder = new()
+        {
+            Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
+        };
+
+        Grammar grammar = builder.Build();
+        GrammarParserBase parser = grammar.ToGrammarParser();
+
+        ITransformer transformer1 = parser.BeginTransformer;
+        Assert.Equal(3, transformer1.ShiftTable.Count);
+        Assert.Single(transformer1.ReduceTable);
+        Assert.Contains(new NonTerminator(NonTerminatorType.ProgramStruct), transformer1.ShiftTable);
+    }
+
     [Fact]
     public void AnalyseSingleSentenceTest()
     {
@@ -160,6 +178,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
         };
 
         Grammar grammar = builder.Build();
+        GrammarParserBase parser = grammar.ToGrammarParser();
 
         List<SemanticToken> tokens =
         [
@@ -168,7 +187,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
             SemanticToken.End
         ];
 
-        SyntaxNode root = grammar.Analyse(tokens);
+        SyntaxNode root = parser.Analyse(tokens);
 
         Assert.False(root.IsTerminated);
         Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());

View File

@@ -27,11 +27,10 @@ public class TerminatorTests
     public void TerminatorAndKeywordSemanticTokenTest()
     {
         Terminator keywordTerminator = new(KeywordType.Array);
-        LinkedList<char> keywordContent = Utils.GetLinkedList("array [3..9] of integer");
-
-        Assert.True(KeywordSemanticToken.TryParse(0, 0, keywordContent.First!,
-            out KeywordSemanticToken? keywordSemanticToken));
-        Assert.NotNull(keywordSemanticToken);
+        KeywordSemanticToken keywordSemanticToken = new()
+        {
+            LinePos = 0, CharacterPos = 0, KeywordType = KeywordType.Array, LiteralValue = "array"
+        };
 
         Assert.True(keywordTerminator == keywordSemanticToken);
     }
@@ -39,11 +38,10 @@ public class TerminatorTests
     public void TerminatorAndDelimiterSemanticTokenTest()
     {
         Terminator terminator = new(DelimiterType.Period);
-        LinkedList<char> content = Utils.GetLinkedList(".");
-
-        Assert.True(DelimiterSemanticToken.TryParse(0, 0, content.First!,
-            out DelimiterSemanticToken? token));
-        Assert.NotNull(token);
+        DelimiterSemanticToken token = new()
+        {
+            LinePos = 0, CharacterPos = 0, DelimiterType = DelimiterType.Period, LiteralValue = "."
+        };
 
         Assert.True(token == terminator);
     }
@@ -51,44 +49,10 @@ public class TerminatorTests
     public void TerminatorAndOperatorSemanticTokenTest()
     {
         Terminator terminator = new(OperatorType.GreaterEqual);
-        LinkedList<char> content = Utils.GetLinkedList(">=");
-
-        Assert.True(OperatorSemanticToken.TryParse(0, 0, content.First!,
-            out OperatorSemanticToken? token));
-        Assert.NotNull(token);
+        OperatorSemanticToken token = new()
+        {
+            LinePos = 0, CharacterPos = 0, OperatorType = OperatorType.GreaterEqual, LiteralValue = ">="
+        };
 
         Assert.True(token == terminator);
     }
-
-    [Fact]
-    public void TerminatorAndNumberSemanticTokenTest()
-    {
-        LinkedList<char> content = Utils.GetLinkedList("123");
-
-        Assert.True(NumberSemanticToken.TryParse(0, 0, content.First!,
-            out NumberSemanticToken? token));
-        Assert.NotNull(token);
-
-        Assert.True(Terminator.NumberTerminator == token);
-    }
-
-    [Fact]
-    public void TerminatorAndCharacterSemanticTokenTest()
-    {
-        LinkedList<char> content = Utils.GetLinkedList("'a'");
-
-        Assert.True(CharacterSemanticToken.TryParse(0, 0, content.First!,
-            out CharacterSemanticToken? token));
-        Assert.NotNull(token);
-
-        Assert.True(Terminator.CharacterTerminator == token);
-    }
-
-    [Fact]
-    public void TerminatorAndIdentifierSemanticTokenTest()
-    {
-        LinkedList<char> content = Utils.GetLinkedList("gcd");
-
-        Assert.True(IdentifierSemanticToken.TryParse(0, 0, content.First!,
-            out IdentifierSemanticToken? token));
-        Assert.NotNull(token);
-
-        Assert.True(Terminator.IdentifierTerminator == token);
-    }
 }

View File

@@ -1,4 +1,6 @@
+using Canon.Core.GrammarParser;
+
 namespace Canon.Tests;
 
 public static class Utils
 {
@@ -13,4 +15,25 @@ public static class Utils
         return list;
     }
 
+    /// <summary>
+    /// Verify that two syntax trees are identical
+    /// </summary>
+    /// <param name="a">One syntax tree</param>
+    /// <param name="b">The other syntax tree</param>
+    public static void CheckSyntaxRoot(SyntaxNode a, SyntaxNode b)
+    {
+        int length = a.Count();
+        Assert.Equal(length, b.Count());
+
+        using IEnumerator<SyntaxNode> aIter = a.GetEnumerator(), bIter = b.GetEnumerator();
+
+        for (int i = 0; i < length; i++)
+        {
+            Assert.True(aIter.MoveNext());
+            Assert.True(bIter.MoveNext());
+
+            Assert.Equal(aIter.Current, bIter.Current);
+        }
+    }
 }