feat: 添加语法分析基类抽象 (#8)
增加语法分析基类和状态转换接口抽象,为直接生成语法分析器做准备,同时也提前释放一些大对象,降低内存消耗。 Reviewed-on: PostGuard/Canon#8
This commit is contained in:
parent
e191c1e077
commit
bd3db1b7b7
70
Canon.Core/Abstractions/GrammarParseBase.cs
Normal file
70
Canon.Core/Abstractions/GrammarParseBase.cs
Normal file
|
@ -0,0 +1,70 @@
|
|||
using Canon.Core.GrammarParser;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
namespace Canon.Core.Abstractions;
|
||||
|
||||
/// <summary>
/// Abstract base class for table-driven shift/reduce grammar parsers.
/// </summary>
/// <remarks>
/// Derived classes supply the state the analysis starts in and the start symbol;
/// <see cref="Analyse"/> then walks the shift and reduce tables exposed by each
/// <see cref="ITransformer"/>.
/// </remarks>
public abstract class GrammarParserBase
{
    /// <summary>
    /// State the analysis starts in.
    /// </summary>
    public abstract ITransformer BeginTransformer { get; }

    /// <summary>
    /// Start symbol; a reduction whose left-hand side equals it ends the analysis.
    /// </summary>
    public abstract NonTerminator Begin { get; }

    /// <summary>
    /// Runs the shift/reduce analysis over <paramref name="tokens"/> and returns the syntax tree.
    /// </summary>
    /// <param name="tokens">Token sequence to analyse; must contain at least one token.</param>
    /// <returns>
    /// The node on top of the analysis stack at the moment a reduction to <see cref="Begin"/> is found.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The token sequence is empty, it ends before a reduction to <see cref="Begin"/> is found,
    /// or the current state has neither a reduce nor a shift entry for the current token.
    /// </exception>
    public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
    {
        // Fail with a descriptive exception instead of a NullReferenceException from GetEnumerator().
        ArgumentNullException.ThrowIfNull(tokens);

        // The bottom stack entry pairs the begin state with a node built from SemanticToken.End.
        Stack<AnalyseState> stack = [];
        stack.Push(new AnalyseState(BeginTransformer, new SyntaxNode(SemanticToken.End)));

        using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
        if (!enumerator.MoveNext())
        {
            throw new InvalidOperationException("Input token list is empty");
        }

        while (true)
        {
            AnalyseState top = stack.Peek();

            // Try to reduce first.
            if (top.State.ReduceTable.TryGetValue(enumerator.Current, out ReduceInformation? information))
            {
                if (information.Left == Begin)
                {
                    // Reducing to the start symbol means the input is accepted:
                    // return the current top node without reducing any further.
                    return top.Node;
                }

                // Pop one stack entry per symbol of the reduced production;
                // children are added in the order they are popped.
                SyntaxNode newNode = new(information.Left.Type);
                for (int i = 0; i < information.Length; i++)
                {
                    newNode.Children.Add(stack.Pop().Node);
                }

                // The state uncovered by the pops decides where to go on the reduced non-terminal.
                stack.Push(new AnalyseState(stack.Peek().State.ShiftTable[information.Left],
                    newNode));
                continue;
            }

            // No reduction applies, so try to shift the current token.
            if (top.State.ShiftTable.TryGetValue(enumerator.Current, out ITransformer? next))
            {
                stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
                if (enumerator.MoveNext())
                {
                    continue;
                }
                else
                {
                    throw new InvalidOperationException("Run out of token but not accept");
                }
            }

            throw new InvalidOperationException("Failed to analyse input grammar");
        }
    }

    /// <summary>
    /// One entry of the analysis stack: a state together with the syntax node built for it.
    /// </summary>
    private record AnalyseState(ITransformer State, SyntaxNode Node);
}
|
26
Canon.Core/Abstractions/ITransformer.cs
Normal file
26
Canon.Core/Abstractions/ITransformer.cs
Normal file
|
@ -0,0 +1,26 @@
|
|||
using Canon.Core.GrammarParser;
|
||||
|
||||
namespace Canon.Core.Abstractions;
|
||||
|
||||
/// <summary>
/// Information required to perform a reduction.
/// </summary>
/// <param name="Length">Length of the reduction.</param>
/// <param name="Left">Left-hand-side symbol obtained by the reduction.</param>
public record ReduceInformation(int Length, NonTerminator Left);

/// <summary>
/// Transition information of a single state.
/// </summary>
public interface ITransformer
{
    /// <summary>
    /// Shift information: maps a symbol to the state to move to.
    /// </summary>
    IDictionary<TerminatorBase, ITransformer> ShiftTable { get; }

    /// <summary>
    /// Reduce information: maps a terminator to the reduction to perform.
    /// </summary>
    IDictionary<Terminator, ReduceInformation> ReduceTable { get; }
}
|
|
@ -1,94 +1,90 @@
|
|||
using Canon.Core.LexicalParser;
|
||||
using Canon.Core.Abstractions;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
namespace Canon.Core.GrammarParser;
|
||||
|
||||
/// <summary>
|
||||
/// 通过LR分析方法建立的语法
|
||||
/// </summary>
|
||||
public class Grammar
|
||||
{
|
||||
/// <summary>
|
||||
/// 起始符
|
||||
/// </summary>
|
||||
public required NonTerminator Begin { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// 语法中的DFA
|
||||
/// </summary>
|
||||
public required HashSet<LrState> Automation { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// 起始状态
|
||||
/// </summary>
|
||||
public required LrState BeginState { get; init; }
|
||||
|
||||
public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
|
||||
public GrammarParserBase ToGrammarParser()
|
||||
{
|
||||
Stack<AnalyseState> stack = [];
|
||||
stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
|
||||
Dictionary<LrState, Transformer> transformers = [];
|
||||
|
||||
using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
|
||||
if (!enumerator.MoveNext())
|
||||
foreach (LrState state in Automation)
|
||||
{
|
||||
throw new InvalidOperationException("Input token list is empty");
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
AnalyseState top = stack.Peek();
|
||||
|
||||
// 尝试进行移进
|
||||
bool acceptFlag = false, reduceFlag = false;
|
||||
foreach (Expression e in top.State.Expressions)
|
||||
ITransformer transformer;
|
||||
if (transformers.TryGetValue(state, out Transformer? oldTransformer))
|
||||
{
|
||||
if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
|
||||
transformer = oldTransformer;
|
||||
}
|
||||
else
|
||||
{
|
||||
Transformer newTransformer = new();
|
||||
transformers.Add(state, newTransformer);
|
||||
transformer = newTransformer;
|
||||
}
|
||||
|
||||
// 生成归约的迁移表
|
||||
foreach (Expression expression in state.Expressions)
|
||||
{
|
||||
if (expression.Pos == expression.Right.Count)
|
||||
{
|
||||
if (e.Left == Begin)
|
||||
{
|
||||
acceptFlag = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
reduceFlag = true;
|
||||
SyntaxNode newNode = new(e.Left.Type);
|
||||
|
||||
for (int i = 0; i < e.Right.Count; i++)
|
||||
{
|
||||
newNode.Children.Add(stack.Pop().Node);
|
||||
}
|
||||
|
||||
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
|
||||
newNode));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (e.Right.Count == 0 && e.LookAhead == enumerator.Current)
|
||||
{
|
||||
// 考虑空产生式的归约
|
||||
// 显然空产生式是不能accept的
|
||||
reduceFlag = true;
|
||||
SyntaxNode newNode = new(e.Left.Type);
|
||||
|
||||
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
|
||||
newNode));
|
||||
transformer.ReduceTable.TryAdd(expression.LookAhead, new ReduceInformation(
|
||||
expression.Right.Count, expression.Left));
|
||||
}
|
||||
}
|
||||
|
||||
if (acceptFlag)
|
||||
// 生成移进的迁移表
|
||||
foreach (KeyValuePair<TerminatorBase,LrState> pair in state.Transformer)
|
||||
{
|
||||
// 接受文法 退出循环
|
||||
return top.Node;
|
||||
}
|
||||
|
||||
if (reduceFlag)
|
||||
{
|
||||
// 归约
|
||||
continue;
|
||||
}
|
||||
|
||||
// 尝试进行移进
|
||||
if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
|
||||
{
|
||||
stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
|
||||
if (enumerator.MoveNext())
|
||||
ITransformer targetTransformer;
|
||||
if (transformers.TryGetValue(pair.Value, out Transformer? oldTransformer2))
|
||||
{
|
||||
continue;
|
||||
targetTransformer = oldTransformer2;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidOperationException("Run out of token but not accept");
|
||||
Transformer newTransformer = new();
|
||||
transformers.Add(pair.Value, newTransformer);
|
||||
targetTransformer = newTransformer;
|
||||
}
|
||||
transformer.ShiftTable.TryAdd(pair.Key, targetTransformer);
|
||||
}
|
||||
|
||||
throw new InvalidOperationException("Failed to analyse input grammar");
|
||||
}
|
||||
|
||||
return new GrammarParser(transformers[BeginState], Begin);
|
||||
}
|
||||
|
||||
/// <summary>
/// Concrete <see cref="GrammarParserBase"/> that exposes the begin state and
/// start symbol handed to its constructor.
/// </summary>
private class GrammarParser : GrammarParserBase
{
    public GrammarParser(ITransformer beginTransformer, NonTerminator begin)
    {
        BeginTransformer = beginTransformer;
        Begin = begin;
    }

    /// <inheritdoc />
    public override ITransformer BeginTransformer { get; }

    /// <inheritdoc />
    public override NonTerminator Begin { get; }
}
|
||||
|
||||
/// <summary>
/// <see cref="ITransformer"/> implementation whose two tables start out empty.
/// </summary>
private class Transformer : ITransformer
{
    public Transformer()
    {
        ShiftTable = new Dictionary<TerminatorBase, ITransformer>();
        ReduceTable = new Dictionary<Terminator, ReduceInformation>();
    }

    /// <inheritdoc />
    public IDictionary<TerminatorBase, ITransformer> ShiftTable { get; }

    /// <inheritdoc />
    public IDictionary<Terminator, ReduceInformation> ReduceTable { get; }
}
|
||||
|
||||
private record AnalyseState(LrState State, SyntaxNode Node);
|
||||
|
|
|
@ -300,7 +300,7 @@ public class GrammarBuilder
|
|||
Automation.UnionWith(addedStates);
|
||||
}
|
||||
|
||||
return new Grammar { Begin = Begin, BeginState = beginState };
|
||||
return new Grammar { Begin = Begin, BeginState = beginState, Automation = Automation};
|
||||
}
|
||||
|
||||
private static bool IsEmptyOnly(List<TerminatorBase> expression)
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.Abstractions;
|
||||
using Canon.Core.Enums;
|
||||
using Canon.Core.GrammarParser;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
|
@ -118,6 +119,7 @@ public class SimpleGrammarTests
|
|||
};
|
||||
|
||||
Grammar grammar = builder.Build();
|
||||
GrammarParserBase parser = grammar.ToGrammarParser();
|
||||
// n + n
|
||||
List<SemanticToken> tokens =
|
||||
[
|
||||
|
@ -142,7 +144,7 @@ public class SimpleGrammarTests
|
|||
// F n
|
||||
// |
|
||||
// n
|
||||
SyntaxNode root = grammar.Analyse(tokens);
|
||||
SyntaxNode root = parser.Analyse(tokens);
|
||||
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
|
||||
Assert.Equal(3, root.Children.Count);
|
||||
Assert.Contains(root.Children, node =>
|
||||
|
@ -168,6 +170,8 @@ public class SimpleGrammarTests
|
|||
};
|
||||
|
||||
Grammar grammar = builder.Build();
|
||||
GrammarParserBase parser = grammar.ToGrammarParser();
|
||||
|
||||
// (n + n) * n
|
||||
List<SemanticToken> tokens =
|
||||
[
|
||||
|
@ -193,7 +197,8 @@ public class SimpleGrammarTests
|
|||
SemanticToken.End
|
||||
];
|
||||
|
||||
SyntaxNode root = grammar.Analyse(tokens);
|
||||
|
||||
SyntaxNode root = parser.Analyse(tokens);
|
||||
Assert.Equal(18, root.Count());
|
||||
Assert.False(root.IsTerminated);
|
||||
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.Abstractions;
|
||||
using Canon.Core.Enums;
|
||||
using Canon.Core.GrammarParser;
|
||||
using Canon.Core.LexicalParser;
|
||||
using Xunit.Abstractions;
|
||||
|
@ -151,6 +152,23 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
|
|||
_testOutputHelper.WriteLine(state5.ToString());
|
||||
}
|
||||
|
||||
[Fact]
public void ParserTest()
{
    // Build the grammar and convert it into a table-driven parser.
    GrammarBuilder grammarBuilder = new()
    {
        Generators = s_simpleGrammar,
        Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    };
    Grammar builtGrammar = grammarBuilder.Build();
    GrammarParserBase grammarParser = builtGrammar.ToGrammarParser();

    // Inspect the tables of the state the parser starts in.
    ITransformer beginState = grammarParser.BeginTransformer;
    NonTerminator programStruct = new(NonTerminatorType.ProgramStruct);

    Assert.Equal(3, beginState.ShiftTable.Count);
    Assert.Single(beginState.ReduceTable);
    Assert.Contains(programStruct, beginState.ShiftTable);
}
|
||||
|
||||
[Fact]
|
||||
public void AnalyseSingleSentenceTest()
|
||||
{
|
||||
|
@ -160,6 +178,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
|
|||
};
|
||||
|
||||
Grammar grammar = builder.Build();
|
||||
GrammarParserBase parser = grammar.ToGrammarParser();
|
||||
|
||||
List<SemanticToken> tokens =
|
||||
[
|
||||
|
@ -168,7 +187,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
|
|||
SemanticToken.End
|
||||
];
|
||||
|
||||
SyntaxNode root = grammar.Analyse(tokens);
|
||||
SyntaxNode root = parser.Analyse(tokens);
|
||||
|
||||
Assert.False(root.IsTerminated);
|
||||
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
|
||||
|
|
|
@ -27,11 +27,10 @@ public class TerminatorTests
|
|||
public void TerminatorAndKeywordSemanticTokenTest()
|
||||
{
|
||||
Terminator keywordTerminator = new(KeywordType.Array);
|
||||
LinkedList<char> keywordContent = Utils.GetLinkedList("array [3..9] of integer");
|
||||
|
||||
Assert.True(KeywordSemanticToken.TryParse(0, 0, keywordContent.First!,
|
||||
out KeywordSemanticToken? keywordSemanticToken));
|
||||
Assert.NotNull(keywordSemanticToken);
|
||||
KeywordSemanticToken keywordSemanticToken = new()
|
||||
{
|
||||
LinePos = 0, CharacterPos = 0, KeywordType = KeywordType.Array, LiteralValue = "array"
|
||||
};
|
||||
Assert.True(keywordTerminator == keywordSemanticToken);
|
||||
}
|
||||
|
||||
|
@ -39,11 +38,10 @@ public class TerminatorTests
|
|||
public void TerminatorAndDelimiterSemanticTokenTest()
|
||||
{
|
||||
Terminator terminator = new(DelimiterType.Period);
|
||||
LinkedList<char> content = Utils.GetLinkedList(".");
|
||||
|
||||
Assert.True(DelimiterSemanticToken.TryParse(0, 0, content.First!,
|
||||
out DelimiterSemanticToken? token));
|
||||
Assert.NotNull(token);
|
||||
DelimiterSemanticToken token = new()
|
||||
{
|
||||
LinePos = 0, CharacterPos = 0, DelimiterType = DelimiterType.Period, LiteralValue = "."
|
||||
};
|
||||
Assert.True(token == terminator);
|
||||
}
|
||||
|
||||
|
@ -51,44 +49,10 @@ public class TerminatorTests
|
|||
public void TerminatorAndOperatorSemanticTokenTest()
|
||||
{
|
||||
Terminator terminator = new(OperatorType.GreaterEqual);
|
||||
LinkedList<char> content = Utils.GetLinkedList(">=");
|
||||
|
||||
Assert.True(OperatorSemanticToken.TryParse(0, 0, content.First!,
|
||||
out OperatorSemanticToken? token));
|
||||
Assert.NotNull(token);
|
||||
OperatorSemanticToken token = new()
|
||||
{
|
||||
LinePos = 0, CharacterPos = 0, OperatorType = OperatorType.GreaterEqual, LiteralValue = ">="
|
||||
};
|
||||
Assert.True(token == terminator);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void TerminatorAndNumberSemanticTokenTest()
|
||||
{
|
||||
LinkedList<char> content = Utils.GetLinkedList("123");
|
||||
|
||||
Assert.True(NumberSemanticToken.TryParse(0, 0, content.First!,
|
||||
out NumberSemanticToken? token));
|
||||
Assert.NotNull(token);
|
||||
Assert.True(Terminator.NumberTerminator == token);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void TerminatorAndCharacterSemanticTokenTest()
|
||||
{
|
||||
LinkedList<char> content = Utils.GetLinkedList("'a'");
|
||||
|
||||
Assert.True(CharacterSemanticToken.TryParse(0, 0, content.First!,
|
||||
out CharacterSemanticToken? token));
|
||||
Assert.NotNull(token);
|
||||
Assert.True(Terminator.CharacterTerminator == token);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void TerminatorAndIdentifierSemanticTokenTest()
|
||||
{
|
||||
LinkedList<char> content = Utils.GetLinkedList("gcd");
|
||||
|
||||
Assert.True(IdentifierSemanticToken.TryParse(0, 0, content.First!,
|
||||
out IdentifierSemanticToken? token));
|
||||
Assert.NotNull(token);
|
||||
Assert.True(Terminator.IdentifierTerminator == token);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
namespace Canon.Tests;
|
||||
using Canon.Core.GrammarParser;
|
||||
|
||||
namespace Canon.Tests;
|
||||
|
||||
public static class Utils
|
||||
{
|
||||
|
@ -13,4 +15,25 @@ public static class Utils
|
|||
|
||||
return list;
|
||||
}
|
||||
|
||||
/// <summary>
/// Asserts that two syntax trees are identical.
/// </summary>
/// <remarks>
/// Both trees must enumerate the same number of nodes, and the nodes at each
/// position of the two enumerations must be equal.
/// </remarks>
/// <param name="a">One syntax tree.</param>
/// <param name="b">The other syntax tree.</param>
public static void CheckSyntaxRoot(SyntaxNode a, SyntaxNode b)
{
    int expectedCount = a.Count();
    Assert.Equal(expectedCount, b.Count());

    using IEnumerator<SyntaxNode> left = a.GetEnumerator();
    using IEnumerator<SyntaxNode> right = b.GetEnumerator();

    // Step both enumerations in lockstep, comparing one pair of nodes per step.
    int remaining = expectedCount;
    while (remaining > 0)
    {
        Assert.True(left.MoveNext());
        Assert.True(right.MoveNext());

        Assert.Equal(left.Current, right.Current);
        remaining--;
    }
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user