diff --git a/Canon.Core/Abstractions/GrammarParseBase.cs b/Canon.Core/Abstractions/GrammarParseBase.cs
new file mode 100644
index 0000000..aca801c
--- /dev/null
+++ b/Canon.Core/Abstractions/GrammarParseBase.cs
@@ -0,0 +1,70 @@
+using Canon.Core.GrammarParser;
+using Canon.Core.LexicalParser;
+
+namespace Canon.Core.Abstractions;
+
+/// <summary>
+/// Base class for grammar parsers: drives a shift/reduce analysis over the tables exposed by ITransformer states
+/// </summary>
+public abstract class GrammarParserBase
+{
+ public abstract ITransformer BeginTransformer { get; } // state that Analyse pushes first
+
+ public abstract NonTerminator Begin { get; } // start symbol; a reduction to it ends Analyse
+
+ public SyntaxNode Analyse(IEnumerable tokens) // throws InvalidOperationException when tokens is empty, runs out before acceptance, or no action applies
+ {
+ Stack stack = [];
+ stack.Push(new AnalyseState(BeginTransformer, new SyntaxNode(SemanticToken.End))); // bottom entry: start state paired with an End-token node
+
+ using IEnumerator enumerator = tokens.GetEnumerator();
+ if (!enumerator.MoveNext())
+ {
+ throw new InvalidOperationException("Input token list is empty");
+ }
+
+ while (true)
+ {
+ AnalyseState top = stack.Peek();
+
+ // Try a reduction first; it is checked before the shift table, so it wins when both apply
+ if (top.State.ReduceTable.TryGetValue(enumerator.Current, out ReduceInformation? information))
+ {
+ if (information.Left == Begin)
+ {
+ // Reducing to the start symbol means the input is accepted,
+ // so return the node on top of the stack without reducing any further
+ return top.Node;
+ }
+
+ SyntaxNode newNode = new(information.Left.Type);
+ for (int i = 0; i < information.Length; i++)
+ {
+ newNode.Children.Add(stack.Pop().Node); // popped top-first, so children are added in reverse push order
+ }
+
+ stack.Push(new AnalyseState(stack.Peek().State.ShiftTable[information.Left], // goto: transition of the now-exposed state on the reduced symbol
+ newNode));
+ continue;
+ }
+
+ // No reduction applies, so try to shift the current token
+ if (top.State.ShiftTable.TryGetValue(enumerator.Current, out ITransformer? next))
+ {
+ stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
+ if (enumerator.MoveNext())
+ {
+ continue;
+ }
+ else
+ {
+ throw new InvalidOperationException("Run out of token but not accept");
+ }
+ }
+
+ throw new InvalidOperationException("Failed to analyse input grammar");
+ }
+ }
+
+ private record AnalyseState(ITransformer State, SyntaxNode Node);
+}
diff --git a/Canon.Core/Abstractions/ITransformer.cs b/Canon.Core/Abstractions/ITransformer.cs
new file mode 100644
index 0000000..aad519c
--- /dev/null
+++ b/Canon.Core/Abstractions/ITransformer.cs
@@ -0,0 +1,26 @@
+using Canon.Core.GrammarParser;
+
+namespace Canon.Core.Abstractions;
+
+/// <summary>
+/// Information required to perform a reduction
+/// </summary>
+/// <param name="Length">Length of the reduction</param>
+/// <param name="Left">Left-hand-side symbol produced by the reduction</param>
+public record ReduceInformation(int Length, NonTerminator Left);
+
+/// <summary>
+/// The transition information of a state
+/// </summary>
+public interface ITransformer
+{
+ /// <summary>
+ /// Information used to perform shifts
+ /// </summary>
+ public IDictionary ShiftTable { get; }
+
+ /// <summary>
+ /// Information used to perform reductions
+ /// </summary>
+ public IDictionary ReduceTable { get; }
+}
diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs
index 7404db8..b432889 100644
--- a/Canon.Core/GrammarParser/Grammar.cs
+++ b/Canon.Core/GrammarParser/Grammar.cs
@@ -1,94 +1,90 @@
-using Canon.Core.LexicalParser;
+using Canon.Core.Abstractions;
+using Canon.Core.LexicalParser;
namespace Canon.Core.GrammarParser;
+/// <summary>
+/// A grammar built with the LR analysis method
+/// </summary>
public class Grammar
{
+ /// <summary>
+ /// The start symbol
+ /// </summary>
public required NonTerminator Begin { get; init; }
+ /// <summary>
+ /// The DFA of the grammar
+ /// </summary>
+ public required HashSet Automation { get; init; }
+
+ /// <summary>
+ /// The start state
+ /// </summary>
public required LrState BeginState { get; init; }
- public SyntaxNode Analyse(IEnumerable tokens)
+ public GrammarParserBase ToGrammarParser() // converts every state in Automation into shift/reduce tables and wraps the start state's tables in a parser
{
- Stack stack = [];
- stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
+ Dictionary transformers = [];
- using IEnumerator enumerator = tokens.GetEnumerator();
- if (!enumerator.MoveNext())
+ foreach (LrState state in Automation)
{
- throw new InvalidOperationException("Input token list is empty");
- }
-
- while (true)
- {
- AnalyseState top = stack.Peek();
-
- // 尝试进行移进
- bool acceptFlag = false, reduceFlag = false;
- foreach (Expression e in top.State.Expressions)
+ ITransformer transformer;
+ if (transformers.TryGetValue(state, out Transformer? oldTransformer))
{
- if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
+ transformer = oldTransformer;
+ }
+ else
+ {
+ Transformer newTransformer = new();
+ transformers.Add(state, newTransformer);
+ transformer = newTransformer;
+ }
+
+ // Build the reduce table: one entry per expression whose position is at the end of its right side, keyed by its lookahead
+ foreach (Expression expression in state.Expressions)
+ {
+ if (expression.Pos == expression.Right.Count)
{
- if (e.Left == Begin)
- {
- acceptFlag = true;
- }
- else
- {
- reduceFlag = true;
- SyntaxNode newNode = new(e.Left.Type);
-
- for (int i = 0; i < e.Right.Count; i++)
- {
- newNode.Children.Add(stack.Pop().Node);
- }
-
- stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
- newNode));
- }
- break;
- }
-
- if (e.Right.Count == 0 && e.LookAhead == enumerator.Current)
- {
- // 考虑空产生式的归约
- // 显然空产生式是不能accept的
- reduceFlag = true;
- SyntaxNode newNode = new(e.Left.Type);
-
- stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
- newNode));
+ transformer.ReduceTable.TryAdd(expression.LookAhead, new ReduceInformation( // TryAdd keeps the existing entry when the lookahead is already present
+ expression.Right.Count, expression.Left));
}
}
- if (acceptFlag)
+ // Build the shift table from the transitions recorded on the state
+ foreach (KeyValuePair pair in state.Transformer)
{
- // 接受文法 退出循环
- return top.Node;
- }
-
- if (reduceFlag)
- {
- // 归约
- continue;
- }
-
- // 尝试进行移进
- if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
- {
- stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
- if (enumerator.MoveNext())
+ ITransformer targetTransformer;
+ if (transformers.TryGetValue(pair.Value, out Transformer? oldTransformer2))
{
- continue;
+ targetTransformer = oldTransformer2;
}
else
{
- throw new InvalidOperationException("Run out of token but not accept");
+ Transformer newTransformer = new(); // target state has no transformer yet: register one now so later lookups reuse it
+ transformers.Add(pair.Value, newTransformer);
+ targetTransformer = newTransformer;
}
+ transformer.ShiftTable.TryAdd(pair.Key, targetTransformer);
}
-
- throw new InvalidOperationException("Failed to analyse input grammar");
}
+
+ return new GrammarParser(transformers[BeginState], Begin);
+ }
+
+ private class GrammarParser(ITransformer beginTransformer, NonTerminator begin) : GrammarParserBase // concrete parser that only stores the start state and start symbol it is given
+ {
+ public override ITransformer BeginTransformer { get; } = beginTransformer;
+ public override NonTerminator Begin { get; } = begin;
+ }
+
+ private class Transformer : ITransformer // Dictionary-backed ITransformer; both tables start empty
+ {
+ public IDictionary ShiftTable { get; }
+ = new Dictionary();
+
+ public IDictionary ReduceTable { get; }
+ = new Dictionary();
}
private record AnalyseState(LrState State, SyntaxNode Node);
diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs
index 6641835..2c3a017 100644
--- a/Canon.Core/GrammarParser/GrammarBuilder.cs
+++ b/Canon.Core/GrammarParser/GrammarBuilder.cs
@@ -300,7 +300,7 @@ public class GrammarBuilder
Automation.UnionWith(addedStates);
}
- return new Grammar { Begin = Begin, BeginState = beginState };
+ return new Grammar { Begin = Begin, BeginState = beginState, Automation = Automation};
}
private static bool IsEmptyOnly(List expression)
diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs
index 6c14ae1..05c6f9d 100644
--- a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs
+++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs
@@ -1,4 +1,5 @@
-using Canon.Core.Enums;
+using Canon.Core.Abstractions;
+using Canon.Core.Enums;
using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
@@ -118,6 +119,7 @@ public class SimpleGrammarTests
};
Grammar grammar = builder.Build();
+ GrammarParserBase parser = grammar.ToGrammarParser();
// n + n
List tokens =
[
@@ -142,7 +144,7 @@ public class SimpleGrammarTests
// F n
// |
// n
- SyntaxNode root = grammar.Analyse(tokens);
+ SyntaxNode root = parser.Analyse(tokens);
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
Assert.Equal(3, root.Children.Count);
Assert.Contains(root.Children, node =>
@@ -168,6 +170,8 @@ public class SimpleGrammarTests
};
Grammar grammar = builder.Build();
+ GrammarParserBase parser = grammar.ToGrammarParser();
+
// (n + n) * n
List tokens =
[
@@ -193,7 +197,8 @@ public class SimpleGrammarTests
SemanticToken.End
];
- SyntaxNode root = grammar.Analyse(tokens);
+
+ SyntaxNode root = parser.Analyse(tokens);
Assert.Equal(18, root.Count());
Assert.False(root.IsTerminated);
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs
index e711f88..c3fc9e3 100644
--- a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs
+++ b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs
@@ -1,4 +1,5 @@
-using Canon.Core.Enums;
+using Canon.Core.Abstractions;
+using Canon.Core.Enums;
using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
using Xunit.Abstractions;
@@ -151,6 +152,23 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
_testOutputHelper.WriteLine(state5.ToString());
}
+ [Fact]
+ public void ParserTest() // checks the tables generated for the start state of s_simpleGrammar
+ {
+ GrammarBuilder builder = new()
+ {
+ Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
+ };
+
+ Grammar grammar = builder.Build();
+ GrammarParserBase parser = grammar.ToGrammarParser();
+
+ ITransformer transformer1 = parser.BeginTransformer; // tables of the parser's start state
+ Assert.Equal(3, transformer1.ShiftTable.Count);
+ Assert.Single(transformer1.ReduceTable);
+ Assert.Contains(new NonTerminator(NonTerminatorType.ProgramStruct),transformer1.ShiftTable);
+ }
+
[Fact]
public void AnalyseSingleSentenceTest()
{
@@ -160,6 +178,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
};
Grammar grammar = builder.Build();
+ GrammarParserBase parser = grammar.ToGrammarParser();
List tokens =
[
@@ -168,7 +187,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper)
SemanticToken.End
];
- SyntaxNode root = grammar.Analyse(tokens);
+ SyntaxNode root = parser.Analyse(tokens);
Assert.False(root.IsTerminated);
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
diff --git a/Canon.Tests/GrammarParserTests/TerminatorTests.cs b/Canon.Tests/GrammarParserTests/TerminatorTests.cs
index 320d3eb..470c8bc 100644
--- a/Canon.Tests/GrammarParserTests/TerminatorTests.cs
+++ b/Canon.Tests/GrammarParserTests/TerminatorTests.cs
@@ -27,11 +27,10 @@ public class TerminatorTests
public void TerminatorAndKeywordSemanticTokenTest()
{
Terminator keywordTerminator = new(KeywordType.Array);
- LinkedList keywordContent = Utils.GetLinkedList("array [3..9] of integer");
-
- Assert.True(KeywordSemanticToken.TryParse(0, 0, keywordContent.First!,
- out KeywordSemanticToken? keywordSemanticToken));
- Assert.NotNull(keywordSemanticToken);
+ KeywordSemanticToken keywordSemanticToken = new() // token is built directly instead of being parsed from source text
+ {
+ LinePos = 0, CharacterPos = 0, KeywordType = KeywordType.Array, LiteralValue = "array"
+ };
Assert.True(keywordTerminator == keywordSemanticToken);
}
@@ -39,11 +38,10 @@ public class TerminatorTests
public void TerminatorAndDelimiterSemanticTokenTest()
{
Terminator terminator = new(DelimiterType.Period);
- LinkedList content = Utils.GetLinkedList(".");
-
- Assert.True(DelimiterSemanticToken.TryParse(0, 0, content.First!,
- out DelimiterSemanticToken? token));
- Assert.NotNull(token);
+ DelimiterSemanticToken token = new() // token is built directly instead of being parsed from source text
+ {
+ LinePos = 0, CharacterPos = 0, DelimiterType = DelimiterType.Period, LiteralValue = "."
+ };
Assert.True(token == terminator);
}
@@ -51,44 +49,10 @@ public class TerminatorTests
public void TerminatorAndOperatorSemanticTokenTest()
{
Terminator terminator = new(OperatorType.GreaterEqual);
- LinkedList content = Utils.GetLinkedList(">=");
-
- Assert.True(OperatorSemanticToken.TryParse(0, 0, content.First!,
- out OperatorSemanticToken? token));
- Assert.NotNull(token);
+ OperatorSemanticToken token = new() // token is built directly instead of being parsed from source text
+ {
+ LinePos = 0, CharacterPos = 0, OperatorType = OperatorType.GreaterEqual, LiteralValue = ">="
+ };
Assert.True(token == terminator);
}
-
- [Fact]
- public void TerminatorAndNumberSemanticTokenTest()
- {
- LinkedList content = Utils.GetLinkedList("123");
-
- Assert.True(NumberSemanticToken.TryParse(0, 0, content.First!,
- out NumberSemanticToken? token));
- Assert.NotNull(token);
- Assert.True(Terminator.NumberTerminator == token);
- }
-
- [Fact]
- public void TerminatorAndCharacterSemanticTokenTest()
- {
- LinkedList content = Utils.GetLinkedList("'a'");
-
- Assert.True(CharacterSemanticToken.TryParse(0, 0, content.First!,
- out CharacterSemanticToken? token));
- Assert.NotNull(token);
- Assert.True(Terminator.CharacterTerminator == token);
- }
-
- [Fact]
- public void TerminatorAndIdentifierSemanticTokenTest()
- {
- LinkedList content = Utils.GetLinkedList("gcd");
-
- Assert.True(IdentifierSemanticToken.TryParse(0, 0, content.First!,
- out IdentifierSemanticToken? token));
- Assert.NotNull(token);
- Assert.True(Terminator.IdentifierTerminator == token);
- }
}
diff --git a/Canon.Tests/Utils.cs b/Canon.Tests/Utils.cs
index 9babb68..3b5f4bf 100644
--- a/Canon.Tests/Utils.cs
+++ b/Canon.Tests/Utils.cs
@@ -1,4 +1,6 @@
-namespace Canon.Tests;
+using Canon.Core.GrammarParser;
+
+namespace Canon.Tests;
public static class Utils
{
@@ -13,4 +15,25 @@ public static class Utils
return list;
}
+
+ /// <summary>
+ /// Verifies that two syntax trees are identical
+ /// </summary>
+ /// <param name="a">One syntax tree</param>
+ /// <param name="b">The other syntax tree</param>
+ public static void CheckSyntaxRoot(SyntaxNode a, SyntaxNode b)
+ {
+ int length = a.Count();
+ Assert.Equal(length, b.Count()); // element counts must match before the pairwise comparison
+
+ using IEnumerator aIter = a.GetEnumerator(), bIter = b.GetEnumerator();
+
+ for (int i = 0; i < length; i++)
+ {
+ Assert.True(aIter.MoveNext());
+ Assert.True(bIter.MoveNext());
+
+ Assert.Equal(aIter.Current, bIter.Current); // elements are compared pairwise in enumeration order
+ }
+ }
}