From e191c1e077d880eaa67660433b1a2ecdf3eff722 Mon Sep 17 00:00:00 2001 From: Lan_G <2911328695@qq.com> Date: Wed, 13 Mar 2024 16:39:00 +0800 Subject: [PATCH 1/2] add: exceptions(#7) Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/7 Co-authored-by: Lan_G <2911328695@qq.com> Co-committed-by: Lan_G <2911328695@qq.com> --- Canon.Core/Exceptions/GrammarException.cs | 12 +++++++++++ Canon.Core/Exceptions/LexemeException.cs | 25 ++++++++++++++++++++++ Canon.Core/Exceptions/SemanticException.cs | 12 +++++++++++ 3 files changed, 49 insertions(+) create mode 100644 Canon.Core/Exceptions/GrammarException.cs create mode 100644 Canon.Core/Exceptions/LexemeException.cs create mode 100644 Canon.Core/Exceptions/SemanticException.cs diff --git a/Canon.Core/Exceptions/GrammarException.cs b/Canon.Core/Exceptions/GrammarException.cs new file mode 100644 index 0000000..59fa1de --- /dev/null +++ b/Canon.Core/Exceptions/GrammarException.cs @@ -0,0 +1,12 @@ +namespace Canon.Core.Exceptions; +/// +/// 语法分析中引发的异常 +/// +public class GrammarException : Exception +{ + public GrammarException() { } + + public GrammarException(string message) : base(message) { } + + public GrammarException(string message, Exception innerException) : base(message, innerException) { } +} diff --git a/Canon.Core/Exceptions/LexemeException.cs b/Canon.Core/Exceptions/LexemeException.cs new file mode 100644 index 0000000..b20c4ed --- /dev/null +++ b/Canon.Core/Exceptions/LexemeException.cs @@ -0,0 +1,25 @@ +namespace Canon.Core.Exceptions; +/// +/// 词法分析中引发的异常 +/// +public class LexemeException : Exception +{ + public LexemeException() { } + + public LexemeException(string message) : base(message) { } + + public LexemeException(string message, Exception innerException) : + base(message, innerException) { } + + /// 单词的行号 + /// 单词的列号 + /// 错误信息 + public LexemeException(uint line, uint charPosition, string message) : + base("line:" + line + ", charPosition:" + charPosition + " :" + message) { } + + public LexemeException(uint line, uint charPosition, Exception innerException) : + base("line:" + line + ", charPosition:" + charPosition + " : ", innerException) { } + + public LexemeException(uint line, uint charPosition, string message, Exception innerException) : + base("line:" + line + ", charPosition:" + charPosition + " :" + message, innerException) { } +} diff --git a/Canon.Core/Exceptions/SemanticException.cs b/Canon.Core/Exceptions/SemanticException.cs new file mode 100644 index 0000000..ddb9201 --- /dev/null +++ b/Canon.Core/Exceptions/SemanticException.cs @@ -0,0 +1,12 @@ +namespace Canon.Core.Exceptions; +/// +/// 语义分析中引发的异常 +/// +public class SemanticException : Exception +{ + public SemanticException() : base() { } + + public SemanticException(string message) : base(message) { } + + public SemanticException(string message, Exception innerException) : base(message, innerException) { } +} From bd3db1b7b7e2995371496533bf0081c04348efd5 Mon Sep 17 00:00:00 2001 From: jackfiled Date: Wed, 13 Mar 2024 16:41:44 +0800 Subject: [PATCH 2/2] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E8=AF=AD?= =?UTF-8?q?=E6=B3=95=E5=88=86=E6=9E=90=E5=9F=BA=E7=B1=BB=E6=8A=BD=E8=B1=A1?= =?UTF-8?q?=20(#8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加语法分析基类和状态转换接口抽象,为直接生成语法分析器做准备,同时也提前释放一些大对象,降低内存消耗。 Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/8 --- Canon.Core/Abstractions/GrammarParseBase.cs | 70 ++++++++++ Canon.Core/Abstractions/ITransformer.cs | 26 ++++ Canon.Core/GrammarParser/Grammar.cs | 128 +++++++++--------- Canon.Core/GrammarParser/GrammarBuilder.cs | 2 +- .../GrammarParserTests/SimpleGrammarTests.cs | 11 +- .../SimpleGrammarWithEmptyTests.cs | 23 +++- .../GrammarParserTests/TerminatorTests.cs | 60 ++------ Canon.Tests/Utils.cs | 25 +++- 8 files changed, 224 insertions(+), 121 deletions(-) create mode 100644 Canon.Core/Abstractions/GrammarParseBase.cs create mode 100644 Canon.Core/Abstractions/ITransformer.cs diff --git a/Canon.Core/Abstractions/GrammarParseBase.cs b/Canon.Core/Abstractions/GrammarParseBase.cs new file mode 100644 index 0000000..aca801c --- /dev/null +++ b/Canon.Core/Abstractions/GrammarParseBase.cs @@ -0,0 +1,70 @@ +using Canon.Core.GrammarParser; +using Canon.Core.LexicalParser; + +namespace Canon.Core.Abstractions; + +/// +/// 语法分析器接口 +/// +public abstract class GrammarParserBase +{ + public abstract ITransformer BeginTransformer { get; } + + public abstract NonTerminator Begin { get; } + + public SyntaxNode Analyse(IEnumerable tokens) + { + Stack stack = []; + stack.Push(new AnalyseState(BeginTransformer, new SyntaxNode(SemanticToken.End))); + + using IEnumerator enumerator = tokens.GetEnumerator(); + if (!enumerator.MoveNext()) + { + throw new InvalidOperationException("Input token list is empty"); + } + + while (true) + { + AnalyseState top = stack.Peek(); + + // 首先尝试进行归约 + if (top.State.ReduceTable.TryGetValue(enumerator.Current, out ReduceInformation? information)) + { + if (information.Left == Begin) + { + // 如果是归约到起始符 + // 那么就直接返回不继续进行归约 + return top.Node; + } + + SyntaxNode newNode = new(information.Left.Type); + for (int i = 0; i < information.Length; i++) + { + newNode.Children.Add(stack.Pop().Node); + } + + stack.Push(new AnalyseState(stack.Peek().State.ShiftTable[information.Left], + newNode)); + continue; + } + + // 如果没有成功归约就进行移进 + if (top.State.ShiftTable.TryGetValue(enumerator.Current, out ITransformer? next)) + { + stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current))); + if (enumerator.MoveNext()) + { + continue; + } + else + { + throw new InvalidOperationException("Run out of token but not accept"); + } + } + + throw new InvalidOperationException("Failed to analyse input grammar"); + } + } + + private record AnalyseState(ITransformer State, SyntaxNode Node); +} diff --git a/Canon.Core/Abstractions/ITransformer.cs b/Canon.Core/Abstractions/ITransformer.cs new file mode 100644 index 0000000..aad519c --- /dev/null +++ b/Canon.Core/Abstractions/ITransformer.cs @@ -0,0 +1,26 @@ +using Canon.Core.GrammarParser; + +namespace Canon.Core.Abstractions; + +/// +/// 进行归约需要的信息 +/// +/// 归约的长度 +/// 归约得到的左部符号 +public record ReduceInformation(int Length, NonTerminator Left); + +/// +/// 状态的各种迁移信息 +/// +public interface ITransformer +{ + /// + /// 进行移进的信息 + /// + public IDictionary ShiftTable { get; } + + /// + /// 进行归约的信息 + /// + public IDictionary ReduceTable { get; } +} diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs index 7404db8..b432889 100644 --- a/Canon.Core/GrammarParser/Grammar.cs +++ b/Canon.Core/GrammarParser/Grammar.cs @@ -1,94 +1,90 @@ -using Canon.Core.LexicalParser; +using Canon.Core.Abstractions; +using Canon.Core.LexicalParser; namespace Canon.Core.GrammarParser; +/// +/// 通过LR分析方法建立的语法 +/// public class Grammar { + /// + /// 起始符 + /// public required NonTerminator Begin { get; init; } + /// + /// 语法中的DFA + /// + public required HashSet Automation { get; init; } + + /// + /// 起始状态 + /// public required LrState BeginState { get; init; } - public SyntaxNode Analyse(IEnumerable tokens) + public GrammarParserBase ToGrammarParser() { - Stack stack = []; - stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End))); + Dictionary transformers = []; - using IEnumerator enumerator = tokens.GetEnumerator(); - if (!enumerator.MoveNext()) + foreach (LrState state in Automation) { - throw new InvalidOperationException("Input token list is empty"); - } - - while (true) - { - AnalyseState top = stack.Peek(); - - // 尝试进行移进 - bool acceptFlag = false, reduceFlag = false; - foreach (Expression e in top.State.Expressions) + ITransformer transformer; + if (transformers.TryGetValue(state, out Transformer? oldTransformer)) { - if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current) + transformer = oldTransformer; + } + else + { + Transformer newTransformer = new(); + transformers.Add(state, newTransformer); + transformer = newTransformer; + } + + // 生成归约的迁移表 + foreach (Expression expression in state.Expressions) + { + if (expression.Pos == expression.Right.Count) { - if (e.Left == Begin) - { - acceptFlag = true; - } - else - { - reduceFlag = true; - SyntaxNode newNode = new(e.Left.Type); - - for (int i = 0; i < e.Right.Count; i++) - { - newNode.Children.Add(stack.Pop().Node); - } - - stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left], - newNode)); - } - break; - } - - if (e.Right.Count == 0 && e.LookAhead == enumerator.Current) - { - // 考虑空产生式的归约 - // 显然空产生式是不能accept的 - reduceFlag = true; - SyntaxNode newNode = new(e.Left.Type); - - stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left], - newNode)); + transformer.ReduceTable.TryAdd(expression.LookAhead, new ReduceInformation( + expression.Right.Count, expression.Left)); } } - if (acceptFlag) + // 生成移进的迁移表 + foreach (KeyValuePair pair in state.Transformer) { - // 接受文法 退出循环 - return top.Node; - } - - if (reduceFlag) - { - // 归约 - continue; - } - - // 尝试进行移进 - if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next)) - { - stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current))); - if (enumerator.MoveNext()) + ITransformer targetTransformer; + if (transformers.TryGetValue(pair.Value, out Transformer? oldTransformer2)) { - continue; + targetTransformer = oldTransformer2; } else { - throw new InvalidOperationException("Run out of token but not accept"); + Transformer newTransformer = new(); + transformers.Add(pair.Value, newTransformer); + targetTransformer = newTransformer; } + transformer.ShiftTable.TryAdd(pair.Key, targetTransformer); } - - throw new InvalidOperationException("Failed to analyse input grammar"); } + + return new GrammarParser(transformers[BeginState], Begin); + } + + private class GrammarParser(ITransformer beginTransformer, NonTerminator begin) : GrammarParserBase + { + public override ITransformer BeginTransformer { get; } = beginTransformer; + public override NonTerminator Begin { get; } = begin; + } + + private class Transformer : ITransformer + { + public IDictionary ShiftTable { get; } + = new Dictionary(); + + public IDictionary ReduceTable { get; } + = new Dictionary(); } private record AnalyseState(LrState State, SyntaxNode Node); diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs index 6641835..2c3a017 100644 --- a/Canon.Core/GrammarParser/GrammarBuilder.cs +++ b/Canon.Core/GrammarParser/GrammarBuilder.cs @@ -300,7 +300,7 @@ public class GrammarBuilder Automation.UnionWith(addedStates); } - return new Grammar { Begin = Begin, BeginState = beginState }; + return new Grammar { Begin = Begin, BeginState = beginState, Automation = Automation}; } private static bool IsEmptyOnly(List expression) diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs index 6c14ae1..05c6f9d 100644 --- a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs @@ -1,4 +1,5 @@ -using Canon.Core.Enums; +using Canon.Core.Abstractions; +using Canon.Core.Enums; using Canon.Core.GrammarParser; using Canon.Core.LexicalParser; @@ -118,6 +119,7 @@ public class SimpleGrammarTests }; Grammar grammar = builder.Build(); + GrammarParserBase parser = grammar.ToGrammarParser(); // n + n List tokens = [ @@ -142,7 +144,7 @@ public class SimpleGrammarTests // F n // | // n - SyntaxNode root = grammar.Analyse(tokens); + SyntaxNode root = parser.Analyse(tokens); Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType()); Assert.Equal(3, root.Children.Count); Assert.Contains(root.Children, node => @@ -168,6 +170,8 @@ public class SimpleGrammarTests }; Grammar grammar = builder.Build(); + GrammarParserBase parser = grammar.ToGrammarParser(); + // (n + n) * n List tokens = [ @@ -193,7 +197,8 @@ public class SimpleGrammarTests SemanticToken.End ]; - SyntaxNode root = grammar.Analyse(tokens); + + SyntaxNode root = parser.Analyse(tokens); Assert.Equal(18, root.Count()); Assert.False(root.IsTerminated); Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType()); diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs index e711f88..c3fc9e3 100644 --- a/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarWithEmptyTests.cs @@ -1,4 +1,5 @@ -using Canon.Core.Enums; +using Canon.Core.Abstractions; +using Canon.Core.Enums; using Canon.Core.GrammarParser; using Canon.Core.LexicalParser; using Xunit.Abstractions; @@ -151,6 +152,23 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) _testOutputHelper.WriteLine(state5.ToString()); } + [Fact] + public void ParserTest() + { + GrammarBuilder builder = new() + { + Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator) + }; + + Grammar grammar = builder.Build(); + GrammarParserBase parser = grammar.ToGrammarParser(); + + ITransformer transformer1 = parser.BeginTransformer; + Assert.Equal(3, transformer1.ShiftTable.Count); + Assert.Single(transformer1.ReduceTable); + Assert.Contains(new NonTerminator(NonTerminatorType.ProgramStruct),transformer1.ShiftTable); + } + [Fact] public void AnalyseSingleSentenceTest() { @@ -160,6 +178,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) }; Grammar grammar = builder.Build(); + GrammarParserBase parser = grammar.ToGrammarParser(); List tokens = [ @@ -168,7 +187,7 @@ public class SimpleGrammarWithEmptyTests(ITestOutputHelper testOutputHelper) SemanticToken.End ]; - SyntaxNode root = grammar.Analyse(tokens); + SyntaxNode root = parser.Analyse(tokens); Assert.False(root.IsTerminated); Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType()); diff --git a/Canon.Tests/GrammarParserTests/TerminatorTests.cs b/Canon.Tests/GrammarParserTests/TerminatorTests.cs index 320d3eb..470c8bc 100644 --- a/Canon.Tests/GrammarParserTests/TerminatorTests.cs +++ b/Canon.Tests/GrammarParserTests/TerminatorTests.cs @@ -27,11 +27,10 @@ public class TerminatorTests public void TerminatorAndKeywordSemanticTokenTest() { Terminator keywordTerminator = new(KeywordType.Array); - LinkedList keywordContent = Utils.GetLinkedList("array [3..9] of integer"); - - Assert.True(KeywordSemanticToken.TryParse(0, 0, keywordContent.First!, - out KeywordSemanticToken? keywordSemanticToken)); - Assert.NotNull(keywordSemanticToken); + KeywordSemanticToken keywordSemanticToken = new() + { + LinePos = 0, CharacterPos = 0, KeywordType = KeywordType.Array, LiteralValue = "array" + }; Assert.True(keywordTerminator == keywordSemanticToken); } @@ -39,11 +38,10 @@ public class TerminatorTests public void TerminatorAndDelimiterSemanticTokenTest() { Terminator terminator = new(DelimiterType.Period); - LinkedList content = Utils.GetLinkedList("."); - - Assert.True(DelimiterSemanticToken.TryParse(0, 0, content.First!, - out DelimiterSemanticToken? token)); - Assert.NotNull(token); + DelimiterSemanticToken token = new() + { + LinePos = 0, CharacterPos = 0, DelimiterType = DelimiterType.Period, LiteralValue = "." + }; Assert.True(token == terminator); } @@ -51,44 +49,10 @@ public class TerminatorTests public void TerminatorAndOperatorSemanticTokenTest() { Terminator terminator = new(OperatorType.GreaterEqual); - LinkedList content = Utils.GetLinkedList(">="); - - Assert.True(OperatorSemanticToken.TryParse(0, 0, content.First!, - out OperatorSemanticToken? token)); - Assert.NotNull(token); + OperatorSemanticToken token = new() + { + LinePos = 0, CharacterPos = 0, OperatorType = OperatorType.GreaterEqual, LiteralValue = ">=" + }; Assert.True(token == terminator); } - - [Fact] - public void TerminatorAndNumberSemanticTokenTest() - { - LinkedList content = Utils.GetLinkedList("123"); - - Assert.True(NumberSemanticToken.TryParse(0, 0, content.First!, - out NumberSemanticToken? token)); - Assert.NotNull(token); - Assert.True(Terminator.NumberTerminator == token); - } - - [Fact] - public void TerminatorAndCharacterSemanticTokenTest() - { - LinkedList content = Utils.GetLinkedList("'a'"); - - Assert.True(CharacterSemanticToken.TryParse(0, 0, content.First!, - out CharacterSemanticToken? token)); - Assert.NotNull(token); - Assert.True(Terminator.CharacterTerminator == token); - } - - [Fact] - public void TerminatorAndIdentifierSemanticTokenTest() - { - LinkedList content = Utils.GetLinkedList("gcd"); - - Assert.True(IdentifierSemanticToken.TryParse(0, 0, content.First!, - out IdentifierSemanticToken? token)); - Assert.NotNull(token); - Assert.True(Terminator.IdentifierTerminator == token); - } } diff --git a/Canon.Tests/Utils.cs b/Canon.Tests/Utils.cs index 9babb68..3b5f4bf 100644 --- a/Canon.Tests/Utils.cs +++ b/Canon.Tests/Utils.cs @@ -1,4 +1,6 @@ -namespace Canon.Tests; +using Canon.Core.GrammarParser; + +namespace Canon.Tests; public static class Utils { @@ -13,4 +15,25 @@ public static class Utils return list; } + + /// + /// 验证两棵语法树一致 + /// + /// 一棵语法树 + /// 另一棵语法树 + public static void CheckSyntaxRoot(SyntaxNode a, SyntaxNode b) + { + int length = a.Count(); + Assert.Equal(length, b.Count()); + + using IEnumerator aIter = a.GetEnumerator(), bIter = b.GetEnumerator(); + + for (int i = 0; i < length; i++) + { + Assert.True(aIter.MoveNext()); + Assert.True(bIter.MoveNext()); + + Assert.Equal(aIter.Current, bIter.Current); + } + } }