diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs index 6f1d613..e4db238 100644 --- a/Canon.Core/GrammarParser/GrammarBuilder.cs +++ b/Canon.Core/GrammarParser/GrammarBuilder.cs @@ -5,7 +5,7 @@ public class GrammarBuilder /// <summary> /// The productions of the grammar. /// </summary> - public Dictionary>> Generators { get; } = []; + public Dictionary>> Generators { get; init; } = []; /// <summary> /// The start symbol of the grammar. @@ -108,6 +108,7 @@ public class GrammarBuilder { // The expression starts with a terminator. Terminator terminator = (Terminator)expressionHead; + result.Add(terminator); } else @@ -139,8 +140,18 @@ public class GrammarBuilder { changed = false; + // A collection cannot be modified while foreach is enumerating it, + // so new items are gathered here and added once the enumeration is finished. + List addedExpressions = []; + foreach (Expression e in closure) { + if (e.Pos >= e.Right.Count) + { + // Pos has already reached the end of the right-hand side; nothing is left to shift. + continue; + } + TerminatorBase next = e.Right[e.Pos]; if (next.IsTerminated) @@ -169,13 +180,21 @@ public class GrammarBuilder Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead }; - if (closure.Add(newExpression)) + if (!closure.Contains(newExpression)) { - changed = true; + addedExpressions.Add(newExpression); } } } } + + foreach (Expression addedExpression in addedExpressions) + { + if (closure.Add(addedExpression)) + { + changed = true; + } + } } return closure; @@ -197,7 +216,8 @@ public class GrammarBuilder bool added = true; while (added) { - added = false; + // Likewise, the collection cannot be modified during foreach. + HashSet addedStates = []; foreach (LrState state in Automation) { @@ -206,20 +226,19 @@ public class GrammarBuilder foreach (Expression e in state.Expressions) { + if (e.Pos >= e.Right.Count) + { + // Pos has already reached the end; nothing is left to shift. + continue; + } + + TerminatorBase next = e.Right[e.Pos]; Expression nextExpression = new() { Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos }; - - if (nextExpression.Pos >= nextExpression.Right.Count) - { - // The shift position has already reached the end of the sentential form. - continue; - } - nextExpression.Pos += 1; - TerminatorBase next = nextExpression.Right[nextExpression.Pos]; if 
(!nextExpressions.TryAdd(next, [nextExpression])) { nextExpressions[next].Add(nextExpression); @@ -241,18 +260,27 @@ public class GrammarBuilder if (Automation.TryGetValue(newState, out LrState? oldState)) { // This item-set closure already exists. - state.Transformer.Add(pair.Key, oldState); + state.AddTransform(pair.Key, oldState); } else { // This item-set closure does not exist in Automation yet, - Automation.Add(newState); - state.Transformer.Add(pair.Key, newState); - - added = true; + // but the same state may already be waiting in addedStates, so check there as well. + if (addedStates.TryGetValue(newState, out LrState? addedState)) + { + state.AddTransform(pair.Key, addedState); + } + else + { + state.AddTransform(pair.Key, newState); + addedStates.Add(newState); + } } } } + + added = addedStates.Count != 0; + Automation.UnionWith(addedStates); } return new Grammar { Begin = Begin, BeginState = beginState }; diff --git a/Canon.Core/GrammarParser/LrState.cs b/Canon.Core/GrammarParser/LrState.cs index b5ffb65..817a986 100644 --- a/Canon.Core/GrammarParser/LrState.cs +++ b/Canon.Core/GrammarParser/LrState.cs @@ -16,6 +16,28 @@ public class LrState : IEquatable /// </summary> public Dictionary Transformer { get; } = []; + /// <summary> + /// Adds a transition rule to this state. + /// </summary> + /// <param name="terminator">The symbol that triggers the transition.</param> + /// <param name="next">The state reached by the transition.</param> + /// <exception cref="InvalidOperationException">Thrown when a transition on <paramref name="terminator"/> already exists + /// and leads to a state other than <paramref name="next"/> (compared with the != operator).</exception> + public void AddTransform(TerminatorBase terminator, LrState next) + { + if (Transformer.TryGetValue(terminator, out LrState? state)) + { + if (state != next) + { + throw new InvalidOperationException("A terminator transform to two different states"); + } + } + else + { + Transformer.Add(terminator, next); + } + } + public bool Equals(LrState? 
other) { if (other is null) diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs new file mode 100644 index 0000000..8b32d96 --- /dev/null +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs @@ -0,0 +1,110 @@ +using Canon.Core.Enums; +using Canon.Core.GrammarParser; + +namespace Canon.Tests.GrammarParserTests; + +public class SimpleGrammarTests +{ + /// <summary> + /// A simple grammar used for testing: + /// S -> E + /// E -> E+T | E-T | T + /// T -> T*F | T/F | F + /// F -> (E) | n + /// For convenience, the symbols are mapped onto existing enum values: + /// E ProgramStruct + /// T ProgramBody + /// F StatementList + /// n Identifier + /// </summary> + private static readonly Dictionary>> s_simpleGrammar = new() + { + { + new NonTerminator(NonTerminatorType.StartNonTerminator), [ + [new NonTerminator(NonTerminatorType.ProgramStruct)] + ] + }, + { + new NonTerminator(NonTerminatorType.ProgramStruct), [ + [ + new NonTerminator(NonTerminatorType.ProgramStruct), new Terminator(OperatorType.Plus), + new NonTerminator(NonTerminatorType.ProgramBody) + ], + [ + new NonTerminator(NonTerminatorType.ProgramStruct), new Terminator(OperatorType.Minus), + new NonTerminator(NonTerminatorType.ProgramBody) + ], + [new NonTerminator(NonTerminatorType.ProgramBody)] + ] + }, + { + new NonTerminator(NonTerminatorType.ProgramBody), [ + [ + new NonTerminator(NonTerminatorType.ProgramBody), new Terminator(OperatorType.Multiply), + new NonTerminator(NonTerminatorType.StatementList) + ], + [ + new NonTerminator(NonTerminatorType.ProgramBody), new Terminator(OperatorType.Divide), + new NonTerminator(NonTerminatorType.StatementList) + ], + [new NonTerminator(NonTerminatorType.StatementList)] + ] + }, + { + new NonTerminator(NonTerminatorType.StatementList), [ + [ + new Terminator(DelimiterType.LeftParenthesis), new NonTerminator(NonTerminatorType.ProgramStruct), + new Terminator(DelimiterType.RightParenthesis) + ], + [Terminator.IdentifierTerminator] + ] + } + }; + + [Fact] + public void FirstSetTest() + { + 
GrammarBuilder builder = new() + { + Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator) + }; + + builder.Build(); + + Assert.Contains(builder.FirstSet, pair => + pair.Key == new NonTerminator(NonTerminatorType.StartNonTerminator)); + Assert.Contains(builder.FirstSet, pair => + pair.Key == new NonTerminator(NonTerminatorType.ProgramStruct)); + Assert.Contains(builder.FirstSet, pair => + pair.Key == new NonTerminator(NonTerminatorType.ProgramBody)); + Assert.Contains(builder.FirstSet, pair => + pair.Key == new NonTerminator(NonTerminatorType.StatementList)); + + foreach (HashSet terminators in builder.FirstSet.Values) + { + Assert.Contains(Terminator.IdentifierTerminator, terminators); + Assert.Contains(new Terminator(DelimiterType.LeftParenthesis), terminators); + } + } + + [Fact] + public void StatsTest() + { + GrammarBuilder builder = new() + { + Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator) + }; + + Grammar grammar = builder.Build(); + + Assert.Equal(30, builder.Automation.Count); + + // Expected values were painstakingly worked out by hand by Ichirinko. + Assert.Contains(new NonTerminator(NonTerminatorType.ProgramStruct), grammar.BeginState.Transformer.Keys); + Assert.Contains(new NonTerminator(NonTerminatorType.ProgramBody), grammar.BeginState.Transformer.Keys); + Assert.Contains(new NonTerminator(NonTerminatorType.StatementList), + grammar.BeginState.Transformer.Keys); + Assert.Contains(new Terminator(DelimiterType.LeftParenthesis), grammar.BeginState.Transformer.Keys); + Assert.Contains(Terminator.IdentifierTerminator, grammar.BeginState.Transformer.Keys); + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..8a8c406 --- /dev/null +++ b/README.md @@ -0,0 +1,21 @@ +# Canon + +简单的`Pascal`编译器。 + +## 项目结构 + +主要由以下三个项目组成: + +- `Canon.Core` 核心的编译器库,负责词法分析、语法分析、语义分析和目标代码生成等等一系列工作; +- `Canon.Console`编译器的控制台应用程序,负责处理命令行参数和读写文件等等工作; +- `Canon.Tests`对`Canon.Core`进行测试的测试库。 + +## 开始使用 + +需要: + +- `Dotnet 
SDK 8.0` +- `Visual Studio 2022`或者`Rider 2023.3.3` + +才能运行和编辑该项目。 +