diff --git a/Canon.Core/Enums/GrammarEnums.cs b/Canon.Core/Enums/GrammarEnums.cs
index e438969..78c4ab2 100644
--- a/Canon.Core/Enums/GrammarEnums.cs
+++ b/Canon.Core/Enums/GrammarEnums.cs
@@ -2,6 +2,10 @@
 
 public enum NonTerminatorType
 {
+    /// <summary>
+    /// Start symbol of the augmented grammar.
+    /// </summary>
+    StartNonTerminator,
     ProgramStruct,
     ProgramHead,
     ProgramBody,
diff --git a/Canon.Core/Enums/SemanticEnums.cs b/Canon.Core/Enums/SemanticEnums.cs
index 270c329..79dd150 100644
--- a/Canon.Core/Enums/SemanticEnums.cs
+++ b/Canon.Core/Enums/SemanticEnums.cs
@@ -8,6 +8,10 @@ public enum SemanticTokenType
     Delimiter,
     Identifier,
     Character,
+    /// <summary>
+    /// Bottom-of-stack symbol used during syntax analysis.
+    /// </summary>
+    End
 }
 
 public enum DelimiterType
diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs
new file mode 100644
index 0000000..d16a16f
--- /dev/null
+++ b/Canon.Core/GrammarParser/Grammar.cs
@@ -0,0 +1,17 @@
+namespace Canon.Core.GrammarParser;
+
+/// <summary>
+/// Result of <see cref="GrammarBuilder.Build"/>: a start symbol and the initial automaton state.
+/// </summary>
+public class Grammar
+{
+    /// <summary>
+    /// Start symbol of the grammar.
+    /// </summary>
+    public required NonTerminator Begin { get; init; }
+
+    /// <summary>
+    /// Initial state of the LR(1) automaton.
+    /// </summary>
+    public required LrState BeginState { get; init; }
+}
diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs
new file mode 100644
index 0000000..6f1d613
--- /dev/null
+++ b/Canon.Core/GrammarParser/GrammarBuilder.cs
@@ -0,0 +1,275 @@
+namespace Canon.Core.GrammarParser;
+
+/// <summary>
+/// Builds the LR(1) automaton of a grammar from its productions
+/// (<see cref="Generators"/>) and its start symbol (<see cref="Begin"/>).
+/// </summary>
+public class GrammarBuilder
+{
+    /// <summary>
+    /// Productions of the grammar: every non-terminator maps to its right-hand sides.
+    /// </summary>
+    public Dictionary<NonTerminator, List<List<TerminatorBase>>> Generators { get; } = [];
+
+    /// <summary>
+    /// Start symbol of the grammar.
+    /// </summary>
+    public required NonTerminator Begin { get; init; }
+
+    /// <summary>
+    /// First sets of the non-terminators; filled in by <see cref="Build"/>.
+    /// </summary>
+    public Dictionary<NonTerminator, HashSet<Terminator>> FirstSet { get; } = [];
+
+    /// <summary>
+    /// Every LR(1) state discovered by <see cref="Build"/>.
+    /// </summary>
+    public HashSet<LrState> Automation { get; } = [];
+
+    /// <summary>
+    /// Adds a terminator to the First set of a non-terminator, creating the set on first use.
+    /// </summary>
+    /// <param name="key">Non-terminator whose First set is extended.</param>
+    /// <param name="terminator">Terminator to add.</param>
+    /// <returns><c>true</c> when the First set actually grew.</returns>
+    private bool AddToFirstSet(NonTerminator key, Terminator terminator)
+    {
+        if (FirstSet.TryGetValue(key, out HashSet<Terminator>? set))
+        {
+            return set.Add(terminator);
+        }
+
+        FirstSet.Add(key, [terminator]);
+        return true;
+    }
+
+    /// <summary>
+    /// Computes the First set of every non-terminator by iterating to a fixed point.
+    /// </summary>
+    /// <remarks>
+    /// Only the first symbol of each production is inspected and empty productions are
+    /// skipped, so nullable non-terminators are not accounted for.
+    /// </remarks>
+    private void BuildFirstSet()
+    {
+        bool changed = true;
+
+        while (changed)
+        {
+            changed = false;
+
+            foreach (KeyValuePair<NonTerminator, List<List<TerminatorBase>>> pair in Generators)
+            {
+                foreach (List<TerminatorBase> expression in pair.Value)
+                {
+                    // NOTE(review): is skipping empty productions really correct?
+                    TerminatorBase? expressionHead = expression.FirstOrDefault();
+                    if (expressionHead is null)
+                    {
+                        continue;
+                    }
+
+                    if (expressionHead.IsTerminated)
+                    {
+                        // The production starts with a terminator: add it to First(left).
+                        changed |= AddToFirstSet(pair.Key, (Terminator)expressionHead);
+                        continue;
+                    }
+
+                    // The production starts with a non-terminator: merge what is known
+                    // so far of its First set into First(left).
+                    NonTerminator nonTerminator = (NonTerminator)expressionHead;
+                    if (!FirstSet.TryGetValue(nonTerminator, out HashSet<Terminator>? value))
+                    {
+                        continue;
+                    }
+
+                    foreach (Terminator first in value)
+                    {
+                        changed |= AddToFirstSet(pair.Key, first);
+                    }
+                }
+            }
+        }
+    }
+
+    /// <summary>
+    /// Computes the First set of a symbol sequence from the First sets of the non-terminators.
+    /// </summary>
+    /// <param name="expression">Symbol sequence to inspect.</param>
+    /// <returns>
+    /// First set of the leading symbol of <paramref name="expression"/>; empty when the
+    /// sequence is empty or its leading non-terminator has no First set.
+    /// </returns>
+    private HashSet<Terminator> CalculateFirstSetOfExpression(List<TerminatorBase> expression)
+    {
+        HashSet<Terminator> result = [];
+
+        TerminatorBase? expressionHead = expression.FirstOrDefault();
+        if (expressionHead is null)
+        {
+            return result;
+        }
+
+        if (expressionHead.IsTerminated)
+        {
+            // The sequence starts with a terminator.
+            result.Add((Terminator)expressionHead);
+            return result;
+        }
+
+        // The sequence starts with a non-terminator: take over its First set.
+        NonTerminator nonTerminator = (NonTerminator)expressionHead;
+        if (FirstSet.TryGetValue(nonTerminator, out HashSet<Terminator>? firstSet))
+        {
+            result.UnionWith(firstSet);
+        }
+
+        return result;
+    }
+
+    /// <summary>
+    /// Computes the LR(1) closure of an item.
+    /// </summary>
+    /// <param name="expression">Item to start from.</param>
+    /// <returns>Closure of <paramref name="expression"/>, including the item itself.</returns>
+    private HashSet<Expression> CalculateClosure(Expression expression)
+    {
+        HashSet<Expression> closure = [expression];
+
+        // Items still to be expanded. A work list is used because a HashSet must not be
+        // modified while it is being enumerated.
+        Queue<Expression> pending = new();
+        pending.Enqueue(expression);
+
+        while (pending.Count > 0)
+        {
+            Expression e = pending.Dequeue();
+
+            if (e.Pos >= e.Right.Count)
+            {
+                // The dot is at the end of the item: there is no next symbol to expand.
+                continue;
+            }
+
+            TerminatorBase next = e.Right[e.Pos];
+            if (next.IsTerminated)
+            {
+                continue;
+            }
+
+            NonTerminator nonTerminator = (NonTerminator)next;
+
+            // Look-aheads of the new items: First of the symbols that follow the
+            // non-terminator, followed by the look-ahead of the current item.
+            List<TerminatorBase> ahead = [];
+            for (int i = e.Pos + 1; i < e.Right.Count; i++)
+            {
+                ahead.Add(e.Right[i]);
+            }
+
+            ahead.Add(e.LookAhead);
+
+            HashSet<Terminator> lookAheadSet = CalculateFirstSetOfExpression(ahead);
+
+            foreach (List<TerminatorBase> nextExpression in Generators[nonTerminator])
+            {
+                foreach (Terminator lookAhead in lookAheadSet)
+                {
+                    Expression newExpression = new()
+                    {
+                        Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead
+                    };
+
+                    if (closure.Add(newExpression))
+                    {
+                        pending.Enqueue(newExpression);
+                    }
+                }
+            }
+        }
+
+        return closure;
+    }
+
+    /// <summary>
+    /// Builds the First sets and the LR(1) automaton of the grammar.
+    /// </summary>
+    /// <returns>The grammar, with its start symbol and the initial state of the automaton.</returns>
+    public Grammar Build()
+    {
+        // The First sets are needed to compute the look-aheads of the closures.
+        BuildFirstSet();
+
+        Expression begin = new()
+        {
+            Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator
+        };
+
+        LrState beginState = new() { Expressions = CalculateClosure(begin) };
+        Automation.Add(beginState);
+
+        // States whose transitions are still to be computed. Each state is processed exactly
+        // once, so Automation is never modified while it is enumerated and no transition is
+        // registered twice.
+        Queue<LrState> pending = new();
+        pending.Enqueue(beginState);
+
+        while (pending.Count > 0)
+        {
+            LrState state = pending.Dequeue();
+
+            // Items obtained by shifting over the key symbol.
+            Dictionary<TerminatorBase, List<Expression>> nextExpressions = [];
+
+            foreach (Expression e in state.Expressions)
+            {
+                if (e.Pos >= e.Right.Count)
+                {
+                    // The dot is already at the end of the item: nothing to shift.
+                    continue;
+                }
+
+                // The symbol shifted over is the one right after the dot, read before the
+                // dot is advanced.
+                TerminatorBase next = e.Right[e.Pos];
+                Expression nextExpression = new()
+                {
+                    Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + 1
+                };
+
+                if (!nextExpressions.TryAdd(next, [nextExpression]))
+                {
+                    nextExpressions[next].Add(nextExpression);
+                }
+            }
+
+            foreach (KeyValuePair<TerminatorBase, List<Expression>> pair in nextExpressions)
+            {
+                // The target state is the union of the closures of the shifted items.
+                HashSet<Expression> closure = [];
+                foreach (Expression expression in pair.Value)
+                {
+                    closure.UnionWith(CalculateClosure(expression));
+                }
+
+                LrState newState = new() { Expressions = closure };
+
+                if (Automation.TryGetValue(newState, out LrState? oldState))
+                {
+                    // An equal state already exists: reuse it.
+                    state.Transformer.Add(pair.Key, oldState);
+                }
+                else
+                {
+                    // New state: register it and schedule it for processing.
+                    Automation.Add(newState);
+                    pending.Enqueue(newState);
+                    state.Transformer.Add(pair.Key, newState);
+                }
+            }
+        }
+
+        return new Grammar { Begin = Begin, BeginState = beginState };
+    }
+}
diff --git a/Canon.Core/GrammarParser/Terminator.cs b/Canon.Core/GrammarParser/Terminator.cs
index faab1f8..ca7b3b4 100644
--- a/Canon.Core/GrammarParser/Terminator.cs
+++ b/Canon.Core/GrammarParser/Terminator.cs
@@ -63,6 +63,11 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
     /// </summary>
     public static Terminator NumberTerminator => new(SemanticTokenType.Number);
 
+    /// <summary>
+    /// Terminator for the bottom-of-stack symbol (<see cref="SemanticTokenType.End"/>).
+    /// </summary>
+    public static Terminator EndTerminator => new(SemanticTokenType.End);
+
     public override int GetHashCode()
     {
         int hash = _terminatorType.GetHashCode();