From 315deaabf2fcc9d6969c660328b81316d06bf43d Mon Sep 17 00:00:00 2001 From: jackfiled Date: Mon, 11 Mar 2024 21:57:47 +0800 Subject: [PATCH] =?UTF-8?q?add:=20=E6=8A=BD=E8=B1=A1=E8=AF=AD=E6=B3=95?= =?UTF-8?q?=E6=A0=91=E8=8A=82=E7=82=B9=20(#5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix: - Expression格式化过程中如果Pos在最右边就不显示 - Expression中不考虑Pos Reviewed-on: https://git.rrricardo.top/PostGuard/Canon/pulls/5 --- Canon.Core/GrammarParser/Expression.cs | 11 +- Canon.Core/GrammarParser/Grammar.cs | 25 ++-- Canon.Core/GrammarParser/GrammarBuilder.cs | 7 +- Canon.Core/GrammarParser/SyntaxNode.cs | 130 ++++++++++++++++++ Canon.Core/GrammarParser/Terminator.cs | 17 +-- .../GrammarParserTests/SimpleGrammarTests.cs | 71 +++++++++- 6 files changed, 232 insertions(+), 29 deletions(-) create mode 100644 Canon.Core/GrammarParser/SyntaxNode.cs diff --git a/Canon.Core/GrammarParser/Expression.cs b/Canon.Core/GrammarParser/Expression.cs index 6ccd9e0..5083027 100644 --- a/Canon.Core/GrammarParser/Expression.cs +++ b/Canon.Core/GrammarParser/Expression.cs @@ -24,7 +24,7 @@ public class Expression : IEquatable /// /// 当前移进的位置 /// - public int Pos { get; set; } + public required int Pos { get; init; } public bool Equals(Expression? other) { @@ -52,7 +52,8 @@ public class Expression : IEquatable } return Left == other.Left - && LookAhead == other.LookAhead; + && LookAhead == other.LookAhead + && Pos == other.Pos; } public override bool Equals(object? obj) @@ -69,6 +70,7 @@ public class Expression : IEquatable { int hash = Left.GetHashCode(); hash ^= LookAhead.GetHashCode(); + hash ^= Pos.GetHashCode(); foreach (TerminatorBase terminator in Right) { @@ -93,6 +95,11 @@ public class Expression : IEquatable result += Right[i].ToString(); } + if (Pos == Right.Count) + { + result += '~'; + } + result += $", {LookAhead}"; return result; diff --git a/Canon.Core/GrammarParser/Grammar.cs b/Canon.Core/GrammarParser/Grammar.cs index 8a55ae4..18788df 100644 --- a/Canon.Core/GrammarParser/Grammar.cs +++ b/Canon.Core/GrammarParser/Grammar.cs @@ -8,10 +8,10 @@ public class Grammar public required LrState BeginState { get; init; } - public void Analyse(IEnumerable tokens) + public SyntaxNode Analyse(IEnumerable tokens) { - Stack stack = []; - stack.Push(BeginState); + Stack stack = []; + stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End))); using IEnumerator enumerator = tokens.GetEnumerator(); if (!enumerator.MoveNext()) @@ -21,11 +21,11 @@ public class Grammar while (true) { - LrState top = stack.Peek(); + AnalyseState top = stack.Peek(); // 尝试进行移进 bool acceptFlag = false, reduceFlag = false; - foreach (Expression e in top.Expressions) + foreach (Expression e in top.State.Expressions) { if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current) { @@ -36,21 +36,24 @@ public class Grammar else { reduceFlag = true; + SyntaxNode newNode = new(e.Left.Type); for (int i = 0; i < e.Right.Count; i++) { - stack.Pop(); + newNode.Children.Add(stack.Pop().Node); } - stack.Push(stack.Peek().Transformer[e.Left]); + stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left], + newNode)); } + break; } } if (acceptFlag) { // 接受文法 退出循环 - break; + return top.Node; } if (reduceFlag) @@ -60,9 +63,9 @@ public class Grammar } // 尝试进行移进 - if (top.Transformer.TryGetValue(enumerator.Current, out LrState? next)) + if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next)) { - stack.Push(next); + stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current))); if (enumerator.MoveNext()) { continue; @@ -76,4 +79,6 @@ public class Grammar throw new InvalidOperationException("Failed to analyse input grammar"); } } + + private record AnalyseState(LrState State, SyntaxNode Node); } diff --git a/Canon.Core/GrammarParser/GrammarBuilder.cs b/Canon.Core/GrammarParser/GrammarBuilder.cs index e4db238..ab305a9 100644 --- a/Canon.Core/GrammarParser/GrammarBuilder.cs +++ b/Canon.Core/GrammarParser/GrammarBuilder.cs @@ -177,7 +177,7 @@ public class GrammarBuilder { Expression newExpression = new() { - Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead + Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0 }; if (!closure.Contains(newExpression)) @@ -207,7 +207,7 @@ public class GrammarBuilder Expression begin = new() { - Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator + Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0 }; LrState beginState = new() { Expressions = CalculateClosure(begin) }; @@ -235,9 +235,8 @@ public class GrammarBuilder TerminatorBase next = e.Right[e.Pos]; Expression nextExpression = new() { - Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + 1 }; - nextExpression.Pos += 1; if (!nextExpressions.TryAdd(next, [nextExpression])) { diff --git a/Canon.Core/GrammarParser/SyntaxNode.cs b/Canon.Core/GrammarParser/SyntaxNode.cs new file mode 100644 index 0000000..8722147 --- /dev/null +++ b/Canon.Core/GrammarParser/SyntaxNode.cs @@ -0,0 +1,130 @@ +using System.Collections; +using Canon.Core.Enums; +using Canon.Core.LexicalParser; + +namespace Canon.Core.GrammarParser; + +/// +/// 抽象语法树上的节点 +/// +public class SyntaxNode : IEquatable, IEnumerable +{ + private readonly SemanticToken? _semanticToken; + private readonly NonTerminatorType _nonTerminatorType; + + public bool IsTerminated { get; } + + public List Children { get; } = []; + + public SyntaxNode(SemanticToken token) + { + IsTerminated = true; + _semanticToken = token; + } + + public SyntaxNode(NonTerminatorType nonTerminatorType) + { + IsTerminated = false; + _nonTerminatorType = nonTerminatorType; + } + + /// + /// 获得终结节点包含的记号对象 + /// + /// 词法分析得到的记号对象 + /// 在非终结节点上调用该方法 + public SemanticToken GetSemanticToken() + { + if (!IsTerminated) + { + throw new InvalidOperationException("Can not get semantic token from a not terminated node"); + } + + return _semanticToken!; + } + + /// + /// 获得非终结节点的类型 + /// + /// 非终结节点类型 + /// 在终结节点上调用该方法 + public NonTerminatorType GetNonTerminatorType() + { + if (IsTerminated) + { + throw new InvalidOperationException("Can not get non terminated type from a terminated node"); + } + + return _nonTerminatorType; + } + + public IEnumerator GetEnumerator() + { + yield return this; + + foreach (SyntaxNode child in Children) + { + foreach (SyntaxNode node in child) + { + yield return node; + } + } + } + + public bool Equals(SyntaxNode? other) + { + if (other is null) + { + return false; + } + + if (IsTerminated != other.IsTerminated) + { + return false; + } + + if (IsTerminated) + { + return GetSemanticToken() == other.GetSemanticToken(); + } + else + { + // 在判等时是否需要判断子节点也相等? + return GetNonTerminatorType() == other.GetNonTerminatorType(); + } + } + + public override bool Equals(object? obj) + { + if (obj is not SyntaxNode other) + { + return false; + } + + return Equals(other); + } + + public override int GetHashCode() + { + if (IsTerminated) + { + return GetSemanticToken().GetHashCode(); + } + else + { + return GetNonTerminatorType().GetHashCode(); + } + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + public static bool operator ==(SyntaxNode a, SyntaxNode b) + { + return a.Equals(b); + } + + public static bool operator !=(SyntaxNode a, SyntaxNode b) + { + return !a.Equals(b); + } +} diff --git a/Canon.Core/GrammarParser/Terminator.cs b/Canon.Core/GrammarParser/Terminator.cs index b12de71..f0f7569 100644 --- a/Canon.Core/GrammarParser/Terminator.cs +++ b/Canon.Core/GrammarParser/Terminator.cs @@ -96,7 +96,7 @@ public class Terminator : TerminatorBase, IEquatable case SemanticTokenType.Delimiter: return _delimiterType.ToString(); default: - return _keywordType.ToString(); + return _terminatorType.ToString(); } } @@ -189,23 +189,18 @@ public class Terminator : TerminatorBase, IEquatable /// /// 语法中的非终结符 /// -public class NonTerminator : TerminatorBase, IEquatable +public class NonTerminator(NonTerminatorType type) : TerminatorBase, IEquatable { public override bool IsTerminated => false; - private readonly NonTerminatorType _type; - - public NonTerminator(NonTerminatorType type) - { - _type = type; - } + public NonTerminatorType Type { get; } = type; public override int GetHashCode() { - return _type.GetHashCode(); + return Type.GetHashCode(); } - public override string ToString() => _type.ToString(); + public override string ToString() => Type.ToString(); public bool Equals(NonTerminator? other) { @@ -214,7 +209,7 @@ public class NonTerminator : TerminatorBase, IEquatable return false; } - return _type == other._type; + return Type == other.Type; } public override bool Equals(object? obj) diff --git a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs index 3d9b518..549799b 100644 --- a/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs +++ b/Canon.Tests/GrammarParserTests/SimpleGrammarTests.cs @@ -118,6 +118,7 @@ public class SimpleGrammarTests }; Grammar grammar = builder.Build(); + // n + n List tokens = [ new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, @@ -129,7 +130,73 @@ public class SimpleGrammarTests SemanticToken.End ]; - // 验证分析语句不会抛出错误 - grammar.Analyse(tokens); + // 分析树为 + // E + // | + // /\ + // / | \ + // E + T + // | | + // T F + // | | + // F n + // | + // n + SyntaxNode root = grammar.Analyse(tokens); + Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType()); + Assert.Equal(3, root.Children.Count); + Assert.Contains(root.Children, node => + { + if (node.IsTerminated && node.GetSemanticToken().TokenType == SemanticTokenType.Operator) + { + OperatorSemanticToken token = (OperatorSemanticToken)node.GetSemanticToken(); + + return token.OperatorType == OperatorType.Plus; + } + + return false; + }); + Assert.Equal(9, root.Count()); + } + + [Fact] + public void AnalyseComplexSentenceTest() + { + GrammarBuilder builder = new() + { + Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator) + }; + + Grammar grammar = builder.Build(); + // (n + n) * n + List tokens = + [ + new DelimiterSemanticToken + { + LinePos = 0, CharacterPos = 0, LiteralValue = "(", DelimiterType = DelimiterType.LeftParenthesis + }, + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, + new OperatorSemanticToken + { + LinePos = 0, CharacterPos = 0, LiteralValue = "+", OperatorType = OperatorType.Plus + }, + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, + new DelimiterSemanticToken + { + LinePos = 0, CharacterPos = 0, LiteralValue = ")", DelimiterType = DelimiterType.RightParenthesis + }, + new OperatorSemanticToken + { + LinePos = 0, CharacterPos = 0, LiteralValue = "*", OperatorType = OperatorType.Multiply + }, + new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, + SemanticToken.End + ]; + + SyntaxNode root = grammar.Analyse(tokens); + Assert.Equal(18, root.Count()); + Assert.False(root.IsTerminated); + Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType()); + Assert.Single(root.Children); } }