add: 抽象语法树节点 (#5)

fix:
- Expression格式化过程中如果Pos在最右边就不显示
- Expression中不考虑Pos

Reviewed-on: PostGuard/Canon#5
This commit is contained in:
jackfiled 2024-03-11 21:57:47 +08:00
parent 9930dbc42c
commit 315deaabf2
6 changed files with 232 additions and 29 deletions

View File

@ -24,7 +24,7 @@ public class Expression : IEquatable<Expression>
/// <summary> /// <summary>
/// 当前移进的位置 /// 当前移进的位置
/// </summary> /// </summary>
public int Pos { get; set; } public required int Pos { get; init; }
public bool Equals(Expression? other) public bool Equals(Expression? other)
{ {
@ -52,7 +52,8 @@ public class Expression : IEquatable<Expression>
} }
return Left == other.Left return Left == other.Left
&& LookAhead == other.LookAhead; && LookAhead == other.LookAhead
&& Pos == other.Pos;
} }
public override bool Equals(object? obj) public override bool Equals(object? obj)
@ -69,6 +70,7 @@ public class Expression : IEquatable<Expression>
{ {
int hash = Left.GetHashCode(); int hash = Left.GetHashCode();
hash ^= LookAhead.GetHashCode(); hash ^= LookAhead.GetHashCode();
hash ^= Pos.GetHashCode();
foreach (TerminatorBase terminator in Right) foreach (TerminatorBase terminator in Right)
{ {
@ -93,6 +95,11 @@ public class Expression : IEquatable<Expression>
result += Right[i].ToString(); result += Right[i].ToString();
} }
if (Pos == Right.Count)
{
result += '~';
}
result += $", {LookAhead}"; result += $", {LookAhead}";
return result; return result;

View File

@ -8,10 +8,10 @@ public class Grammar
public required LrState BeginState { get; init; } public required LrState BeginState { get; init; }
public void Analyse(IEnumerable<SemanticToken> tokens) public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
{ {
Stack<LrState> stack = []; Stack<AnalyseState> stack = [];
stack.Push(BeginState); stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator(); using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
if (!enumerator.MoveNext()) if (!enumerator.MoveNext())
@ -21,11 +21,11 @@ public class Grammar
while (true) while (true)
{ {
LrState top = stack.Peek(); AnalyseState top = stack.Peek();
// 尝试进行移进 // 尝试进行移进
bool acceptFlag = false, reduceFlag = false; bool acceptFlag = false, reduceFlag = false;
foreach (Expression e in top.Expressions) foreach (Expression e in top.State.Expressions)
{ {
if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current) if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
{ {
@ -36,21 +36,24 @@ public class Grammar
else else
{ {
reduceFlag = true; reduceFlag = true;
SyntaxNode newNode = new(e.Left.Type);
for (int i = 0; i < e.Right.Count; i++) for (int i = 0; i < e.Right.Count; i++)
{ {
stack.Pop(); newNode.Children.Add(stack.Pop().Node);
} }
stack.Push(stack.Peek().Transformer[e.Left]); stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
newNode));
} }
break;
} }
} }
if (acceptFlag) if (acceptFlag)
{ {
// 接受文法 退出循环 // 接受文法 退出循环
break; return top.Node;
} }
if (reduceFlag) if (reduceFlag)
@ -60,9 +63,9 @@ public class Grammar
} }
// 尝试进行移进 // 尝试进行移进
if (top.Transformer.TryGetValue(enumerator.Current, out LrState? next)) if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
{ {
stack.Push(next); stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
if (enumerator.MoveNext()) if (enumerator.MoveNext())
{ {
continue; continue;
@ -76,4 +79,6 @@ public class Grammar
throw new InvalidOperationException("Failed to analyse input grammar"); throw new InvalidOperationException("Failed to analyse input grammar");
} }
} }
/// <summary>
/// Pairs an <see cref="LrState"/> with the <see cref="SyntaxNode"/> stored alongside it as one entry of the analysis stack.
/// </summary>
private record AnalyseState(LrState State, SyntaxNode Node);
} }

View File

@ -177,7 +177,7 @@ public class GrammarBuilder
{ {
Expression newExpression = new() Expression newExpression = new()
{ {
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0
}; };
if (!closure.Contains(newExpression)) if (!closure.Contains(newExpression))
@ -207,7 +207,7 @@ public class GrammarBuilder
Expression begin = new() Expression begin = new()
{ {
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
}; };
LrState beginState = new() { Expressions = CalculateClosure(begin) }; LrState beginState = new() { Expressions = CalculateClosure(begin) };
@ -235,9 +235,8 @@ public class GrammarBuilder
TerminatorBase next = e.Right[e.Pos]; TerminatorBase next = e.Right[e.Pos];
Expression nextExpression = new() Expression nextExpression = new()
{ {
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + 1
}; };
nextExpression.Pos += 1;
if (!nextExpressions.TryAdd(next, [nextExpression])) if (!nextExpressions.TryAdd(next, [nextExpression]))
{ {

View File

@ -0,0 +1,130 @@
using System.Collections;
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
namespace Canon.Core.GrammarParser;
/// <summary>
/// A node of the abstract syntax tree.
/// </summary>
/// <remarks>
/// A node is either a terminal node that wraps a <see cref="SemanticToken"/>, or a
/// non-terminal node that carries a <see cref="NonTerminatorType"/>. Enumerating a node
/// yields the node itself followed by every descendant, depth-first, visiting
/// <see cref="Children"/> in list order.
/// </remarks>
public class SyntaxNode : IEquatable<SyntaxNode>, IEnumerable<SyntaxNode>
{
    // Only set by the token constructor; stays null on non-terminal nodes.
    private readonly SemanticToken? _semanticToken;

    // Only set by the non-terminal constructor; keeps its default value on terminal nodes.
    private readonly NonTerminatorType _nonTerminatorType;

    /// <summary>
    /// <c>true</c> when this node was created from a token (terminal node),
    /// <c>false</c> when it was created from a non-terminal type.
    /// </summary>
    public bool IsTerminated { get; }

    /// <summary>
    /// The direct children of this node. The list starts empty and is filled by the caller.
    /// </summary>
    public List<SyntaxNode> Children { get; } = [];

    /// <summary>
    /// Creates a terminal node that wraps the given token.
    /// </summary>
    /// <param name="token">The token produced by lexical analysis.</param>
    public SyntaxNode(SemanticToken token)
    {
        IsTerminated = true;
        _semanticToken = token;
    }

    /// <summary>
    /// Creates a non-terminal node of the given type.
    /// </summary>
    /// <param name="nonTerminatorType">The non-terminal type this node represents.</param>
    public SyntaxNode(NonTerminatorType nonTerminatorType)
    {
        IsTerminated = false;
        _nonTerminatorType = nonTerminatorType;
    }

    /// <summary>
    /// Gets the token held by a terminal node.
    /// </summary>
    /// <returns>The token produced by lexical analysis.</returns>
    /// <exception cref="InvalidOperationException">The method was called on a non-terminal node.</exception>
    public SemanticToken GetSemanticToken()
    {
        if (!IsTerminated)
        {
            throw new InvalidOperationException("Can not get semantic token from a not terminated node");
        }

        return _semanticToken!;
    }

    /// <summary>
    /// Gets the type of a non-terminal node.
    /// </summary>
    /// <returns>The non-terminal type.</returns>
    /// <exception cref="InvalidOperationException">The method was called on a terminal node.</exception>
    public NonTerminatorType GetNonTerminatorType()
    {
        if (IsTerminated)
        {
            throw new InvalidOperationException("Can not get non terminated type from a terminated node");
        }

        return _nonTerminatorType;
    }

    /// <summary>
    /// Enumerates this node first and then, recursively, the enumeration of each child.
    /// </summary>
    /// <returns>An enumerator over this node and all of its descendants.</returns>
    public IEnumerator<SyntaxNode> GetEnumerator()
    {
        yield return this;

        foreach (SyntaxNode child in Children)
        {
            // Each child is itself enumerable, so this walks the whole subtree.
            foreach (SyntaxNode node in child)
            {
                yield return node;
            }
        }
    }

    /// <summary>
    /// Compares two nodes by kind and payload: terminal nodes compare their tokens,
    /// non-terminal nodes compare their non-terminal types. Children are not compared.
    /// </summary>
    /// <param name="other">The node to compare with; may be <c>null</c>.</param>
    /// <returns><c>true</c> when both nodes are of the same kind and carry equal payloads.</returns>
    public bool Equals(SyntaxNode? other)
    {
        if (other is null)
        {
            return false;
        }

        if (IsTerminated != other.IsTerminated)
        {
            return false;
        }

        if (IsTerminated)
        {
            return GetSemanticToken() == other.GetSemanticToken();
        }

        // Open question from the original author: should equality also require
        // the child nodes to be equal? Currently it does not.
        return GetNonTerminatorType() == other.GetNonTerminatorType();
    }

    /// <inheritdoc />
    public override bool Equals(object? obj)
    {
        return obj is SyntaxNode other && Equals(other);
    }

    /// <summary>
    /// Returns the hash code of the payload used by <see cref="Equals(SyntaxNode?)"/>:
    /// the token for terminal nodes, the non-terminal type otherwise.
    /// </summary>
    public override int GetHashCode()
    {
        return IsTerminated
            ? GetSemanticToken().GetHashCode()
            : GetNonTerminatorType().GetHashCode();
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Null-safe equality: two <c>null</c> references are equal, a <c>null</c> and a
    /// non-null reference are not, otherwise <see cref="Equals(SyntaxNode?)"/> decides.
    /// </summary>
    public static bool operator ==(SyntaxNode? a, SyntaxNode? b)
    {
        // Pattern matching is required here: writing "a == null" would call this operator again.
        return a is null ? b is null : a.Equals(b);
    }

    /// <summary>
    /// Null-safe inequality; the exact negation of <c>==</c>.
    /// </summary>
    public static bool operator !=(SyntaxNode? a, SyntaxNode? b)
    {
        return !(a == b);
    }
}

View File

@ -96,7 +96,7 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
case SemanticTokenType.Delimiter: case SemanticTokenType.Delimiter:
return _delimiterType.ToString(); return _delimiterType.ToString();
default: default:
return _keywordType.ToString(); return _terminatorType.ToString();
} }
} }
@ -189,23 +189,18 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
/// <summary> /// <summary>
/// 语法中的非终结符 /// 语法中的非终结符
/// </summary> /// </summary>
public class NonTerminator : TerminatorBase, IEquatable<NonTerminator> public class NonTerminator(NonTerminatorType type) : TerminatorBase, IEquatable<NonTerminator>
{ {
public override bool IsTerminated => false; public override bool IsTerminated => false;
private readonly NonTerminatorType _type; public NonTerminatorType Type { get; } = type;
public NonTerminator(NonTerminatorType type)
{
_type = type;
}
public override int GetHashCode() public override int GetHashCode()
{ {
return _type.GetHashCode(); return Type.GetHashCode();
} }
public override string ToString() => _type.ToString(); public override string ToString() => Type.ToString();
public bool Equals(NonTerminator? other) public bool Equals(NonTerminator? other)
{ {
@ -214,7 +209,7 @@ public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
return false; return false;
} }
return _type == other._type; return Type == other.Type;
} }
public override bool Equals(object? obj) public override bool Equals(object? obj)

View File

@ -118,6 +118,7 @@ public class SimpleGrammarTests
}; };
Grammar grammar = builder.Build(); Grammar grammar = builder.Build();
// n + n
List<SemanticToken> tokens = List<SemanticToken> tokens =
[ [
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" }, new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
@ -129,7 +130,73 @@ public class SimpleGrammarTests
SemanticToken.End SemanticToken.End
]; ];
// 验证分析语句不会抛出错误 // 分析树为
grammar.Analyse(tokens); // E
// |
// /\
// / | \
// E + T
// | |
// T F
// | |
// F n
// |
// n
SyntaxNode root = grammar.Analyse(tokens);
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
Assert.Equal(3, root.Children.Count);
Assert.Contains(root.Children, node =>
{
if (node.IsTerminated && node.GetSemanticToken().TokenType == SemanticTokenType.Operator)
{
OperatorSemanticToken token = (OperatorSemanticToken)node.GetSemanticToken();
return token.OperatorType == OperatorType.Plus;
}
return false;
});
Assert.Equal(9, root.Count());
}
[Fact]
public void AnalyseComplexSentenceTest()
{
GrammarBuilder builder = new()
{
Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
};
Grammar grammar = builder.Build();
// (n + n) * n
List<SemanticToken> tokens =
[
new DelimiterSemanticToken
{
LinePos = 0, CharacterPos = 0, LiteralValue = "(", DelimiterType = DelimiterType.LeftParenthesis
},
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
new OperatorSemanticToken
{
LinePos = 0, CharacterPos = 0, LiteralValue = "+", OperatorType = OperatorType.Plus
},
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
new DelimiterSemanticToken
{
LinePos = 0, CharacterPos = 0, LiteralValue = ")", DelimiterType = DelimiterType.RightParenthesis
},
new OperatorSemanticToken
{
LinePos = 0, CharacterPos = 0, LiteralValue = "*", OperatorType = OperatorType.Multiply
},
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
SemanticToken.End
];
SyntaxNode root = grammar.Analyse(tokens);
Assert.Equal(18, root.Count());
Assert.False(root.IsTerminated);
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
Assert.Single(root.Children);
} }
} }