add: 抽象语法树节点 (#5)

fix:
- Expression格式化过程中如果Pos在最右边就不显示
- Expression中不考虑Pos

Reviewed-on: PostGuard/Canon#5
This commit is contained in:
jackfiled 2024-03-11 21:57:47 +08:00
parent 9930dbc42c
commit 315deaabf2
6 changed files with 232 additions and 29 deletions

View File

@ -24,7 +24,7 @@ public class Expression : IEquatable<Expression>
/// <summary>
/// 当前移进的位置
/// </summary>
public int Pos { get; set; }
public required int Pos { get; init; }
public bool Equals(Expression? other)
{
@ -52,7 +52,8 @@ public class Expression : IEquatable<Expression>
}
return Left == other.Left
&& LookAhead == other.LookAhead;
&& LookAhead == other.LookAhead
&& Pos == other.Pos;
}
public override bool Equals(object? obj)
@ -69,6 +70,7 @@ public class Expression : IEquatable<Expression>
{
int hash = Left.GetHashCode();
hash ^= LookAhead.GetHashCode();
hash ^= Pos.GetHashCode();
foreach (TerminatorBase terminator in Right)
{
@ -93,6 +95,11 @@ public class Expression : IEquatable<Expression>
result += Right[i].ToString();
}
if (Pos == Right.Count)
{
result += '~';
}
result += $", {LookAhead}";
return result;

View File

@ -8,10 +8,10 @@ public class Grammar
public required LrState BeginState { get; init; }
public void Analyse(IEnumerable<SemanticToken> tokens)
public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
{
Stack<LrState> stack = [];
stack.Push(BeginState);
Stack<AnalyseState> stack = [];
stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
if (!enumerator.MoveNext())
@ -21,11 +21,11 @@ public class Grammar
while (true)
{
LrState top = stack.Peek();
AnalyseState top = stack.Peek();
// 尝试进行移进
bool acceptFlag = false, reduceFlag = false;
foreach (Expression e in top.Expressions)
foreach (Expression e in top.State.Expressions)
{
if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
{
@ -36,21 +36,24 @@ public class Grammar
else
{
reduceFlag = true;
SyntaxNode newNode = new(e.Left.Type);
for (int i = 0; i < e.Right.Count; i++)
{
stack.Pop();
newNode.Children.Add(stack.Pop().Node);
}
stack.Push(stack.Peek().Transformer[e.Left]);
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
newNode));
}
break;
}
}
if (acceptFlag)
{
// 接受文法 退出循环
break;
return top.Node;
}
if (reduceFlag)
@ -60,9 +63,9 @@ public class Grammar
}
// 尝试进行移进
if (top.Transformer.TryGetValue(enumerator.Current, out LrState? next))
if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
{
stack.Push(next);
stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
if (enumerator.MoveNext())
{
continue;
@ -76,4 +79,6 @@ public class Grammar
throw new InvalidOperationException("Failed to analyse input grammar");
}
}
private record AnalyseState(LrState State, SyntaxNode Node);
}

View File

@ -177,7 +177,7 @@ public class GrammarBuilder
{
Expression newExpression = new()
{
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0
};
if (!closure.Contains(newExpression))
@ -207,7 +207,7 @@ public class GrammarBuilder
Expression begin = new()
{
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
};
LrState beginState = new() { Expressions = CalculateClosure(begin) };
@ -235,9 +235,8 @@ public class GrammarBuilder
TerminatorBase next = e.Right[e.Pos];
Expression nextExpression = new()
{
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + 1
};
nextExpression.Pos += 1;
if (!nextExpressions.TryAdd(next, [nextExpression]))
{

View File

@ -0,0 +1,130 @@
using System.Collections;
using Canon.Core.Enums;
using Canon.Core.LexicalParser;
namespace Canon.Core.GrammarParser;
/// <summary>
/// A node of the abstract syntax tree.
/// </summary>
/// <remarks>
/// A node is either terminated (it wraps a <see cref="SemanticToken"/>) or
/// non-terminated (it carries a <see cref="NonTerminatorType"/>).
/// Enumerating a node yields the node itself followed by all of its
/// descendants in pre-order.
/// </remarks>
public class SyntaxNode : IEquatable<SyntaxNode>, IEnumerable<SyntaxNode>
{
    // Only meaningful when IsTerminated is true.
    private readonly SemanticToken? _semanticToken;

    // Only meaningful when IsTerminated is false.
    private readonly NonTerminatorType _nonTerminatorType;

    /// <summary>
    /// Whether this node wraps a token (<c>true</c>) or a non-terminator type (<c>false</c>).
    /// </summary>
    public bool IsTerminated { get; }

    /// <summary>
    /// The child nodes of this node.
    /// </summary>
    public List<SyntaxNode> Children { get; } = [];

    /// <summary>
    /// Creates a terminated node wrapping the given token.
    /// </summary>
    /// <param name="token">The token stored in the node.</param>
    public SyntaxNode(SemanticToken token)
    {
        IsTerminated = true;
        _semanticToken = token;
    }

    /// <summary>
    /// Creates a non-terminated node of the given type.
    /// </summary>
    /// <param name="nonTerminatorType">The non-terminator type stored in the node.</param>
    public SyntaxNode(NonTerminatorType nonTerminatorType)
    {
        IsTerminated = false;
        _nonTerminatorType = nonTerminatorType;
    }

    /// <summary>
    /// Gets the token held by a terminated node.
    /// </summary>
    /// <returns>The token that was passed to the constructor.</returns>
    /// <exception cref="InvalidOperationException">The node is not terminated.</exception>
    public SemanticToken GetSemanticToken()
    {
        if (!IsTerminated)
        {
            throw new InvalidOperationException("Can not get semantic token from a not terminated node");
        }

        return _semanticToken!;
    }

    /// <summary>
    /// Gets the type of a non-terminated node.
    /// </summary>
    /// <returns>The non-terminator type that was passed to the constructor.</returns>
    /// <exception cref="InvalidOperationException">The node is terminated.</exception>
    public NonTerminatorType GetNonTerminatorType()
    {
        if (IsTerminated)
        {
            throw new InvalidOperationException("Can not get non terminated type from a terminated node");
        }

        return _nonTerminatorType;
    }

    /// <summary>
    /// Enumerates this node and then, recursively, every child subtree (pre-order).
    /// </summary>
    public IEnumerator<SyntaxNode> GetEnumerator()
    {
        yield return this;

        foreach (SyntaxNode child in Children)
        {
            // Enumerating a child recursively walks its whole subtree.
            foreach (SyntaxNode node in child)
            {
                yield return node;
            }
        }
    }

    /// <summary>
    /// Compares two nodes by kind and payload; children are not compared.
    /// </summary>
    /// <param name="other">The node to compare with.</param>
    /// <returns>
    /// <c>true</c> when both nodes are of the same kind and hold equal tokens
    /// (terminated) or equal non-terminator types (non-terminated).
    /// </returns>
    public bool Equals(SyntaxNode? other)
    {
        if (other is null)
        {
            return false;
        }

        if (IsTerminated != other.IsTerminated)
        {
            return false;
        }

        if (IsTerminated)
        {
            return GetSemanticToken() == other.GetSemanticToken();
        }

        // Open question from the original author: should equality also
        // require the children to be equal? Currently it does not.
        return GetNonTerminatorType() == other.GetNonTerminatorType();
    }

    /// <inheritdoc/>
    public override bool Equals(object? obj)
    {
        return obj is SyntaxNode other && Equals(other);
    }

    /// <summary>
    /// Hashes the same payload that <see cref="Equals(SyntaxNode?)"/> compares.
    /// </summary>
    public override int GetHashCode()
    {
        return IsTerminated
            ? GetSemanticToken().GetHashCode()
            : GetNonTerminatorType().GetHashCode();
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Null-safe equality: two nulls are equal, a null never equals a node.
    /// </summary>
    public static bool operator ==(SyntaxNode? a, SyntaxNode? b)
    {
        // Guard the left operand so that "node == null" cannot throw.
        return a is null ? b is null : a.Equals(b);
    }

    /// <summary>
    /// Null-safe inequality; the negation of <c>==</c>.
    /// </summary>
    public static bool operator !=(SyntaxNode? a, SyntaxNode? b)
    {
        return !(a == b);
    }
}

View File

@ -96,7 +96,7 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
case SemanticTokenType.Delimiter:
return _delimiterType.ToString();
default:
return _keywordType.ToString();
return _terminatorType.ToString();
}
}
@ -189,23 +189,18 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
/// <summary>
/// 语法中的非终结符
/// </summary>
public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
public class NonTerminator(NonTerminatorType type) : TerminatorBase, IEquatable<NonTerminator>
{
public override bool IsTerminated => false;
private readonly NonTerminatorType _type;
public NonTerminator(NonTerminatorType type)
{
_type = type;
}
public NonTerminatorType Type { get; } = type;
public override int GetHashCode()
{
return _type.GetHashCode();
return Type.GetHashCode();
}
public override string ToString() => _type.ToString();
public override string ToString() => Type.ToString();
public bool Equals(NonTerminator? other)
{
@ -214,7 +209,7 @@ public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
return false;
}
return _type == other._type;
return Type == other.Type;
}
public override bool Equals(object? obj)

View File

@ -118,6 +118,7 @@ public class SimpleGrammarTests
};
Grammar grammar = builder.Build();
// n + n
List<SemanticToken> tokens =
[
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
@ -129,7 +130,73 @@ public class SimpleGrammarTests
SemanticToken.End
];
// 验证分析语句不会抛出错误
grammar.Analyse(tokens);
// 分析树为
// E
// |
// /\
// / | \
// E + T
// | |
// T F
// | |
// F n
// |
// n
SyntaxNode root = grammar.Analyse(tokens);
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
Assert.Equal(3, root.Children.Count);
Assert.Contains(root.Children, node =>
{
if (node.IsTerminated && node.GetSemanticToken().TokenType == SemanticTokenType.Operator)
{
OperatorSemanticToken token = (OperatorSemanticToken)node.GetSemanticToken();
return token.OperatorType == OperatorType.Plus;
}
return false;
});
Assert.Equal(9, root.Count());
}
[Fact]
public void AnalyseComplexSentenceTest()
{
    // Token factories; every token in this test sits at line 0, character 0.
    static IdentifierSemanticToken Identifier() =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };

    static DelimiterSemanticToken Delimiter(string literal, DelimiterType type) =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = literal, DelimiterType = type };

    static OperatorSemanticToken Operator(string literal, OperatorType type) =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = literal, OperatorType = type };

    Grammar grammar = new GrammarBuilder
    {
        Generators = s_simpleGrammar,
        Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    }.Build();

    // Input sentence: (n + n) * n
    SemanticToken[] sentence =
    [
        Delimiter("(", DelimiterType.LeftParenthesis),
        Identifier(),
        Operator("+", OperatorType.Plus),
        Identifier(),
        Delimiter(")", DelimiterType.RightParenthesis),
        Operator("*", OperatorType.Multiply),
        Identifier(),
        SemanticToken.End
    ];

    SyntaxNode tree = grammar.Analyse(sentence);

    // Enumerating the root walks the whole tree, so Count() is the total node count.
    Assert.Equal(18, tree.Count());
    Assert.False(tree.IsTerminated);
    Assert.Equal(NonTerminatorType.ProgramStruct, tree.GetNonTerminatorType());
    Assert.Single(tree.Children);
}
}