add: 抽象语法树节点 (#5)
fix: - Expression格式化过程中如果Pos在最右边就不显示 - Expression中不考虑Pos Reviewed-on: PostGuard/Canon#5
This commit is contained in:
parent
9930dbc42c
commit
315deaabf2
|
@ -24,7 +24,7 @@ public class Expression : IEquatable<Expression>
|
|||
/// <summary>
|
||||
/// 当前移进的位置
|
||||
/// </summary>
|
||||
public int Pos { get; set; }
|
||||
public required int Pos { get; init; }
|
||||
|
||||
public bool Equals(Expression? other)
|
||||
{
|
||||
|
@ -52,7 +52,8 @@ public class Expression : IEquatable<Expression>
|
|||
}
|
||||
|
||||
return Left == other.Left
|
||||
&& LookAhead == other.LookAhead;
|
||||
&& LookAhead == other.LookAhead
|
||||
&& Pos == other.Pos;
|
||||
}
|
||||
|
||||
public override bool Equals(object? obj)
|
||||
|
@ -69,6 +70,7 @@ public class Expression : IEquatable<Expression>
|
|||
{
|
||||
int hash = Left.GetHashCode();
|
||||
hash ^= LookAhead.GetHashCode();
|
||||
hash ^= Pos.GetHashCode();
|
||||
|
||||
foreach (TerminatorBase terminator in Right)
|
||||
{
|
||||
|
@ -93,6 +95,11 @@ public class Expression : IEquatable<Expression>
|
|||
result += Right[i].ToString();
|
||||
}
|
||||
|
||||
if (Pos == Right.Count)
|
||||
{
|
||||
result += '~';
|
||||
}
|
||||
|
||||
result += $", {LookAhead}";
|
||||
|
||||
return result;
|
||||
|
|
|
@ -8,10 +8,10 @@ public class Grammar
|
|||
|
||||
public required LrState BeginState { get; init; }
|
||||
|
||||
public void Analyse(IEnumerable<SemanticToken> tokens)
|
||||
public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
|
||||
{
|
||||
Stack<LrState> stack = [];
|
||||
stack.Push(BeginState);
|
||||
Stack<AnalyseState> stack = [];
|
||||
stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
|
||||
|
||||
using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
|
||||
if (!enumerator.MoveNext())
|
||||
|
@ -21,11 +21,11 @@ public class Grammar
|
|||
|
||||
while (true)
|
||||
{
|
||||
LrState top = stack.Peek();
|
||||
AnalyseState top = stack.Peek();
|
||||
|
||||
// 尝试进行移进
|
||||
bool acceptFlag = false, reduceFlag = false;
|
||||
foreach (Expression e in top.Expressions)
|
||||
foreach (Expression e in top.State.Expressions)
|
||||
{
|
||||
if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
|
||||
{
|
||||
|
@ -36,21 +36,24 @@ public class Grammar
|
|||
else
|
||||
{
|
||||
reduceFlag = true;
|
||||
SyntaxNode newNode = new(e.Left.Type);
|
||||
|
||||
for (int i = 0; i < e.Right.Count; i++)
|
||||
{
|
||||
stack.Pop();
|
||||
newNode.Children.Add(stack.Pop().Node);
|
||||
}
|
||||
|
||||
stack.Push(stack.Peek().Transformer[e.Left]);
|
||||
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
|
||||
newNode));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (acceptFlag)
|
||||
{
|
||||
// 接受文法 退出循环
|
||||
break;
|
||||
return top.Node;
|
||||
}
|
||||
|
||||
if (reduceFlag)
|
||||
|
@ -60,9 +63,9 @@ public class Grammar
|
|||
}
|
||||
|
||||
// 尝试进行移进
|
||||
if (top.Transformer.TryGetValue(enumerator.Current, out LrState? next))
|
||||
if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
|
||||
{
|
||||
stack.Push(next);
|
||||
stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
|
||||
if (enumerator.MoveNext())
|
||||
{
|
||||
continue;
|
||||
|
@ -76,4 +79,6 @@ public class Grammar
|
|||
throw new InvalidOperationException("Failed to analyse input grammar");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// One entry of the analysis stack used by <c>Analyse</c>: an LR state paired with the syntax
/// node that was pushed together with it.
/// </summary>
/// <param name="State">The LR state whose expressions and transformer table are consulted while this entry is on top of the stack.</param>
/// <param name="Node">The syntax node stored alongside the state; it is collected as a child when the entry is popped during a reduction.</param>
private record AnalyseState(LrState State, SyntaxNode Node);
|
||||
}
|
||||
|
|
|
@ -177,7 +177,7 @@ public class GrammarBuilder
|
|||
{
|
||||
Expression newExpression = new()
|
||||
{
|
||||
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead
|
||||
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0
|
||||
};
|
||||
|
||||
if (!closure.Contains(newExpression))
|
||||
|
@ -207,7 +207,7 @@ public class GrammarBuilder
|
|||
|
||||
Expression begin = new()
|
||||
{
|
||||
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator
|
||||
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
|
||||
};
|
||||
|
||||
LrState beginState = new() { Expressions = CalculateClosure(begin) };
|
||||
|
@ -235,9 +235,8 @@ public class GrammarBuilder
|
|||
TerminatorBase next = e.Right[e.Pos];
|
||||
Expression nextExpression = new()
|
||||
{
|
||||
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos
|
||||
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + 1
|
||||
};
|
||||
nextExpression.Pos += 1;
|
||||
|
||||
if (!nextExpressions.TryAdd(next, [nextExpression]))
|
||||
{
|
||||
|
|
130
Canon.Core/GrammarParser/SyntaxNode.cs
Normal file
130
Canon.Core/GrammarParser/SyntaxNode.cs
Normal file
|
@ -0,0 +1,130 @@
|
|||
using System.Collections;
|
||||
using Canon.Core.Enums;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
namespace Canon.Core.GrammarParser;
|
||||
|
||||
/// <summary>
|
||||
/// 抽象语法树上的节点
|
||||
/// </summary>
|
||||
/// <summary>
/// A node of the abstract syntax tree.
/// </summary>
/// <remarks>
/// A node is either terminated, in which case it wraps the <see cref="SemanticToken"/> it was
/// constructed from, or non-terminated, in which case it carries a <see cref="NonTerminatorType"/>.
/// Enumerating a node yields the node itself followed by all of its descendants in pre-order.
/// Equality and hashing only look at the token or the non-terminator type of the node itself;
/// <see cref="Children"/> are not taken into account.
/// </remarks>
public class SyntaxNode : IEquatable<SyntaxNode>, IEnumerable<SyntaxNode>
{
    private readonly SemanticToken? _semanticToken;
    private readonly NonTerminatorType _nonTerminatorType;

    /// <summary>
    /// <c>true</c> when the node was constructed from a <see cref="SemanticToken"/>;
    /// <c>false</c> when it was constructed from a <see cref="NonTerminatorType"/>.
    /// </summary>
    public bool IsTerminated { get; }

    /// <summary>
    /// The direct children of this node. The list starts empty and is filled by the caller.
    /// </summary>
    public List<SyntaxNode> Children { get; } = [];

    /// <summary>
    /// Creates a terminated node wrapping the given token.
    /// </summary>
    /// <param name="token">The token stored in the node.</param>
    public SyntaxNode(SemanticToken token)
    {
        IsTerminated = true;
        _semanticToken = token;
    }

    /// <summary>
    /// Creates a non-terminated node of the given type.
    /// </summary>
    /// <param name="nonTerminatorType">The non-terminator type stored in the node.</param>
    public SyntaxNode(NonTerminatorType nonTerminatorType)
    {
        IsTerminated = false;
        _nonTerminatorType = nonTerminatorType;
    }

    /// <summary>
    /// Gets the token held by a terminated node.
    /// </summary>
    /// <returns>The token this node was constructed from.</returns>
    /// <exception cref="InvalidOperationException">The node is not terminated.</exception>
    public SemanticToken GetSemanticToken()
    {
        if (!IsTerminated)
        {
            throw new InvalidOperationException("Can not get semantic token from a not terminated node");
        }

        // A terminated node is only ever created by the token constructor, which assigns the field.
        return _semanticToken!;
    }

    /// <summary>
    /// Gets the type of a non-terminated node.
    /// </summary>
    /// <returns>The non-terminator type this node was constructed from.</returns>
    /// <exception cref="InvalidOperationException">The node is terminated.</exception>
    public NonTerminatorType GetNonTerminatorType()
    {
        if (IsTerminated)
        {
            throw new InvalidOperationException("Can not get non terminated type from a terminated node");
        }

        return _nonTerminatorType;
    }

    /// <summary>
    /// Enumerates this node and then, recursively, every child subtree in list order (pre-order).
    /// </summary>
    /// <returns>An enumerator over this node and all of its descendants.</returns>
    public IEnumerator<SyntaxNode> GetEnumerator()
    {
        yield return this;

        foreach (SyntaxNode child in Children)
        {
            foreach (SyntaxNode node in child)
            {
                yield return node;
            }
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Compares two nodes by kind and payload: terminated nodes compare their tokens,
    /// non-terminated nodes compare their non-terminator types.
    /// </summary>
    /// <param name="other">The node to compare with.</param>
    /// <returns><c>true</c> when both nodes are of the same kind and hold equal payloads.</returns>
    public bool Equals(SyntaxNode? other)
    {
        if (other is null)
        {
            return false;
        }

        if (IsTerminated != other.IsTerminated)
        {
            return false;
        }

        if (IsTerminated)
        {
            return GetSemanticToken() == other.GetSemanticToken();
        }

        // Open question from the original author: should equality also require equal children?
        return GetNonTerminatorType() == other.GetNonTerminatorType();
    }

    /// <inheritdoc />
    public override bool Equals(object? obj)
    {
        return obj is SyntaxNode other && Equals(other);
    }

    /// <summary>
    /// Hashes the same payload that <see cref="Equals(SyntaxNode?)"/> compares.
    /// </summary>
    /// <returns>The hash code of the token or of the non-terminator type.</returns>
    public override int GetHashCode()
    {
        if (IsTerminated)
        {
            return GetSemanticToken().GetHashCode();
        }

        return GetNonTerminatorType().GetHashCode();
    }

    /// <summary>
    /// Null-safe equality: two null references are equal, a null and a non-null reference are not.
    /// </summary>
    public static bool operator ==(SyntaxNode? a, SyntaxNode? b)
    {
        // Pattern checks are used instead of "==" so this operator never calls itself.
        return a is null ? b is null : a.Equals(b);
    }

    /// <summary>
    /// Null-safe inequality; the exact negation of the equality operator.
    /// </summary>
    public static bool operator !=(SyntaxNode? a, SyntaxNode? b)
    {
        return !(a == b);
    }
}
|
|
@ -96,7 +96,7 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
|
|||
case SemanticTokenType.Delimiter:
|
||||
return _delimiterType.ToString();
|
||||
default:
|
||||
return _keywordType.ToString();
|
||||
return _terminatorType.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -189,23 +189,18 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
|
|||
/// <summary>
|
||||
/// 语法中的非终结符
|
||||
/// </summary>
|
||||
public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
|
||||
public class NonTerminator(NonTerminatorType type) : TerminatorBase, IEquatable<NonTerminator>
|
||||
{
|
||||
public override bool IsTerminated => false;
|
||||
|
||||
private readonly NonTerminatorType _type;
|
||||
|
||||
public NonTerminator(NonTerminatorType type)
|
||||
{
|
||||
_type = type;
|
||||
}
|
||||
public NonTerminatorType Type { get; } = type;
|
||||
|
||||
public override int GetHashCode()
|
||||
{
|
||||
return _type.GetHashCode();
|
||||
return Type.GetHashCode();
|
||||
}
|
||||
|
||||
public override string ToString() => _type.ToString();
|
||||
public override string ToString() => Type.ToString();
|
||||
|
||||
public bool Equals(NonTerminator? other)
|
||||
{
|
||||
|
@ -214,7 +209,7 @@ public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
|
|||
return false;
|
||||
}
|
||||
|
||||
return _type == other._type;
|
||||
return Type == other.Type;
|
||||
}
|
||||
|
||||
public override bool Equals(object? obj)
|
||||
|
|
|
@ -118,6 +118,7 @@ public class SimpleGrammarTests
|
|||
};
|
||||
|
||||
Grammar grammar = builder.Build();
|
||||
// n + n
|
||||
List<SemanticToken> tokens =
|
||||
[
|
||||
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
|
||||
|
@ -129,7 +130,73 @@ public class SimpleGrammarTests
|
|||
SemanticToken.End
|
||||
];
|
||||
|
||||
// 验证分析语句不会抛出错误
|
||||
grammar.Analyse(tokens);
|
||||
// 分析树为
|
||||
// E
|
||||
// |
|
||||
// /\
|
||||
// / | \
|
||||
// E + T
|
||||
// | |
|
||||
// T F
|
||||
// | |
|
||||
// F n
|
||||
// |
|
||||
// n
|
||||
SyntaxNode root = grammar.Analyse(tokens);
|
||||
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
|
||||
Assert.Equal(3, root.Children.Count);
|
||||
Assert.Contains(root.Children, node =>
|
||||
{
|
||||
if (node.IsTerminated && node.GetSemanticToken().TokenType == SemanticTokenType.Operator)
|
||||
{
|
||||
OperatorSemanticToken token = (OperatorSemanticToken)node.GetSemanticToken();
|
||||
|
||||
return token.OperatorType == OperatorType.Plus;
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
Assert.Equal(9, root.Count());
|
||||
}
|
||||
|
||||
/// <summary>
/// Analyses the sentence <c>(n + n) * n</c> with the simple grammar and checks the overall
/// shape of the returned syntax tree.
/// </summary>
[Fact]
public void AnalyseComplexSentenceTest()
{
    // Local factories keep the sentence below readable; every token sits at line 0, character 0.
    static IdentifierSemanticToken NewIdentifier() =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };

    static DelimiterSemanticToken NewDelimiter(string literal, DelimiterType delimiterType) =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = literal, DelimiterType = delimiterType };

    static OperatorSemanticToken NewOperator(string literal, OperatorType operatorType) =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = literal, OperatorType = operatorType };

    Grammar parser = new GrammarBuilder
    {
        Generators = s_simpleGrammar, Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    }.Build();

    // Input sentence: (n + n) * n
    List<SemanticToken> sentence =
    [
        NewDelimiter("(", DelimiterType.LeftParenthesis),
        NewIdentifier(),
        NewOperator("+", OperatorType.Plus),
        NewIdentifier(),
        NewDelimiter(")", DelimiterType.RightParenthesis),
        NewOperator("*", OperatorType.Multiply),
        NewIdentifier(),
        SemanticToken.End
    ];

    SyntaxNode tree = parser.Analyse(sentence);

    // The tree has 18 nodes in total and a non-terminated root with exactly one child.
    Assert.Equal(18, tree.Count());
    Assert.False(tree.IsTerminated);
    Assert.Equal(NonTerminatorType.ProgramStruct, tree.GetNonTerminatorType());
    Assert.Single(tree.Children);
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user