add: 抽象语法树节点 (#5)
fix: - Expression格式化过程中如果Pos在最右边就不显示 - Expression中不考虑Pos Reviewed-on: PostGuard/Canon#5
This commit is contained in:
parent
9930dbc42c
commit
315deaabf2
|
@ -24,7 +24,7 @@ public class Expression : IEquatable<Expression>
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 当前移进的位置
|
/// 当前移进的位置
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public int Pos { get; set; }
|
public required int Pos { get; init; }
|
||||||
|
|
||||||
public bool Equals(Expression? other)
|
public bool Equals(Expression? other)
|
||||||
{
|
{
|
||||||
|
@ -52,7 +52,8 @@ public class Expression : IEquatable<Expression>
|
||||||
}
|
}
|
||||||
|
|
||||||
return Left == other.Left
|
return Left == other.Left
|
||||||
&& LookAhead == other.LookAhead;
|
&& LookAhead == other.LookAhead
|
||||||
|
&& Pos == other.Pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool Equals(object? obj)
|
public override bool Equals(object? obj)
|
||||||
|
@ -69,6 +70,7 @@ public class Expression : IEquatable<Expression>
|
||||||
{
|
{
|
||||||
int hash = Left.GetHashCode();
|
int hash = Left.GetHashCode();
|
||||||
hash ^= LookAhead.GetHashCode();
|
hash ^= LookAhead.GetHashCode();
|
||||||
|
hash ^= Pos.GetHashCode();
|
||||||
|
|
||||||
foreach (TerminatorBase terminator in Right)
|
foreach (TerminatorBase terminator in Right)
|
||||||
{
|
{
|
||||||
|
@ -93,6 +95,11 @@ public class Expression : IEquatable<Expression>
|
||||||
result += Right[i].ToString();
|
result += Right[i].ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Pos == Right.Count)
|
||||||
|
{
|
||||||
|
result += '~';
|
||||||
|
}
|
||||||
|
|
||||||
result += $", {LookAhead}";
|
result += $", {LookAhead}";
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -8,10 +8,10 @@ public class Grammar
|
||||||
|
|
||||||
public required LrState BeginState { get; init; }
|
public required LrState BeginState { get; init; }
|
||||||
|
|
||||||
public void Analyse(IEnumerable<SemanticToken> tokens)
|
public SyntaxNode Analyse(IEnumerable<SemanticToken> tokens)
|
||||||
{
|
{
|
||||||
Stack<LrState> stack = [];
|
Stack<AnalyseState> stack = [];
|
||||||
stack.Push(BeginState);
|
stack.Push(new AnalyseState(BeginState, new SyntaxNode(SemanticToken.End)));
|
||||||
|
|
||||||
using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
|
using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
|
||||||
if (!enumerator.MoveNext())
|
if (!enumerator.MoveNext())
|
||||||
|
@ -21,11 +21,11 @@ public class Grammar
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
LrState top = stack.Peek();
|
AnalyseState top = stack.Peek();
|
||||||
|
|
||||||
// 尝试进行移进
|
// 尝试进行移进
|
||||||
bool acceptFlag = false, reduceFlag = false;
|
bool acceptFlag = false, reduceFlag = false;
|
||||||
foreach (Expression e in top.Expressions)
|
foreach (Expression e in top.State.Expressions)
|
||||||
{
|
{
|
||||||
if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
|
if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
|
||||||
{
|
{
|
||||||
|
@ -36,21 +36,24 @@ public class Grammar
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
reduceFlag = true;
|
reduceFlag = true;
|
||||||
|
SyntaxNode newNode = new(e.Left.Type);
|
||||||
|
|
||||||
for (int i = 0; i < e.Right.Count; i++)
|
for (int i = 0; i < e.Right.Count; i++)
|
||||||
{
|
{
|
||||||
stack.Pop();
|
newNode.Children.Add(stack.Pop().Node);
|
||||||
}
|
}
|
||||||
|
|
||||||
stack.Push(stack.Peek().Transformer[e.Left]);
|
stack.Push(new AnalyseState(stack.Peek().State.Transformer[e.Left],
|
||||||
|
newNode));
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (acceptFlag)
|
if (acceptFlag)
|
||||||
{
|
{
|
||||||
// 接受文法 退出循环
|
// 接受文法 退出循环
|
||||||
break;
|
return top.Node;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reduceFlag)
|
if (reduceFlag)
|
||||||
|
@ -60,9 +63,9 @@ public class Grammar
|
||||||
}
|
}
|
||||||
|
|
||||||
// 尝试进行移进
|
// 尝试进行移进
|
||||||
if (top.Transformer.TryGetValue(enumerator.Current, out LrState? next))
|
if (top.State.Transformer.TryGetValue(enumerator.Current, out LrState? next))
|
||||||
{
|
{
|
||||||
stack.Push(next);
|
stack.Push(new AnalyseState(next, new SyntaxNode(enumerator.Current)));
|
||||||
if (enumerator.MoveNext())
|
if (enumerator.MoveNext())
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
|
@ -76,4 +79,6 @@ public class Grammar
|
||||||
throw new InvalidOperationException("Failed to analyse input grammar");
|
throw new InvalidOperationException("Failed to analyse input grammar");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// One entry of the analysis stack used by <c>Analyse</c>: an LR state paired with
/// the syntax tree node that was pushed together with it.
/// </summary>
/// <param name="State">The LR state on the stack.</param>
/// <param name="Node">
/// The syntax tree node stored with the state: the token node created on a shift,
/// or the non-terminator node created on a reduce.
/// </param>
private record AnalyseState(LrState State, SyntaxNode Node);
|
||||||
}
|
}
|
||||||
|
|
|
@ -177,7 +177,7 @@ public class GrammarBuilder
|
||||||
{
|
{
|
||||||
Expression newExpression = new()
|
Expression newExpression = new()
|
||||||
{
|
{
|
||||||
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead
|
Left = nonTerminator, Right = nextExpression, LookAhead = lookAhead, Pos = 0
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!closure.Contains(newExpression))
|
if (!closure.Contains(newExpression))
|
||||||
|
@ -207,7 +207,7 @@ public class GrammarBuilder
|
||||||
|
|
||||||
Expression begin = new()
|
Expression begin = new()
|
||||||
{
|
{
|
||||||
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator
|
Left = Begin, Right = Generators[Begin].First(), LookAhead = Terminator.EndTerminator, Pos = 0
|
||||||
};
|
};
|
||||||
|
|
||||||
LrState beginState = new() { Expressions = CalculateClosure(begin) };
|
LrState beginState = new() { Expressions = CalculateClosure(begin) };
|
||||||
|
@ -235,9 +235,8 @@ public class GrammarBuilder
|
||||||
TerminatorBase next = e.Right[e.Pos];
|
TerminatorBase next = e.Right[e.Pos];
|
||||||
Expression nextExpression = new()
|
Expression nextExpression = new()
|
||||||
{
|
{
|
||||||
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos
|
Left = e.Left, Right = e.Right, LookAhead = e.LookAhead, Pos = e.Pos + 1
|
||||||
};
|
};
|
||||||
nextExpression.Pos += 1;
|
|
||||||
|
|
||||||
if (!nextExpressions.TryAdd(next, [nextExpression]))
|
if (!nextExpressions.TryAdd(next, [nextExpression]))
|
||||||
{
|
{
|
||||||
|
|
130
Canon.Core/GrammarParser/SyntaxNode.cs
Normal file
130
Canon.Core/GrammarParser/SyntaxNode.cs
Normal file
|
@ -0,0 +1,130 @@
|
||||||
|
using System.Collections;
|
||||||
|
using Canon.Core.Enums;
|
||||||
|
using Canon.Core.LexicalParser;
|
||||||
|
|
||||||
|
namespace Canon.Core.GrammarParser;
|
||||||
|
|
||||||
|
/// <summary>
/// A node of the abstract syntax tree.
/// </summary>
/// <remarks>
/// A node is either terminated (it wraps a <see cref="SemanticToken"/>) or
/// non-terminated (it carries a <see cref="NonTerminatorType"/>).
/// Enumerating a node yields the node itself followed by all of its descendants
/// in depth-first, pre-order sequence.
/// </remarks>
public class SyntaxNode : IEquatable<SyntaxNode>, IEnumerable<SyntaxNode>
{
    // Only set by the token constructor; stays null for non-terminated nodes.
    private readonly SemanticToken? _semanticToken;

    // Only meaningful for non-terminated nodes; keeps its default value otherwise.
    private readonly NonTerminatorType _nonTerminatorType;

    /// <summary>
    /// Whether this node was created from a token (<c>true</c>) or from a
    /// non-terminator type (<c>false</c>).
    /// </summary>
    public bool IsTerminated { get; }

    /// <summary>
    /// The direct children of this node.
    /// </summary>
    public List<SyntaxNode> Children { get; } = [];

    /// <summary>
    /// Creates a terminated node wrapping the given token.
    /// </summary>
    /// <param name="token">The token produced by lexical analysis.</param>
    public SyntaxNode(SemanticToken token)
    {
        IsTerminated = true;
        _semanticToken = token;
    }

    /// <summary>
    /// Creates a non-terminated node of the given type.
    /// </summary>
    /// <param name="nonTerminatorType">The non-terminator type of the node.</param>
    public SyntaxNode(NonTerminatorType nonTerminatorType)
    {
        IsTerminated = false;
        _nonTerminatorType = nonTerminatorType;
    }

    /// <summary>
    /// Gets the token held by a terminated node.
    /// </summary>
    /// <returns>The token produced by lexical analysis.</returns>
    /// <exception cref="InvalidOperationException">The node is not terminated.</exception>
    public SemanticToken GetSemanticToken()
    {
        if (!IsTerminated)
        {
            throw new InvalidOperationException("Can not get semantic token from a not terminated node");
        }

        return _semanticToken!;
    }

    /// <summary>
    /// Gets the type of a non-terminated node.
    /// </summary>
    /// <returns>The non-terminator type.</returns>
    /// <exception cref="InvalidOperationException">The node is terminated.</exception>
    public NonTerminatorType GetNonTerminatorType()
    {
        if (IsTerminated)
        {
            throw new InvalidOperationException("Can not get non terminated type from a terminated node");
        }

        return _nonTerminatorType;
    }

    /// <summary>
    /// Enumerates this node and then, recursively, every child subtree in order.
    /// </summary>
    public IEnumerator<SyntaxNode> GetEnumerator()
    {
        yield return this;

        foreach (SyntaxNode child in Children)
        {
            foreach (SyntaxNode node in child)
            {
                yield return node;
            }
        }
    }

    /// <summary>
    /// Compares two nodes by their own payload: the token for terminated nodes,
    /// the non-terminator type otherwise. Children are not compared.
    /// </summary>
    /// <param name="other">The node to compare with.</param>
    /// <returns><c>true</c> when both nodes are of the same kind and carry an equal payload.</returns>
    public bool Equals(SyntaxNode? other)
    {
        if (other is null)
        {
            return false;
        }

        if (IsTerminated != other.IsTerminated)
        {
            return false;
        }

        if (IsTerminated)
        {
            return GetSemanticToken() == other.GetSemanticToken();
        }
        else
        {
            // Open question from the original author: should equality also
            // require the children to be equal?
            return GetNonTerminatorType() == other.GetNonTerminatorType();
        }
    }

    /// <inheritdoc />
    public override bool Equals(object? obj)
    {
        if (obj is not SyntaxNode other)
        {
            return false;
        }

        return Equals(other);
    }

    /// <summary>
    /// Hashes the same payload that <see cref="Equals(SyntaxNode?)"/> compares.
    /// </summary>
    public override int GetHashCode()
    {
        if (IsTerminated)
        {
            return GetSemanticToken().GetHashCode();
        }
        else
        {
            return GetNonTerminatorType().GetHashCode();
        }
    }

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Equality operator. Two null references are equal; a null reference never
    /// equals a node.
    /// </summary>
    public static bool operator ==(SyntaxNode? a, SyntaxNode? b)
    {
        // Pattern-based null checks avoid re-entering this operator and avoid
        // dereferencing a null left operand.
        if (a is null)
        {
            return b is null;
        }

        return a.Equals(b);
    }

    /// <summary>
    /// Inequality operator; the exact negation of <c>==</c>.
    /// </summary>
    public static bool operator !=(SyntaxNode? a, SyntaxNode? b)
    {
        return !(a == b);
    }
}
|
|
@ -96,7 +96,7 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
|
||||||
case SemanticTokenType.Delimiter:
|
case SemanticTokenType.Delimiter:
|
||||||
return _delimiterType.ToString();
|
return _delimiterType.ToString();
|
||||||
default:
|
default:
|
||||||
return _keywordType.ToString();
|
return _terminatorType.ToString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,23 +189,18 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 语法中的非终结符
|
/// 语法中的非终结符
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
|
public class NonTerminator(NonTerminatorType type) : TerminatorBase, IEquatable<NonTerminator>
|
||||||
{
|
{
|
||||||
public override bool IsTerminated => false;
|
public override bool IsTerminated => false;
|
||||||
|
|
||||||
private readonly NonTerminatorType _type;
|
public NonTerminatorType Type { get; } = type;
|
||||||
|
|
||||||
public NonTerminator(NonTerminatorType type)
|
|
||||||
{
|
|
||||||
_type = type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public override int GetHashCode()
|
public override int GetHashCode()
|
||||||
{
|
{
|
||||||
return _type.GetHashCode();
|
return Type.GetHashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
public override string ToString() => _type.ToString();
|
public override string ToString() => Type.ToString();
|
||||||
|
|
||||||
public bool Equals(NonTerminator? other)
|
public bool Equals(NonTerminator? other)
|
||||||
{
|
{
|
||||||
|
@ -214,7 +209,7 @@ public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return _type == other._type;
|
return Type == other.Type;
|
||||||
}
|
}
|
||||||
|
|
||||||
public override bool Equals(object? obj)
|
public override bool Equals(object? obj)
|
||||||
|
|
|
@ -118,6 +118,7 @@ public class SimpleGrammarTests
|
||||||
};
|
};
|
||||||
|
|
||||||
Grammar grammar = builder.Build();
|
Grammar grammar = builder.Build();
|
||||||
|
// n + n
|
||||||
List<SemanticToken> tokens =
|
List<SemanticToken> tokens =
|
||||||
[
|
[
|
||||||
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
|
new IdentifierSemanticToken { LinePos = 0, CharacterPos = 0, LiteralValue = "n" },
|
||||||
|
@ -129,7 +130,73 @@ public class SimpleGrammarTests
|
||||||
SemanticToken.End
|
SemanticToken.End
|
||||||
];
|
];
|
||||||
|
|
||||||
// 验证分析语句不会抛出错误
|
// 分析树为
|
||||||
grammar.Analyse(tokens);
|
// E
|
||||||
|
// |
|
||||||
|
// /\
|
||||||
|
// / | \
|
||||||
|
// E + T
|
||||||
|
// | |
|
||||||
|
// T F
|
||||||
|
// | |
|
||||||
|
// F n
|
||||||
|
// |
|
||||||
|
// n
|
||||||
|
SyntaxNode root = grammar.Analyse(tokens);
|
||||||
|
Assert.Equal(NonTerminatorType.ProgramStruct, root.GetNonTerminatorType());
|
||||||
|
Assert.Equal(3, root.Children.Count);
|
||||||
|
Assert.Contains(root.Children, node =>
|
||||||
|
{
|
||||||
|
if (node.IsTerminated && node.GetSemanticToken().TokenType == SemanticTokenType.Operator)
|
||||||
|
{
|
||||||
|
OperatorSemanticToken token = (OperatorSemanticToken)node.GetSemanticToken();
|
||||||
|
|
||||||
|
return token.OperatorType == OperatorType.Plus;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
Assert.Equal(9, root.Count());
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
public void AnalyseComplexSentenceTest()
{
    // Build the parser from the shared simple grammar.
    GrammarBuilder grammarBuilder = new()
    {
        Generators = s_simpleGrammar,
        Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    };
    Grammar parser = grammarBuilder.Build();

    // Token factories; every token is placed at line 0, character 0.
    static IdentifierSemanticToken MakeIdentifier() =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };

    static DelimiterSemanticToken MakeDelimiter(string literal, DelimiterType type) =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = literal, DelimiterType = type };

    static OperatorSemanticToken MakeOperator(string literal, OperatorType type) =>
        new() { LinePos = 0, CharacterPos = 0, LiteralValue = literal, OperatorType = type };

    // Sentence under test: (n + n) * n
    List<SemanticToken> sentence =
    [
        MakeDelimiter("(", DelimiterType.LeftParenthesis),
        MakeIdentifier(),
        MakeOperator("+", OperatorType.Plus),
        MakeIdentifier(),
        MakeDelimiter(")", DelimiterType.RightParenthesis),
        MakeOperator("*", OperatorType.Multiply),
        MakeIdentifier(),
        SemanticToken.End
    ];

    SyntaxNode tree = parser.Analyse(sentence);

    // Enumerating the root visits the root and all of its descendants.
    Assert.Equal(18, tree.Count());
    Assert.False(tree.IsTerminated);
    Assert.Equal(NonTerminatorType.ProgramStruct, tree.GetNonTerminatorType());
    Assert.Single(tree.Children);
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user