feat: 对输入的记号流进行分析 (#4)
添加了语法分析中各个重要状态类的`ToString`方法,方便调试 Reviewed-on: PostGuard/Canon#4
This commit is contained in:
parent
f8bf60e7ea
commit
9930dbc42c
|
@ -12,5 +12,6 @@ jobs:
|
|||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: ${{ runner.os }}-nuget
|
||||
save-always: true
|
||||
- name: Run test code
|
||||
run: dotnet test
|
||||
|
|
|
@ -78,6 +78,26 @@ public class Expression : IEquatable<Expression>
|
|||
return hash;
|
||||
}
|
||||
|
||||
/// <summary>
/// Renders this item as text for debugging: the left-hand symbol, an arrow, each right-hand
/// symbol preceded by a space, a '~' at the current position, then the look-ahead symbol
/// after a comma.
/// </summary>
/// <returns>The textual form of this item.</returns>
public override string ToString()
{
    // Fully qualified because the using directives of this file are not guaranteed to include System.Text.
    System.Text.StringBuilder builder = new();
    builder.Append($"{Left} -> ");

    for (int i = 0; i < Right.Count; i++)
    {
        if (i == Pos)
        {
            builder.Append('~');
        }

        builder.Append(' ').Append(Right[i]);
    }

    // When Pos equals Right.Count the loop above never sees that index,
    // so a completed item would otherwise be printed without any position marker.
    if (Pos == Right.Count)
    {
        builder.Append('~');
    }

    builder.Append($", {LookAhead}");

    return builder.ToString();
}
|
||||
|
||||
public static bool operator ==(Expression a, Expression b)
|
||||
{
|
||||
return a.Equals(b);
|
||||
|
|
|
@ -1,8 +1,79 @@
|
|||
namespace Canon.Core.GrammarParser;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
namespace Canon.Core.GrammarParser;
|
||||
|
||||
/// <summary>
/// A grammar described by a start non-terminator and an initial LR state, able to check
/// a stream of tokens against the state machine reachable from that initial state.
/// </summary>
public class Grammar
{
    /// <summary>
    /// The start non-terminator. A completed item whose left side equals it means the input is accepted.
    /// </summary>
    public required NonTerminator Begin { get; init; }

    /// <summary>
    /// The state pushed onto the stack before any token is consumed.
    /// </summary>
    public required LrState BeginState { get; init; }

    /// <summary>
    /// Runs the shift/reduce loop over <paramref name="tokens"/> and returns once the input is accepted.
    /// </summary>
    /// <param name="tokens">The token stream to analyse; it must contain at least one token.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The stream is empty, the tokens run out before acceptance, or the current state can neither
    /// reduce nor shift on the current token.
    /// </exception>
    public void Analyse(IEnumerable<SemanticToken> tokens)
    {
        ArgumentNullException.ThrowIfNull(tokens);

        Stack<LrState> stack = new();
        stack.Push(BeginState);

        using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
        if (!enumerator.MoveNext())
        {
            throw new InvalidOperationException("Input token list is empty");
        }

        while (true)
        {
            LrState top = stack.Peek();

            // Convert the current token once per step instead of once per inspected item.
            Terminator current = enumerator.Current;

            // Try to accept or reduce: look for a completed item whose look-ahead is the current token.
            bool reduced = false;
            foreach (Expression e in top.Expressions)
            {
                if (e.Pos == e.Right.Count && e.LookAhead == current)
                {
                    if (e.Left == Begin)
                    {
                        // The grammar accepts the input.
                        return;
                    }

                    // Pop one state per symbol on the right-hand side ...
                    for (int i = 0; i < e.Right.Count; i++)
                    {
                        stack.Pop();
                    }

                    // ... then follow the transition on the reduced non-terminator from the exposed state.
                    stack.Push(stack.Peek().Transformer[e.Left]);

                    // The stack no longer matches `top`; stop scanning its items so that a second
                    // matching item cannot trigger another reduction against the changed stack.
                    reduced = true;
                    break;
                }
            }

            if (reduced)
            {
                // Re-evaluate from the new top of the stack with the same token.
                continue;
            }

            // Try to shift the current token.
            if (top.Transformer.TryGetValue(current, out LrState? next))
            {
                stack.Push(next);
                if (enumerator.MoveNext())
                {
                    continue;
                }

                throw new InvalidOperationException("Run out of token but not accept");
            }

            throw new InvalidOperationException("Failed to analyse input grammar");
        }
    }
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
namespace Canon.Core.GrammarParser;
|
||||
using System.Text;
|
||||
|
||||
namespace Canon.Core.GrammarParser;
|
||||
|
||||
/// <summary>
|
||||
/// LR语法中的一个项目集规范族
|
||||
|
@ -76,6 +78,18 @@ public class LrState : IEquatable<LrState>
|
|||
return hash;
|
||||
}
|
||||
|
||||
/// <summary>
/// Lists every item of this state, one per line, each line terminated by '\n'.
/// </summary>
/// <returns>The concatenated textual form of all items in this state.</returns>
public override string ToString()
{
    StringBuilder text = new();

    foreach (Expression item in Expressions)
    {
        text.Append($"{item}\n");
    }

    return text.ToString();
}
|
||||
|
||||
public static bool operator ==(LrState a, LrState b)
|
||||
{
|
||||
return a.Equals(b);
|
||||
|
|
|
@ -66,7 +66,7 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
|
|||
/// <summary>
|
||||
/// 栈底的终结符
|
||||
/// </summary>
|
||||
public static Terminator EndTerminator => new(KeywordType.End);
|
||||
public static Terminator EndTerminator => new(SemanticTokenType.End);
|
||||
|
||||
public override int GetHashCode()
|
||||
{
|
||||
|
@ -85,6 +85,21 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns a readable name for this terminator, intended for debugging output.
/// </summary>
/// <returns>
/// The keyword, operator or delimiter name for those three kinds; the name of the
/// terminator kind itself for every other kind.
/// </returns>
public override string ToString()
{
    return _terminatorType switch
    {
        SemanticTokenType.Keyword => _keywordType.ToString(),
        SemanticTokenType.Operator => _operatorType.ToString(),
        SemanticTokenType.Delimiter => _delimiterType.ToString(),
        // NOTE(review): the remaining kinds presumably leave _keywordType at its default value,
        // so printing it would label them as a keyword; print the kind instead. Confirm against the constructors.
        _ => _terminatorType.ToString()
    };
}
|
||||
|
||||
public bool Equals(Terminator? other)
|
||||
{
|
||||
if (other is null)
|
||||
|
@ -190,6 +205,8 @@ public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
|
|||
return _type.GetHashCode();
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the name of the non-terminator type held by this instance.
/// </summary>
public override string ToString()
{
    return _type.ToString();
}
|
||||
|
||||
public bool Equals(NonTerminator? other)
|
||||
{
|
||||
if (other is null)
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
namespace Canon.Core.LexicalParser;
|
||||
using Canon.Core.GrammarParser;
|
||||
|
||||
namespace Canon.Core.LexicalParser;
|
||||
|
||||
using Enums;
|
||||
|
||||
|
@ -23,6 +25,39 @@ public abstract class SemanticToken
|
|||
/// 记号的字面值
|
||||
/// </summary>
|
||||
public required string LiteralValue { get; init; }
|
||||
|
||||
/// <summary>
/// Converts a token into the terminator that represents it, selected by the token's type.
/// </summary>
/// <param name="token">The token to convert.</param>
/// <returns>
/// A predefined terminator for character, identifier, number and end tokens; a new terminator
/// built from the delimiter, keyword or operator type for the other three kinds.
/// </returns>
/// <exception cref="ArgumentException">The token type is none of the handled kinds.</exception>
public static implicit operator Terminator(SemanticToken token)
{
    return token.TokenType switch
    {
        SemanticTokenType.Character => Terminator.CharacterTerminator,
        SemanticTokenType.Identifier => Terminator.IdentifierTerminator,
        SemanticTokenType.Number => Terminator.NumberTerminator,
        SemanticTokenType.End => Terminator.EndTerminator,
        SemanticTokenType.Delimiter => new Terminator(((DelimiterSemanticToken)token).DelimiterType),
        SemanticTokenType.Keyword => new Terminator(((KeywordSemanticToken)token).KeywordType),
        SemanticTokenType.Operator => new Terminator(((OperatorSemanticToken)token).OperatorType),
        _ => throw new ArgumentException("Unknown token type")
    };
}
|
||||
|
||||
/// <summary>
/// The shared bottom-of-stack (end of input) token.
/// </summary>
/// <remarks>
/// Initialised once and reused, so every access returns the same instance
/// instead of allocating a new token each time.
/// </remarks>
public static EndSemanticToken End { get; } = new()
{
    LinePos = 0, CharacterPos = 0, LiteralValue = string.Empty
};
|
||||
|
||||
/// <summary>
/// Returns the literal value of this token.
/// </summary>
public override string ToString()
{
    return LiteralValue;
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -189,3 +224,8 @@ public class IdentifierSemanticToken : SemanticToken
|
|||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// A token whose type is <see cref="SemanticTokenType.End"/>.
/// </summary>
public class EndSemanticToken : SemanticToken
{
    /// <summary>
    /// Always <see cref="SemanticTokenType.End"/>.
    /// </summary>
    public override SemanticTokenType TokenType
    {
        get { return SemanticTokenType.End; }
    }
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
using Canon.Core.Enums;
|
||||
using Canon.Core.GrammarParser;
|
||||
using Canon.Core.LexicalParser;
|
||||
|
||||
namespace Canon.Tests.GrammarParserTests;
|
||||
|
||||
|
@ -107,4 +108,28 @@ public class SimpleGrammarTests
|
|||
Assert.Contains(new Terminator(DelimiterType.LeftParenthesis), grammar.BeginState.Transformer.Keys);
|
||||
Assert.Contains(Terminator.IdentifierTerminator, grammar.BeginState.Transformer.Keys);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the simple grammar and analyses the token sequence "n + n" followed by the end token;
/// the test passes when the analysis completes without throwing.
/// </summary>
[Fact]
public void AnalyseSingleSentenceTest()
{
    // Arrange: build the grammar from the shared generator table.
    GrammarBuilder builder = new()
    {
        Generators = s_simpleGrammar,
        Begin = new NonTerminator(NonTerminatorType.StartNonTerminator)
    };
    Grammar grammar = builder.Build();

    IdentifierSemanticToken leftOperand = new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };
    OperatorSemanticToken plus = new()
    {
        LinePos = 0,
        CharacterPos = 0,
        LiteralValue = "+",
        OperatorType = OperatorType.Plus
    };
    IdentifierSemanticToken rightOperand = new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };

    List<SemanticToken> tokens = [leftOperand, plus, rightOperand, SemanticToken.End];

    // Act: analysing the sentence must not throw.
    grammar.Analyse(tokens);
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user