feat: 对输入的记号流进行分析 (#4)

添加了语法分析中各个重要状态类的`ToString`方法,方便调试

Reviewed-on: PostGuard/Canon#4
This commit is contained in:
jackfiled 2024-03-11 19:33:02 +08:00
parent f8bf60e7ea
commit 9930dbc42c
7 changed files with 192 additions and 4 deletions

View File

@ -12,5 +12,6 @@ jobs:
with:
path: ~/.nuget/packages
key: ${{ runner.os }}-nuget
save-always: true
- name: Run test code
run: dotnet test

View File

@ -78,6 +78,26 @@ public class Expression : IEquatable<Expression>
return hash;
}
/// <summary>
/// Renders the item for debugging as <c>Left -> symbols, LookAhead</c>,
/// with a '~' marking the current parser position inside the right-hand side.
/// </summary>
/// <returns>A single-line textual form of this item.</returns>
public override string ToString()
{
    System.Text.StringBuilder builder = new();
    builder.Append($"{Left} -> ");

    for (int i = 0; i < Right.Count; i++)
    {
        if (i == Pos)
        {
            builder.Append('~');
        }

        builder.Append(' ').Append(Right[i]);
    }

    // A completed item has Pos == Right.Count, which the loop above never
    // reaches; emit the marker at the end so such items are distinguishable.
    if (Pos == Right.Count)
    {
        builder.Append('~');
    }

    builder.Append($", {LookAhead}");
    return builder.ToString();
}
public static bool operator ==(Expression a, Expression b)
{
return a.Equals(b);

View File

@ -1,8 +1,79 @@
namespace Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
namespace Canon.Core.GrammarParser;
/// <summary>
/// Drives a table-based LR analysis over a stream of tokens, starting from
/// <see cref="BeginState"/> and accepting when an item whose left side is
/// <see cref="Begin"/> is completed.
/// </summary>
public class Grammar
{
    /// <summary>
    /// The start non-terminator; completing an item with this left side accepts the input.
    /// </summary>
    public required NonTerminator Begin { get; init; }

    /// <summary>
    /// The state pushed onto the stack before any token is consumed.
    /// </summary>
    public required LrState BeginState { get; init; }

    /// <summary>
    /// Analyses the given token sequence with the state machine rooted at <see cref="BeginState"/>.
    /// Returns normally when the input is accepted.
    /// </summary>
    /// <param name="tokens">The tokens to analyse.</param>
    /// <exception cref="InvalidOperationException">
    /// The token sequence is empty, the tokens run out before acceptance,
    /// or neither a reduction nor a shift is possible for the current token.
    /// </exception>
    public void Analyse(IEnumerable<SemanticToken> tokens)
    {
        Stack<LrState> stack = [];
        stack.Push(BeginState);

        using IEnumerator<SemanticToken> enumerator = tokens.GetEnumerator();
        if (!enumerator.MoveNext())
        {
            throw new InvalidOperationException("Input token list is empty");
        }

        while (true)
        {
            LrState top = stack.Peek();

            // Try to reduce (or accept) first: look for a completed item whose
            // look-ahead matches the current token.
            bool acceptFlag = false, reduceFlag = false;
            foreach (Expression e in top.Expressions)
            {
                if (e.Pos == e.Right.Count && e.LookAhead == enumerator.Current)
                {
                    if (e.Left == Begin)
                    {
                        acceptFlag = true;
                    }
                    else
                    {
                        reduceFlag = true;

                        // Pop one state per right-hand-side symbol, then follow
                        // the goto transition on the reduced non-terminator.
                        for (int i = 0; i < e.Right.Count; i++)
                        {
                            stack.Pop();
                        }

                        stack.Push(stack.Peek().Transformer[e.Left]);
                    }

                    // Stop at the first applicable item: the stack may have just
                    // changed, so the remaining items of the old top state must
                    // not be applied to it.
                    break;
                }
            }

            if (acceptFlag)
            {
                // Input accepted; leave the loop.
                break;
            }

            if (reduceFlag)
            {
                // A reduction happened; re-examine the new top state with the same token.
                continue;
            }

            // Try to shift the current token.
            if (top.Transformer.TryGetValue(enumerator.Current, out LrState? next))
            {
                stack.Push(next);
                if (enumerator.MoveNext())
                {
                    continue;
                }
                else
                {
                    throw new InvalidOperationException("Run out of token but not accept");
                }
            }

            throw new InvalidOperationException("Failed to analyse input grammar");
        }
    }
}

View File

@ -1,4 +1,6 @@
namespace Canon.Core.GrammarParser;
using System.Text;
namespace Canon.Core.GrammarParser;
/// <summary>
/// LR语法中的一个项目集规范族
@ -76,6 +78,18 @@ public class LrState : IEquatable<LrState>
return hash;
}
/// <summary>
/// Lists every item of this state, one per line, each followed by '\n'.
/// </summary>
/// <returns>The concatenated textual form of all items in <c>Expressions</c>.</returns>
public override string ToString()
{
    StringBuilder text = new();

    foreach (Expression item in Expressions)
    {
        text.Append(item);
        text.Append('\n');
    }

    return text.ToString();
}
public static bool operator ==(LrState a, LrState b)
{
return a.Equals(b);

View File

@ -66,7 +66,7 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
/// <summary>
/// The terminator that marks the bottom of the stack.
/// </summary>
public static Terminator EndTerminator => new(KeywordType.End);
public static Terminator EndTerminator => new(SemanticTokenType.End);
public override int GetHashCode()
{
@ -85,6 +85,21 @@ public class Terminator : TerminatorBase, IEquatable<Terminator>
}
}
/// <summary>
/// Returns the name of the concrete operator or delimiter value for those
/// terminator kinds, and the keyword field's text for every other kind.
/// </summary>
// NOTE(review): non-keyword kinds other than operator/delimiter also print the
// keyword field; confirm that field is meaningful for them.
public override string ToString() => _terminatorType switch
{
    SemanticTokenType.Operator => _operatorType.ToString(),
    SemanticTokenType.Delimiter => _delimiterType.ToString(),
    // Keyword and all remaining kinds share the same result.
    _ => _keywordType.ToString(),
};
public bool Equals(Terminator? other)
{
if (other is null)
@ -190,6 +205,8 @@ public class NonTerminator : TerminatorBase, IEquatable<NonTerminator>
return _type.GetHashCode();
}
/// <summary>
/// Returns the textual form of this non-terminator's type.
/// </summary>
public override string ToString()
{
    return _type.ToString();
}
public bool Equals(NonTerminator? other)
{
if (other is null)

View File

@ -1,4 +1,6 @@
namespace Canon.Core.LexicalParser;
using Canon.Core.GrammarParser;
namespace Canon.Core.LexicalParser;
using Enums;
@ -23,6 +25,39 @@ public abstract class SemanticToken
/// 记号的字面值
/// </summary>
public required string LiteralValue { get; init; }
/// <summary>
/// Converts a token into the terminator it corresponds to, selected by the token's
/// <c>TokenType</c>.
/// </summary>
/// <param name="token">The token to convert.</param>
/// <exception cref="ArgumentException">The token type is not one of the handled kinds.</exception>
public static implicit operator Terminator(SemanticToken token) => token.TokenType switch
{
    // Kinds that map to a predefined terminator.
    SemanticTokenType.Character => Terminator.CharacterTerminator,
    SemanticTokenType.Identifier => Terminator.IdentifierTerminator,
    SemanticTokenType.Number => Terminator.NumberTerminator,
    SemanticTokenType.End => Terminator.EndTerminator,

    // Kinds whose terminator is built from the concrete token's own type value.
    SemanticTokenType.Delimiter => new Terminator(((DelimiterSemanticToken)token).DelimiterType),
    SemanticTokenType.Keyword => new Terminator(((KeywordSemanticToken)token).KeywordType),
    SemanticTokenType.Operator => new Terminator(((OperatorSemanticToken)token).OperatorType),

    _ => throw new ArgumentException("Unknown token type"),
};
/// <summary>
/// The shared end-of-input (stack bottom) token. The instance is created once
/// and the same object is returned on every access.
/// </summary>
public static EndSemanticToken End { get; } = new()
{
    LinePos = 0, CharacterPos = 0, LiteralValue = string.Empty
};
/// <summary>
/// Returns the literal text of this token.
/// </summary>
public override string ToString()
{
    return LiteralValue;
}
}
/// <summary>
@ -189,3 +224,8 @@ public class IdentifierSemanticToken : SemanticToken
return false;
}
}
/// <summary>
/// Token whose <see cref="TokenType"/> is always <see cref="SemanticTokenType.End"/>.
/// </summary>
public class EndSemanticToken : SemanticToken
{
    /// <inheritdoc />
    public override SemanticTokenType TokenType
    {
        get { return SemanticTokenType.End; }
    }
}

View File

@ -1,5 +1,6 @@
using Canon.Core.Enums;
using Canon.Core.GrammarParser;
using Canon.Core.LexicalParser;
namespace Canon.Tests.GrammarParserTests;
@ -107,4 +108,28 @@ public class SimpleGrammarTests
Assert.Contains(new Terminator(DelimiterType.LeftParenthesis), grammar.BeginState.Transformer.Keys);
Assert.Contains(Terminator.IdentifierTerminator, grammar.BeginState.Transformer.Keys);
}
/// <summary>
/// Builds the simple grammar and analyses the token sequence "n + n" followed by
/// the end token; the test passes when no exception is thrown.
/// </summary>
[Fact]
public void AnalyseSingleSentenceTest()
{
    NonTerminator start = new(NonTerminatorType.StartNonTerminator);
    GrammarBuilder grammarBuilder = new() { Generators = s_simpleGrammar, Begin = start };
    Grammar simpleGrammar = grammarBuilder.Build();

    IdentifierSemanticToken leftOperand = new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };
    OperatorSemanticToken plus = new()
    {
        LinePos = 0, CharacterPos = 0, LiteralValue = "+", OperatorType = OperatorType.Plus
    };
    IdentifierSemanticToken rightOperand = new() { LinePos = 0, CharacterPos = 0, LiteralValue = "n" };

    List<SemanticToken> sentence = [leftOperand, plus, rightOperand, SemanticToken.End];

    // Analysing the sentence must complete without throwing.
    simpleGrammar.Analyse(sentence);
}
}