add: basic grammar parser including expression and program.

This commit is contained in:
2024-08-15 16:18:32 +08:00
parent fee62ec289
commit bdcc59a2ab
19 changed files with 829 additions and 0 deletions

View File

@@ -0,0 +1,201 @@
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Combinator.Extensions;
using static CanonSharp.Combinator.ParserBuilder;
using CanonSharp.Pascal.Scanner;
using CanonSharp.Pascal.SyntaxTree;
namespace CanonSharp.Pascal.Parser;
/// <summary>
/// Parser factories for the Pascal grammar subset handled so far: expressions,
/// assignment statements, compound statements and the program skeleton.
/// Every factory returns a parser over <see cref="LexicalToken"/> input.
/// </summary>
public sealed class GrammarParser : GrammarParserBuilder
{
    /// <summary>
    /// Builds the parser for a factor: a boolean or numeric literal, a unary
    /// <c>-</c>/<c>+</c>/<c>not</c> applied to a factor, a variable, or a parenthesised expression.
    /// </summary>
    /// <returns>A parser producing the syntax node of the factor.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> FactorParser()
    {
        // factor -> true | false | num
        IParser<LexicalToken, SyntaxNodeBase> trueParser =
            from _ in Keyword("true")
            select new BooleanValueNode(true);

        IParser<LexicalToken, SyntaxNodeBase> falseParser =
            from _ in Keyword("false")
            select new BooleanValueNode(false);

        // Source-code literals are culture-neutral, so they are parsed with the invariant
        // culture; the current-culture overloads misread "3.14" where ',' is the decimal separator.
        IParser<LexicalToken, SyntaxNodeBase> integerParser =
            from token in Satisfy<LexicalToken>(x => x.TokenType == LexicalTokenType.ConstInteger)
            select new IntegerValueNode(
                int.Parse(token.LiteralValue, System.Globalization.CultureInfo.InvariantCulture));

        IParser<LexicalToken, SyntaxNodeBase> floatParser =
            from token in Satisfy<LexicalToken>(x => x.TokenType == LexicalTokenType.ConstFloat)
            select new FloatValueNode(
                double.Parse(token.LiteralValue, System.Globalization.CultureInfo.InvariantCulture));

        // factor -> - factor | + factor | not factor
        // The recursive FactorParser() calls sit in a second `from` clause, i.e. inside the
        // query continuation, so they are not evaluated while this method is building the parser.
        IParser<LexicalToken, SyntaxNodeBase> minusParser =
            from _ in Operator("-")
            from node in FactorParser()
            select new UnaryOperatorNode(UnaryOperatorType.Minus, node);

        IParser<LexicalToken, SyntaxNodeBase> plusParser =
            from _ in Operator("+")
            from node in FactorParser()
            select new UnaryOperatorNode(UnaryOperatorType.Plus, node);

        IParser<LexicalToken, SyntaxNodeBase> notParser =
            from _ in Keyword("not")
            from node in FactorParser()
            select new UnaryOperatorNode(UnaryOperatorType.Not, node);

        // factor -> ( expression )
        IParser<LexicalToken, SyntaxNodeBase> parenthesisParser =
            from _1 in Delimiter("(")
            from node in ExpressionParser()
            from _2 in Delimiter(")")
            select node;

        return Choice(
            trueParser,
            falseParser,
            integerParser,
            floatParser,
            minusParser,
            plusParser,
            notParser,
            VariableParser(),
            parenthesisParser
        );
    }

    /// <summary>
    /// Builds the parser for the tail of a term (<c>Term'</c>), folding each further
    /// <c>MultiplyOperator Factor</c> pair into a binary node whose left operand is the
    /// tree built so far.
    /// </summary>
    /// <param name="left">The node parsed so far; it becomes the left operand.</param>
    /// <returns>A parser producing the extended term.</returns>
    private static IParser<LexicalToken, SyntaxNodeBase> TermRecursively(SyntaxNodeBase left)
    {
        // MultiplyOperator -> * | / | div | mod | and
        IParser<LexicalToken, BinaryOperatorType> multiplyOperator = Choice(
            from _ in Operator("*")
            select BinaryOperatorType.Multiply,
            from _ in Operator("/")
            select BinaryOperatorType.Divide,
            from _ in Keyword("div")
            select BinaryOperatorType.IntegerDivide,
            from _ in Keyword("mod")
            select BinaryOperatorType.Mod,
            from _ in Keyword("and")
            select BinaryOperatorType.And);

        // NOTE(review): the second argument of Next is presumably the result used when no
        // operator follows (the ε alternative of Term') - confirm against the combinator library.
        return multiplyOperator.Next(op =>
        {
            return FactorParser().Map(right => new BinaryOperatorNode(op, left, right))
                .Bind(TermRecursively);
        }, left);
    }

    /// <summary>
    /// Builds the parser for a term: a factor followed by any number of
    /// <c>MultiplyOperator Factor</c> pairs.
    /// </summary>
    /// <returns>A parser producing the syntax node of the term.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> TermParser()
    {
        // Term -> Factor | Term MultiplyOperator Factor
        // Eliminating the left recursion gives:
        // Term -> Factor Term'
        // Term' -> MultiplyOperator Factor Term' | ε
        return FactorParser().Bind(TermRecursively);
    }

    /// <summary>
    /// Builds the parser for the tail of a simple expression (<c>SimpleExpression'</c>),
    /// folding each further <c>AddOperator Term</c> pair into a binary node whose left
    /// operand is the tree built so far.
    /// </summary>
    /// <param name="left">The node parsed so far; it becomes the left operand.</param>
    /// <returns>A parser producing the extended simple expression.</returns>
    private static IParser<LexicalToken, SyntaxNodeBase> SimpleExpressionRecursively(SyntaxNodeBase left)
    {
        // AddOperator -> + | - | or
        IParser<LexicalToken, BinaryOperatorType> addOperator = Choice(
            from _ in Operator("+")
            select BinaryOperatorType.Add,
            from _ in Operator("-")
            select BinaryOperatorType.Subtract,
            from _ in Keyword("or")
            select BinaryOperatorType.Or);

        // NOTE(review): as in TermRecursively, `left` is presumably the fallback result when
        // no add operator follows - confirm against the combinator library.
        return addOperator.Next(op =>
        {
            return TermParser().Map(right => new BinaryOperatorNode(op, left, right))
                .Bind(SimpleExpressionRecursively);
        }, left);
    }

    /// <summary>
    /// Builds the parser for a simple expression: a term followed by any number of
    /// <c>AddOperator Term</c> pairs.
    /// </summary>
    /// <returns>A parser producing the syntax node of the simple expression.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> SimpleExpressionParser()
    {
        // SimpleExpression -> Term | SimpleExpression AddOperator Term
        // Eliminating the left recursion gives:
        // SimpleExpression -> Term SimpleExpression'
        // SimpleExpression' -> AddOperator Term SimpleExpression' | ε
        return TermParser().Bind(SimpleExpressionRecursively);
    }

    /// <summary>
    /// Builds the parser for an expression: either a single simple expression or two
    /// simple expressions joined by one relational operator.
    /// </summary>
    /// <returns>A parser producing the syntax node of the expression.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> ExpressionParser()
    {
        // RelationOperator -> = | <> | < | <= | > | >=
        IParser<LexicalToken, BinaryOperatorType> relationOperator = Choice(
            from _ in Operator("=")
            select BinaryOperatorType.Equal,
            from _ in Operator("<>")
            select BinaryOperatorType.NotEqual,
            from _ in Operator("<")
            select BinaryOperatorType.Less,
            from _ in Operator("<=")
            select BinaryOperatorType.LessEqual,
            from _ in Operator(">")
            select BinaryOperatorType.Greater,
            from _ in Operator(">=")
            select BinaryOperatorType.GreaterEqual);

        // Expression -> SimpleExpression | SimpleExpression RelationOperator SimpleExpression
        // The relational alternative is listed first; the bare simple expression is the
        // second alternative.
        return Choice(
            from left in SimpleExpressionParser()
            from op in relationOperator
            from right in SimpleExpressionParser()
            select new BinaryOperatorNode(op, left, right),
            SimpleExpressionParser()
        );
    }

    /// <summary>
    /// Builds the parser for an expression list.
    /// ExpressionList -> Expression | ExpressionList , Expression
    /// </summary>
    /// <returns>A parser producing the expressions separated by <c>,</c> delimiters.</returns>
    public static IParser<LexicalToken, IEnumerable<SyntaxNodeBase>> ExpressionsParser()
        => ExpressionParser().SeparatedBy1(Delimiter(","));

    /// <summary>
    /// Builds the parser for a variable reference: a single identifier token.
    /// </summary>
    /// <returns>A parser producing a <see cref="VariableNode"/> built from the identifier's literal value.</returns>
    public static IParser<LexicalToken, VariableNode> VariableParser()
    {
        return from token in Satisfy<LexicalToken>(token => token.TokenType == LexicalTokenType.Identifier)
            select new VariableNode(token.LiteralValue);
    }

    /// <summary>
    /// Builds the parser for a statement. Assignment is the only alternative at present.
    /// Statement -> Variable := Expression
    /// </summary>
    /// <returns>A parser producing the syntax node of the statement.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> StatementParser()
    {
        return Choice(
            from variable in VariableParser()
            from _ in Operator(":=")
            from expression in ExpressionParser()
            select new AssignNode(variable, expression)
        );
    }

    /// <summary>
    /// Builds the parser for a compound statement: <c>begin</c>, statements combined with
    /// <c>;</c> delimiters, then <c>end</c>.
    /// </summary>
    /// <returns>A parser producing a <see cref="BlockNode"/> holding the parsed statements.</returns>
    public static IParser<LexicalToken, BlockNode> CompoundStatementParser()
    {
        return from _1 in Keyword("begin")
            from statements in StatementParser().SeparatedOrEndBy(Delimiter(";"))
            from _2 in Keyword("end")
            select new BlockNode(statements);
    }

    /// <summary>
    /// Builds the parser for a program body, which is a single compound statement.
    /// </summary>
    /// <returns>A parser producing a <see cref="ProgramBody"/> wrapping the block.</returns>
    public static IParser<LexicalToken, ProgramBody> ProgramBodyParser()
    {
        return from block in CompoundStatementParser()
            select new ProgramBody(block);
    }

    /// <summary>
    /// Builds the parser for a program head: the <c>program</c> keyword followed by an identifier.
    /// </summary>
    /// <returns>A parser producing a <see cref="ProgramHead"/> built from the identifier token.</returns>
    public static IParser<LexicalToken, ProgramHead> ProgramHeadParser()
    {
        return from _ in Keyword("program")
            from token in Satisfy<LexicalToken>(token => token.TokenType == LexicalTokenType.Identifier)
            select new ProgramHead(token);
    }

    /// <summary>
    /// Builds the parser for a whole program.
    /// Program -> ProgramHead ; ProgramBody .
    /// </summary>
    /// <returns>A parser producing the <see cref="Program"/> node.</returns>
    public static IParser<LexicalToken, Program> ProgramParser()
    {
        return from head in ProgramHeadParser()
            from _1 in Delimiter(";")
            from body in ProgramBodyParser()
            from _2 in Delimiter(".")
            select new Program(head, body);
    }
}

View File

@@ -0,0 +1,17 @@
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Pascal.Scanner;
using static CanonSharp.Combinator.ParserBuilder;
namespace CanonSharp.Pascal.Parser;
/// <summary>
/// Base class supplying single-token parsers for keywords, operators and delimiters.
/// </summary>
public abstract class GrammarParserBuilder
{
    /// <summary>
    /// Creates a parser accepting one keyword token whose literal value equals <paramref name="value"/>.
    /// </summary>
    protected static IParser<LexicalToken, LexicalToken> Keyword(string value)
        => TokenOf(LexicalTokenType.Keyword, value);

    /// <summary>
    /// Creates a parser accepting one operator token whose literal value equals <paramref name="value"/>.
    /// </summary>
    protected static IParser<LexicalToken, LexicalToken> Operator(string value)
        => TokenOf(LexicalTokenType.Operator, value);

    /// <summary>
    /// Creates a parser accepting one delimiter token whose literal value equals <paramref name="value"/>.
    /// </summary>
    protected static IParser<LexicalToken, LexicalToken> Delimiter(string value)
        => TokenOf(LexicalTokenType.Delimiter, value);

    // Shared predicate: the token must have the requested type and the exact literal text.
    private static IParser<LexicalToken, LexicalToken> TokenOf(LexicalTokenType type, string value)
    {
        return Satisfy<LexicalToken>(candidate =>
            candidate.TokenType == type && candidate.LiteralValue == value);
    }
}

View File

@@ -0,0 +1,53 @@
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Pascal.Scanner;
namespace CanonSharp.Pascal.Parser;
/// <summary>
/// Immutable read cursor over a list of lexical tokens: the token list plus the index
/// of the current token. Advancing yields a new state that shares the same list.
/// </summary>
public sealed class LexicalTokenReadState : IReadState<LexicalToken, LexicalTokenReadState>
{
    private readonly List<LexicalToken> _tokens;
    private readonly int _pos;

    /// <summary>
    /// The token at the current position. Indexes the list directly, so it must only be
    /// read while <see cref="HasValue"/> is true.
    /// </summary>
    public LexicalToken Current => _tokens[_pos];

    /// <summary>True while the current position is inside the token list.</summary>
    public bool HasValue => _pos < _tokens.Count;

    /// <summary>A new state over the same token list, advanced by one position.</summary>
    public LexicalTokenReadState Next => new(_tokens, _pos + 1);

    private LexicalTokenReadState(List<LexicalToken> tokens, int pos)
    {
        _tokens = tokens;
        _pos = pos;
    }

    /// <summary>
    /// Creates a state positioned at the first token of <paramref name="tokens"/>.
    /// The sequence is copied into a list once.
    /// </summary>
    /// <param name="tokens">The tokens to read.</param>
    public LexicalTokenReadState(IEnumerable<LexicalToken> tokens)
    {
        _tokens = tokens.ToList();
        _pos = 0;
    }

    /// <summary>
    /// Two states are equal when they are at the same position and their token lists
    /// hold pairwise-equal tokens.
    /// </summary>
    /// <param name="other">The state to compare with; may be null.</param>
    /// <returns>True when both states are equal; otherwise false.</returns>
    public bool Equals(LexicalTokenReadState? other)
    {
        if (other is null)
        {
            return false;
        }

        // Cheap checks first: a different position or length settles it without touching tokens.
        if (_pos != other._pos || _tokens.Count != other._tokens.Count)
        {
            return false;
        }

        // States derived through Next share one list instance, so the element-wise walk
        // is only needed for independently constructed states.
        if (ReferenceEquals(_tokens, other._tokens))
        {
            return true;
        }

        for (int i = 0; i < _tokens.Count; i++)
        {
            if (!_tokens[i].Equals(other._tokens[i]))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Object-typed equality, kept consistent with the typed overload.</summary>
    public override bool Equals(object? obj) => Equals(obj as LexicalTokenReadState);

    /// <summary>
    /// Hash built only from fields that <see cref="Equals(LexicalTokenReadState?)"/> requires
    /// to match, so equal states always hash alike.
    /// </summary>
    public override int GetHashCode() => HashCode.Combine(_pos, _tokens.Count);

    /// <summary>Text of the current token, or an end-of-input marker when exhausted.</summary>
    public override string ToString() => HasValue ? Current.ToString() : "End of input stream.";
}