CanonSharp/CanonSharp.Pascal/Parser/GrammarParser.cs
jackfiled 89ce313b77 feat: CanonSharp Benchmark. (#4)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/4
Co-authored-by: jackfiled <xcrenchangjun@outlook.com>
Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
2024-08-19 14:37:34 +08:00

400 lines
15 KiB
C#

using CanonSharp.Combinator.Abstractions;
using CanonSharp.Combinator.Extensions;
using static CanonSharp.Combinator.ParserBuilder;
using CanonSharp.Pascal.Scanner;
using CanonSharp.Pascal.SyntaxTree;
namespace CanonSharp.Pascal.Parser;
public sealed class GrammarParser : GrammarParserBuilder
{
/// <summary>
/// Runs the whole-program grammar over a token sequence.
/// </summary>
/// <param name="tokens">Lexical tokens to parse, wrapped in a <see cref="LexicalTokenReadState"/>.</param>
/// <returns>
/// The <c>Value</c> of the parse result. What <c>Value</c> does for a failed parse is decided by
/// the combinator library and is not visible in this file.
/// </returns>
public Program Parse(IEnumerable<LexicalToken> tokens)
{
    IParser<LexicalToken, Program> grammar = ProgramParser();
    LexicalTokenReadState initialState = new(tokens);
    IParseResult<LexicalToken, Program> outcome = grammar.Parse(initialState);

    return outcome.Value;
}
/// <summary>
/// Builds the parser for a factor, the innermost operand level of an expression.
/// </summary>
/// <remarks>
/// Alternatives are attempted in the order given to <c>Choice</c>: <c>TrueParser</c>,
/// <c>FalseParser</c>, <c>NumberParser</c>, unary <c>-</c>, unary <c>+</c>, <c>not</c>,
/// a call with a parenthesised argument list, <c>VariableParser</c>, and finally a
/// parenthesised expression. The call form is listed ahead of the variable form; both
/// begin with an identifier.
/// </remarks>
/// <returns>A parser producing the syntax node of the matched factor.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> FactorParser()
{
    // Factor -> - Factor
    IParser<LexicalToken, SyntaxNodeBase> negated =
        from minus in Operator("-")
        from operand in FactorParser()
        select new UnaryOperatorNode(UnaryOperatorType.Minus, operand);

    // Factor -> + Factor
    IParser<LexicalToken, SyntaxNodeBase> positive =
        from plus in Operator("+")
        from operand in FactorParser()
        select new UnaryOperatorNode(UnaryOperatorType.Plus, operand);

    // Factor -> not Factor
    IParser<LexicalToken, SyntaxNodeBase> inverted =
        from keyword in Keyword("not")
        from operand in FactorParser()
        select new UnaryOperatorNode(UnaryOperatorType.Not, operand);

    // Factor -> ( Expression )
    IParser<LexicalToken, SyntaxNodeBase> grouped =
        from open in Delimiter("(")
        from inner in ExpressionParser()
        from close in Delimiter(")")
        select inner;

    // Factor -> Identifier ( Expression {, Expression} )
    IParser<LexicalToken, SyntaxNodeBase> call =
        from callee in IdentifierParser()
        from open in Delimiter("(")
        from args in ExpressionParser().SeparatedBy(Delimiter(","))
        from close in Delimiter(")")
        select new ProcedureCallNode(callee, args);

    return Choice(
        TrueParser(),
        FalseParser(),
        NumberParser(),
        negated,
        positive,
        inverted,
        call,
        VariableParser(),
        grouped
    );
}
/// <summary>
/// Builds the tail of a term (<c>Term'</c>): given the operand parsed so far, parses
/// <c>MultiplyOperator Factor</c>, wraps both operands in a <see cref="BinaryOperatorNode"/>
/// and recurses with that node as the new left operand.
/// </summary>
/// <param name="left">The operand accumulated so far; also handed to <c>Next</c> as its fallback value.</param>
/// <returns>A parser producing the accumulated term.</returns>
private static IParser<LexicalToken, SyntaxNodeBase> TermRecursively(SyntaxNodeBase left)
{
    // MultiplyOperator -> * | / | div | mod | and
    IParser<LexicalToken, BinaryOperatorType> multiplicative = Choice(
        from _ in Operator("*")
        select BinaryOperatorType.Multiply,
        from _ in Operator("/")
        select BinaryOperatorType.Divide,
        from _ in Keyword("div")
        select BinaryOperatorType.IntegerDivide,
        from _ in Keyword("mod")
        select BinaryOperatorType.Mod,
        from _ in Keyword("and")
        select BinaryOperatorType.And);

    // Term' -> MultiplyOperator Factor Term' | ε   (`left` stands in for the ε case)
    return multiplicative.Next(
        kind => FactorParser()
            .Map(operand => new BinaryOperatorNode(kind, left, operand))
            .Bind(TermRecursively),
        left);
}
/// <summary>
/// Builds the parser for a term: one factor followed by the tail built by
/// <see cref="TermRecursively"/>.
/// </summary>
/// <remarks>
/// Grammar: <c>Term -> Factor | Term MultiplyOperator Factor</c>.
/// After eliminating left recursion:
/// <c>Term -> Factor Term'</c>,
/// <c>Term' -> MultiplyOperator Factor Term' | ε</c>.
/// </remarks>
/// <returns>A parser producing the syntax node of the term.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> TermParser()
    => FactorParser().Bind(TermRecursively);
/// <summary>
/// Builds the tail of a simple expression (<c>SimpleExpression'</c>): given the operand parsed
/// so far, parses <c>AddOperator Term</c>, wraps both operands in a
/// <see cref="BinaryOperatorNode"/> and recurses with that node as the new left operand.
/// </summary>
/// <param name="left">The operand accumulated so far; also handed to <c>Next</c> as its fallback value.</param>
/// <returns>A parser producing the accumulated simple expression.</returns>
private static IParser<LexicalToken, SyntaxNodeBase> SimpleExpressionRecursively(SyntaxNodeBase left)
{
    // AddOperator -> + | - | or
    IParser<LexicalToken, BinaryOperatorType> additive = Choice(
        from _ in Operator("+")
        select BinaryOperatorType.Add,
        from _ in Operator("-")
        select BinaryOperatorType.Subtract,
        from _ in Keyword("or")
        select BinaryOperatorType.Or);

    // SimpleExpression' -> AddOperator Term SimpleExpression' | ε   (`left` stands in for the ε case)
    return additive.Next(
        kind => TermParser()
            .Map(operand => new BinaryOperatorNode(kind, left, operand))
            .Bind(SimpleExpressionRecursively),
        left);
}
/// <summary>
/// Builds the parser for a simple expression: one term followed by the tail built by
/// <see cref="SimpleExpressionRecursively"/>.
/// </summary>
/// <remarks>
/// Grammar: <c>SimpleExpression -> Term | SimpleExpression AddOperator Term</c>.
/// After eliminating left recursion:
/// <c>SimpleExpression -> Term SimpleExpression'</c>,
/// <c>SimpleExpression' -> AddOperator Term SimpleExpression' | ε</c>.
/// </remarks>
/// <returns>A parser producing the syntax node of the simple expression.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> SimpleExpressionParser()
    => TermParser().Bind(SimpleExpressionRecursively);
/// <summary>
/// Builds the parser for an expression: a simple expression optionally followed by one
/// relational operator and a second simple expression.
/// </summary>
/// <remarks>
/// The left operand is parsed exactly once and the relational part is treated as an optional
/// tail. Writing the rule as two alternatives that both start with a simple expression
/// re-parses the left operand whenever no relational operator follows. Because factors
/// recurse into this parser through parentheses, subscripts and call arguments, that cost
/// doubles with each level of nesting.
/// </remarks>
/// <returns>
/// A parser producing a <see cref="BinaryOperatorNode"/> when a relational operator is present,
/// otherwise the node of the simple expression itself.
/// </returns>
public static IParser<LexicalToken, SyntaxNodeBase> ExpressionParser()
{
    // RelationOperator -> = | <> | < | <= | > | >=
    IParser<LexicalToken, BinaryOperatorType> relationOperator = Choice(
        from _ in Operator("=")
        select BinaryOperatorType.Equal,
        from _ in Operator("<>")
        select BinaryOperatorType.NotEqual,
        from _ in Operator("<")
        select BinaryOperatorType.Less,
        from _ in Operator("<=")
        select BinaryOperatorType.LessEqual,
        from _ in Operator(">")
        select BinaryOperatorType.Greater,
        from _ in Operator(">=")
        select BinaryOperatorType.GreaterEqual);

    // Expression -> SimpleExpression [RelationOperator SimpleExpression]
    // Same shape as the Term' / SimpleExpression' tails in this file: `left` is the fallback
    // handed to Next for the case where no relational operator follows.
    // NOTE(review): assumes Next leaves the input untouched when the operator does not match,
    // as the existing tail parsers already rely on.
    return SimpleExpressionParser().Bind(left =>
        relationOperator.Next(
            // The cast pins the result type to SyntaxNodeBase so it agrees with the fallback.
            op => SimpleExpressionParser()
                .Map(right => (SyntaxNodeBase)new BinaryOperatorNode(op, left, right)),
            left));
}
/// <summary>
/// Builds the parser for a comma-separated expression list.
/// Grammar: <c>ExpressionList -> Expression | ExpressionList , Expression</c>.
/// </summary>
/// <returns>A parser producing the parsed expressions, built with <c>SeparatedBy1</c>.</returns>
public static IParser<LexicalToken, IEnumerable<SyntaxNodeBase>> ExpressionsParser()
{
    return ExpressionParser().SeparatedBy1(Delimiter(","));
}
/// <summary>
/// Builds the parser for a variable reference: an identifier, optionally followed by a
/// bracketed list of index expressions.
/// </summary>
/// <remarks>The subscripted form is tried first, then the bare identifier.</remarks>
/// <returns>A parser producing a <see cref="VariableNode"/>.</returns>
public static IParser<LexicalToken, VariableNode> VariableParser()
{
    return Choice(
        // Variable -> Identifier [ ExpressionList ]
        from name in IdentifierParser()
        from open in Delimiter("[")
        from indices in ExpressionsParser()
        from close in Delimiter("]")
        select new VariableNode(name, indices),
        // Variable -> Identifier
        from name in IdentifierParser()
        select new VariableNode(name)
    );
}
/// <summary>
/// Builds the parser for an <c>if</c> statement:
/// <c>if Expression then Statement [else Statement]</c>.
/// </summary>
/// <remarks>
/// The shared <c>if … then …</c> prefix is parsed exactly once and the <c>else</c> branch is
/// treated as an optional suffix. Listing the long and the short form as two alternatives
/// re-parses the condition and the <c>then</c> statement whenever no <c>else</c> follows,
/// and that cost doubles with every level of nested <c>if</c>.
/// </remarks>
/// <returns>
/// A parser producing an <see cref="IfNode"/>, carrying the else statement when one is present.
/// </returns>
public static IParser<LexicalToken, IfNode> IfParser()
{
    IParser<LexicalToken, IfNode> commonPart =
        from _ in Keyword("if")
        from condition in ExpressionParser()
        from _1 in Keyword("then")
        from statement in StatementParser()
        select new IfNode(condition, statement);

    // Same shape as the Term' / SimpleExpression' tails in this file: `common` is the fallback
    // handed to Next for the case where no `else` keyword follows.
    // NOTE(review): assumes Next leaves the input untouched when `else` does not match,
    // as the existing tail parsers already rely on.
    return commonPart.Bind(common =>
        Keyword("else").Next(
            elseKeyword => StatementParser()
                .Map(elseStatement => new IfNode(common.Condition, common.Statement, elseStatement)),
            common));
}
/// <summary>
/// Builds the parser for a <c>for</c> loop:
/// <c>for Identifier := Expression to Expression do Statement</c>.
/// </summary>
/// <returns>
/// A parser producing a <see cref="ForNode"/> from the loop identifier, the two bound
/// expressions and the body statement.
/// </returns>
public static IParser<LexicalToken, ForNode> ForParser()
    => from forKeyword in Keyword("for")
       from counter in IdentifierParser()
       from assign in Operator(":=")
       from start in ExpressionParser()
       from toKeyword in Keyword("to")
       from stop in ExpressionParser()
       from doKeyword in Keyword("do")
       from body in StatementParser()
       select new ForNode(counter, start, stop, body);
/// <summary>
/// Builds the parser for a <c>while</c> loop: <c>while Expression do Statement</c>.
/// </summary>
/// <returns>A parser producing a <see cref="WhileNode"/> from the condition and the body statement.</returns>
public static IParser<LexicalToken, WhileNode> WhileParser()
    => from whileKeyword in Keyword("while")
       from guard in ExpressionParser()
       from doKeyword in Keyword("do")
       from body in StatementParser()
       select new WhileNode(guard, body);
/// <summary>
/// Builds the parser for a procedure call statement: an identifier, optionally followed by a
/// parenthesised, comma-separated argument list.
/// </summary>
/// <remarks>
/// The form with parentheses is tried first. A bare identifier yields a call node with an
/// empty argument collection.
/// </remarks>
/// <returns>A parser producing a <see cref="ProcedureCallNode"/>.</returns>
public static IParser<LexicalToken, ProcedureCallNode> ProcedureCallParser()
{
    return Choice(
        // ProcedureCall -> Identifier ( Expression {, Expression} )
        from callee in IdentifierParser()
        from open in Delimiter("(")
        from args in ExpressionParser().SeparatedBy(Delimiter(","))
        from close in Delimiter(")")
        select new ProcedureCallNode(callee, args),
        // ProcedureCall -> Identifier
        from callee in IdentifierParser()
        select new ProcedureCallNode(callee, [])
    );
}
/// <summary>
/// Builds the parser for a single statement.
/// </summary>
/// <remarks>
/// Alternatives are attempted in the order given to <c>Choice</c>: assignment, procedure call,
/// <c>if</c>, <c>for</c>, <c>while</c>, and compound statement. Assignment is listed before
/// the procedure call; both begin with an identifier.
/// </remarks>
/// <returns>A parser producing the syntax node of the matched statement.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> StatementParser()
{
    return Choice<LexicalToken, SyntaxNodeBase>(
        // Statement -> Variable := Expression
        from target in VariableParser()
        from assign in Operator(":=")
        from value in ExpressionParser()
        select new AssignNode(target, value),
        ProcedureCallParser(),
        IfParser(),
        ForParser(),
        WhileParser(),
        CompoundStatementParser()
    );
}
/// <summary>
/// Builds the parser for a compound statement: statements between <c>begin</c> and
/// <c>end</c>, delimited by <c>;</c> via <c>SeparatedOrEndBy</c>.
/// </summary>
/// <returns>A parser producing a <see cref="BlockNode"/> holding the parsed statements.</returns>
public static IParser<LexicalToken, BlockNode> CompoundStatementParser()
    => from beginKeyword in Keyword("begin")
       from body in StatementParser().SeparatedOrEndBy(Delimiter(";"))
       from endKeyword in Keyword("end")
       select new BlockNode(body);
/// <summary>
/// Builds the parser for the value on the right-hand side of a constant declaration.
/// </summary>
/// <remarks>
/// Alternatives, in order: a number prefixed by <c>-</c>, a number prefixed by <c>+</c>,
/// <c>NumberParser</c>, <c>CharParser</c>, <c>TrueParser</c>, <c>FalseParser</c>.
/// A signed number is wrapped in a <see cref="UnaryOperatorNode"/>.
/// </remarks>
/// <returns>A parser producing the syntax node of the constant value.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> ConstValueParser()
{
    return Choice(
        // ConstValue -> - Number
        from minus in Operator("-")
        from magnitude in NumberParser()
        select new UnaryOperatorNode(UnaryOperatorType.Minus, magnitude),
        // ConstValue -> + Number
        from plus in Operator("+")
        from magnitude in NumberParser()
        select new UnaryOperatorNode(UnaryOperatorType.Plus, magnitude),
        NumberParser(),
        CharParser(),
        TrueParser(),
        FalseParser()
    );
}
/// <summary>
/// Builds the parser for one constant declaration: <c>Identifier = ConstValue</c>.
/// </summary>
/// <returns>A parser producing a <see cref="ConstantNode"/> from the identifier token and the value node.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> ConstDeclarationParser()
    => from name in Satisfy<LexicalToken>(candidate =>
           candidate.TokenType == LexicalTokenType.Identifier)
       from equalsSign in Operator("=")
       from value in ConstValueParser()
       select new ConstantNode(name, value);
/// <summary>
/// Builds the parser for the constant section: <c>const</c> followed by declarations
/// delimited by <c>;</c> via <c>SeparatedOrEndBy1</c>.
/// </summary>
/// <returns>
/// A parser producing a <see cref="BlockNode"/> of the declarations. An empty
/// <see cref="BlockNode"/> is supplied to <c>Try</c> as the substitute result.
/// </returns>
public static IParser<LexicalToken, BlockNode> ConstDeclarationsParser()
{
    IParser<LexicalToken, BlockNode> section =
        from constKeyword in Keyword("const")
        from declarations in ConstDeclarationParser().SeparatedOrEndBy1(Delimiter(";"))
        select new BlockNode(declarations);

    // presumably Try yields the empty block when the section is absent — verify against the combinator library
    return section.Try(new BlockNode([]));
}
/// <summary>
/// Builds the parser for an array type:
/// <c>array [ Integer .. Integer {, Integer .. Integer} ] of BasicType</c>.
/// </summary>
/// <returns>
/// A parser producing a <see cref="TypeNode"/> from the element type token and one
/// <see cref="ArrayRange"/> per bracketed range.
/// </returns>
public static IParser<LexicalToken, TypeNode> ArrayTypeParser()
{
    // Range -> Integer .. Integer, one or more separated by commas
    IParser<LexicalToken, IEnumerable<ArrayRange>> dimensions = (
        from lower in IntegerParser()
        from dots in Delimiter("..")
        from upper in IntegerParser()
        select new ArrayRange(lower.Value, upper.Value)).SeparatedBy1(Delimiter(","));

    return from arrayKeyword in Keyword("array")
           from open in Delimiter("[")
           from ranges in dimensions
           from close in Delimiter("]")
           from ofKeyword in Keyword("of")
           from elementType in BasicTypeParser()
           select new TypeNode(elementType, ranges);
}
/// <summary>
/// Builds the parser for a type: an array type, or otherwise a basic type.
/// </summary>
/// <returns>A parser producing the <see cref="TypeNode"/> as a <see cref="SyntaxNodeBase"/>.</returns>
public static IParser<LexicalToken, SyntaxNodeBase> TypeParser()
{
    return Choice(
        ArrayTypeParser(),
        // Type -> BasicType
        from basic in BasicTypeParser()
        select new TypeNode(basic));
}
/// <summary>
/// Builds the parser for one variable declaration:
/// <c>Identifier {, Identifier} : Type</c>.
/// </summary>
/// <returns>
/// A parser producing a <see cref="VariableDeclarationNode"/> from the identifier tokens and
/// the parsed type converted to <see cref="TypeNode"/>.
/// </returns>
public static IParser<LexicalToken, VariableDeclarationNode> VariableDeclarationParser()
    => from names in Satisfy<LexicalToken>(
               candidate => candidate.TokenType == LexicalTokenType.Identifier)
           .SeparatedBy1(Delimiter(","))
       from colon in Delimiter(":")
       from declaredType in TypeParser()
       select new VariableDeclarationNode(names, declaredType.Convert<TypeNode>());
/// <summary>
/// Builds the parser for the variable section: <c>var</c> followed by declarations
/// delimited by <c>;</c> via <c>SeparatedOrEndBy1</c>.
/// </summary>
/// <returns>
/// A parser producing a <see cref="BlockNode"/> of the declarations. An empty
/// <see cref="BlockNode"/> is supplied to <c>Try</c> as the substitute result.
/// </returns>
public static IParser<LexicalToken, BlockNode> VariableDeclarationsParser()
{
    IParser<LexicalToken, BlockNode> section =
        from varKeyword in Keyword("var")
        from declarations in VariableDeclarationParser().SeparatedOrEndBy1(Delimiter(";"))
        select new BlockNode(declarations);

    // presumably Try yields the empty block when the section is absent — verify against the combinator library
    return section.Try(new BlockNode([]));
}
/// <summary>
/// Builds the parser for one formal-parameter group:
/// <c>[var] Identifier {, Identifier} : Type</c>.
/// </summary>
/// <remarks>
/// Every identifier in the group becomes its own <see cref="Parameter"/> sharing the parsed
/// type. The first constructor argument is <c>true</c> when the group starts with
/// <c>var</c> and <c>false</c> otherwise.
/// </remarks>
/// <returns>A parser producing the parameters of the group as a deferred LINQ projection.</returns>
public static IParser<LexicalToken, IEnumerable<Parameter>> ParameterParser()
{
    return Choice(
        // Parameter -> var IdentifierList : Type
        from varKeyword in Keyword("var")
        from names in IdentifierParser().SeparatedBy1(Delimiter(","))
        from colon in Delimiter(":")
        from declaredType in TypeParser()
        select names.Select(name => new Parameter(true, name, declaredType)),
        // Parameter -> IdentifierList : Type
        from names in IdentifierParser().SeparatedBy1(Delimiter(","))
        from colon in Delimiter(":")
        from declaredType in TypeParser()
        select names.Select(name => new Parameter(false, name, declaredType))
    );
}
/// <summary>
/// Builds the parser for a parenthesised formal-parameter list: parameter groups separated
/// by <c>;</c>, flattened into a single list.
/// </summary>
/// <returns>
/// A parser producing all parameters in source order. An empty list is supplied to
/// <c>Try</c> as the substitute result.
/// </returns>
public static IParser<LexicalToken, IEnumerable<Parameter>> FormalParameterParser()
{
    return (from open in Delimiter("(")
            from groups in ParameterParser().SeparatedBy(Delimiter(";"))
            from close in Delimiter(")")
            // Flatten the per-group sequences into one materialised List<Parameter>.
            select groups.SelectMany(group => group).ToList()).Try([]);
}
/// <summary>
/// Builds the parser for a subprogram header.
/// </summary>
/// <remarks>
/// Two forms, tried in order:
/// <c>procedure Identifier FormalParameters</c> and
/// <c>function Identifier FormalParameters : Type</c>.
/// </remarks>
/// <returns>
/// A parser producing a <see cref="SubprogramHead"/>; the function form also passes the
/// parsed return type.
/// </returns>
public static IParser<LexicalToken, SubprogramHead> SubprogramHeadParser()
{
    return Choice(
        // SubprogramHead -> procedure Identifier FormalParameters
        from procedureKeyword in Keyword("procedure")
        from name in IdentifierParser()
        from formals in FormalParameterParser()
        select new SubprogramHead(name, formals),
        // SubprogramHead -> function Identifier FormalParameters : Type
        from functionKeyword in Keyword("function")
        from name in IdentifierParser()
        from formals in FormalParameterParser()
        from colon in Delimiter(":")
        from returnType in TypeParser()
        select new SubprogramHead(name, formals, returnType)
    );
}
/// <summary>
/// Builds the parser for a subprogram body: constant section, variable section, then the
/// compound statement.
/// </summary>
/// <returns>A parser producing a <see cref="SubprogramBody"/> from those three parts.</returns>
public static IParser<LexicalToken, SubprogramBody> SubprogramBodyParser()
    => from constants in ConstDeclarationsParser()
       from locals in VariableDeclarationsParser()
       from statements in CompoundStatementParser()
       select new SubprogramBody(constants, locals, statements);
/// <summary>
/// Builds the parser for a complete subprogram: <c>SubprogramHead ; SubprogramBody</c>.
/// </summary>
/// <returns>A parser producing a <see cref="Subprogram"/> from the header and the body.</returns>
public static IParser<LexicalToken, Subprogram> SubprogramParser()
    => from header in SubprogramHeadParser()
       from semicolon in Delimiter(";")
       from implementation in SubprogramBodyParser()
       select new Subprogram(header, implementation);
/// <summary>
/// Builds the parser for the program body: constant section, variable section, subprograms
/// delimited by <c>;</c> via <c>SeparatedOrEndBy</c>, then the main compound statement.
/// </summary>
/// <returns>
/// A parser producing a <see cref="ProgramBody"/>; the subprograms are wrapped in a
/// <see cref="BlockNode"/>.
/// </returns>
public static IParser<LexicalToken, ProgramBody> ProgramBodyParser()
    => from constants in ConstDeclarationsParser()
       from globals in VariableDeclarationsParser()
       from routines in SubprogramParser().SeparatedOrEndBy(Delimiter(";"))
           .Map(parsed => new BlockNode(parsed))
       from main in CompoundStatementParser()
       select new ProgramBody(constants, globals, routines, main);
/// <summary>
/// Builds the parser for the program header: <c>program Identifier</c>.
/// </summary>
/// <returns>A parser producing a <see cref="ProgramHead"/> from the identifier token.</returns>
public static IParser<LexicalToken, ProgramHead> ProgramHeadParser()
    => from programKeyword in Keyword("program")
       from name in Satisfy<LexicalToken>(candidate =>
           candidate.TokenType == LexicalTokenType.Identifier)
       select new ProgramHead(name);
/// <summary>
/// Builds the top-level parser: <c>ProgramHead ; ProgramBody .</c>
/// </summary>
/// <returns>A parser producing the <see cref="Program"/> from the header and the body.</returns>
public static IParser<LexicalToken, Program> ProgramParser()
    => from header in ProgramHeadParser()
       from semicolon in Delimiter(";")
       from content in ProgramBodyParser()
       from period in Delimiter(".")
       select new Program(header, content);
}