feat: Grammar Parser (#3)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/3 Co-authored-by: jackfiled <xcrenchangjun@outlook.com> Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
This commit is contained in:
392
CanonSharp.Pascal/Parser/GrammarParser.cs
Normal file
392
CanonSharp.Pascal/Parser/GrammarParser.cs
Normal file
@@ -0,0 +1,392 @@
|
||||
using CanonSharp.Combinator.Abstractions;
|
||||
using CanonSharp.Combinator.Extensions;
|
||||
using static CanonSharp.Combinator.ParserBuilder;
|
||||
using CanonSharp.Pascal.Scanner;
|
||||
using CanonSharp.Pascal.SyntaxTree;
|
||||
|
||||
namespace CanonSharp.Pascal.Parser;
|
||||
|
||||
/// <summary>
/// Parser factories for the Pascal grammar, operating on a stream of <see cref="LexicalToken"/>.
/// Each method builds the parser for one grammar production; the production is given in the
/// method's documentation. The combinators used (Choice, Next, Try, SeparatedBy, ...) come from
/// CanonSharp.Combinator and their exact semantics are not visible in this file.
/// </summary>
public sealed class GrammarParser : GrammarParserBuilder
{
    /// <summary>
    /// Factor -> true | false | Number | - Factor | + Factor | not Factor
    ///         | Identifier ( Arguments ) | Variable | ( Expression )
    /// </summary>
    /// <returns>A parser producing the syntax node of a single factor.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> FactorParser()
    {
        // Factor -> - Factor
        IParser<LexicalToken, SyntaxNodeBase> minusParser =
            from _ in Operator("-")
            from node in FactorParser()
            select new UnaryOperatorNode(UnaryOperatorType.Minus, node);

        // Factor -> + Factor
        IParser<LexicalToken, SyntaxNodeBase> plusParser =
            from _ in Operator("+")
            from node in FactorParser()
            select new UnaryOperatorNode(UnaryOperatorType.Plus, node);

        // Factor -> not Factor
        IParser<LexicalToken, SyntaxNodeBase> notParser =
            from _ in Keyword("not")
            from node in FactorParser()
            select new UnaryOperatorNode(UnaryOperatorType.Not, node);

        // Factor -> ( Expression ); the parentheses themselves produce no node.
        IParser<LexicalToken, SyntaxNodeBase> parenthesisParser =
            from _1 in Delimiter("(")
            from node in ExpressionParser()
            from _2 in Delimiter(")")
            select node;

        // Only the form with an explicit argument list is accepted here; a bare
        // identifier is handled by VariableParser further down the choice.
        IParser<LexicalToken, SyntaxNodeBase> procedureCallParser = ProcedureCallWithArgumentsParser();

        return Choice(
            TrueParser(),
            FalseParser(),
            NumberParser(),
            minusParser,
            plusParser,
            notParser,
            procedureCallParser,
            VariableParser(),
            parenthesisParser
        );
    }

    /// <summary>
    /// Term' -> MultiplyOperator Factor Term' | ε
    /// </summary>
    /// <param name="left">
    /// The operand parsed so far. It becomes the left child of the next binary node and is
    /// also the value handed to <c>Next</c> for the ε alternative.
    /// </param>
    private static IParser<LexicalToken, SyntaxNodeBase> TermRecursively(SyntaxNodeBase left)
    {
        // MultiplyOperator -> * | / | div | mod | and
        IParser<LexicalToken, BinaryOperatorType> multiplyOperator = Choice(
            from _ in Operator("*")
            select BinaryOperatorType.Multiply,
            from _ in Operator("/")
            select BinaryOperatorType.Divide,
            from _ in Keyword("div")
            select BinaryOperatorType.IntegerDivide,
            from _ in Keyword("mod")
            select BinaryOperatorType.Mod,
            from _ in Keyword("and")
            select BinaryOperatorType.And);

        // The freshly built node is fed back in as the new left operand, so a chain
        // such as a * b / c nests to the left.
        return multiplyOperator.Next(
            op => FactorParser()
                .Map(right => new BinaryOperatorNode(op, left, right))
                .Bind(TermRecursively),
            left);
    }

    /// <summary>
    /// Term -> Factor | Term MultiplyOperator Factor
    /// </summary>
    /// <returns>A parser producing the syntax node of a term.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> TermParser()
    {
        // Term -> Factor | Term MultiplyOperator Factor
        // After eliminating the left recursion this becomes:
        // Term -> Factor Term'
        // Term' -> MultiplyOperator Factor Term' | ε
        return FactorParser().Bind(TermRecursively);
    }

    /// <summary>
    /// SimpleExpression' -> AddOperator Term SimpleExpression' | ε
    /// </summary>
    /// <param name="left">
    /// The operand parsed so far. It becomes the left child of the next binary node and is
    /// also the value handed to <c>Next</c> for the ε alternative.
    /// </param>
    private static IParser<LexicalToken, SyntaxNodeBase> SimpleExpressionRecursively(SyntaxNodeBase left)
    {
        // AddOperator -> + | - | or
        IParser<LexicalToken, BinaryOperatorType> addOperator = Choice(
            from _ in Operator("+")
            select BinaryOperatorType.Add,
            from _ in Operator("-")
            select BinaryOperatorType.Subtract,
            from _ in Keyword("or")
            select BinaryOperatorType.Or);

        return addOperator.Next(
            op => TermParser()
                .Map(right => new BinaryOperatorNode(op, left, right))
                .Bind(SimpleExpressionRecursively),
            left);
    }

    /// <summary>
    /// SimpleExpression -> Term | SimpleExpression AddOperator Term
    /// </summary>
    /// <returns>A parser producing the syntax node of a simple expression.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> SimpleExpressionParser()
    {
        // SimpleExpression -> Term | SimpleExpression AddOperator Term
        // After eliminating the left recursion this becomes:
        // SimpleExpression -> Term SimpleExpression'
        // SimpleExpression' -> AddOperator Term SimpleExpression' | ε
        return TermParser().Bind(SimpleExpressionRecursively);
    }

    /// <summary>
    /// Expression -> SimpleExpression | SimpleExpression RelationOperator SimpleExpression
    /// </summary>
    /// <returns>A parser producing the syntax node of an expression.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> ExpressionParser()
    {
        // RelationOperator -> = | <> | < | <= | > | >=
        IParser<LexicalToken, BinaryOperatorType> relationOperator = Choice(
            from _ in Operator("=")
            select BinaryOperatorType.Equal,
            from _ in Operator("<>")
            select BinaryOperatorType.NotEqual,
            from _ in Operator("<")
            select BinaryOperatorType.Less,
            from _ in Operator("<=")
            select BinaryOperatorType.LessEqual,
            from _ in Operator(">")
            select BinaryOperatorType.Greater,
            from _ in Operator(">=")
            select BinaryOperatorType.GreaterEqual);

        // The relational form is listed first; the plain simple expression is the second alternative.
        return Choice(
            from left in SimpleExpressionParser()
            from op in relationOperator
            from right in SimpleExpressionParser()
            select new BinaryOperatorNode(op, left, right),
            SimpleExpressionParser()
        );
    }

    /// <summary>
    /// ExpressionList -> Expression | ExpressionList , Expression
    /// </summary>
    /// <returns>A parser producing the expressions of a comma-separated list.</returns>
    public static IParser<LexicalToken, IEnumerable<SyntaxNodeBase>> ExpressionsParser()
        => ExpressionParser().SeparatedBy1(Delimiter(","));

    /// <summary>
    /// Variable -> Identifier [ ExpressionList ] | Identifier
    /// </summary>
    /// <returns>A parser producing a <see cref="VariableNode"/>, with index expressions when present.</returns>
    public static IParser<LexicalToken, VariableNode> VariableParser()
    {
        return Choice(
            // Indexed form: identifier followed by a bracketed expression list.
            from token in IdentifierParser()
            from _ in Delimiter("[")
            from expressions in ExpressionsParser()
            from _1 in Delimiter("]")
            select new VariableNode(token, expressions),
            // Plain identifier.
            from token in IdentifierParser()
            select new VariableNode(token)
        );
    }

    /// <summary>
    /// If -> if Expression then Statement | if Expression then Statement else Statement
    /// </summary>
    /// <returns>A parser producing an <see cref="IfNode"/>.</returns>
    public static IParser<LexicalToken, IfNode> IfParser()
    {
        // Shared prefix of both alternatives: if Expression then Statement
        IParser<LexicalToken, IfNode> commonPart =
            from _ in Keyword("if")
            from condition in ExpressionParser()
            from _1 in Keyword("then")
            from statement in StatementParser()
            select new IfNode(condition, statement);

        return Choice(
            // With an else branch: rebuild the node from the prefix plus the else statement.
            from common in commonPart
            from _ in Keyword("else")
            from elseStatement in StatementParser()
            select new IfNode(common.Condition, common.Statement, elseStatement),
            commonPart
        );
    }

    /// <summary>
    /// For -> for Identifier := Expression to Expression do Statement
    /// </summary>
    /// <returns>A parser producing a <see cref="ForNode"/>.</returns>
    public static IParser<LexicalToken, ForNode> ForParser()
    {
        return from _ in Keyword("for")
            from identifier in IdentifierParser()
            from _1 in Operator(":=")
            from left in ExpressionParser()
            from _2 in Keyword("to")
            from right in ExpressionParser()
            from _3 in Keyword("do")
            from statement in StatementParser()
            select new ForNode(identifier, left, right, statement);
    }

    /// <summary>
    /// While -> while Expression do Statement
    /// </summary>
    /// <returns>A parser producing a <see cref="WhileNode"/>.</returns>
    public static IParser<LexicalToken, WhileNode> WhileParser()
    {
        return from _ in Keyword("while")
            from condition in ExpressionParser()
            from _1 in Keyword("do")
            from statement in StatementParser()
            select new WhileNode(condition, statement);
    }

    /// <summary>
    /// Identifier ( Arguments ), where Arguments is a comma-separated list of expressions
    /// built with <c>SeparatedBy</c> (presumably allowing an empty list; confirm against the
    /// combinator library). Shared by <see cref="FactorParser"/> and <see cref="ProcedureCallParser"/>.
    /// </summary>
    private static IParser<LexicalToken, ProcedureCallNode> ProcedureCallWithArgumentsParser()
    {
        return from identifier in IdentifierParser()
            from _ in Delimiter("(")
            from expressions in ExpressionParser().SeparatedBy(Delimiter(","))
            from _1 in Delimiter(")")
            select new ProcedureCallNode(identifier, expressions);
    }

    /// <summary>
    /// ProcedureCall -> Identifier ( Arguments ) | Identifier
    /// </summary>
    /// <returns>
    /// A parser producing a <see cref="ProcedureCallNode"/>; the bare-identifier form
    /// yields a node with an empty argument list.
    /// </returns>
    public static IParser<LexicalToken, ProcedureCallNode> ProcedureCallParser()
    {
        return Choice(
            ProcedureCallWithArgumentsParser(),
            from identifier in IdentifierParser()
            select new ProcedureCallNode(identifier, [])
        );
    }

    /// <summary>
    /// Statement -> Variable := Expression | ProcedureCall | If | For | While | CompoundStatement
    /// </summary>
    /// <returns>A parser producing the syntax node of a single statement.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> StatementParser()
    {
        return Choice<LexicalToken, SyntaxNodeBase>(
            // Assignment is listed before the procedure call, which also starts with an identifier.
            from variable in VariableParser()
            from _ in Operator(":=")
            from expression in ExpressionParser()
            select new AssignNode(variable, expression),
            ProcedureCallParser(),
            IfParser(),
            ForParser(),
            WhileParser(),
            CompoundStatementParser()
        );
    }

    /// <summary>
    /// CompoundStatement -> begin StatementList end, with statements delimited by ";".
    /// </summary>
    /// <returns>A parser producing a <see cref="BlockNode"/> holding the statements.</returns>
    public static IParser<LexicalToken, BlockNode> CompoundStatementParser()
    {
        return from _1 in Keyword("begin")
            from statements in StatementParser().SeparatedOrEndBy(Delimiter(";"))
            from _2 in Keyword("end")
            select new BlockNode(statements);
    }

    /// <summary>
    /// ConstValue -> - Number | + Number | Number | Char | true | false
    /// </summary>
    /// <returns>A parser producing the syntax node of a constant value.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> ConstValueParser()
    {
        return Choice(
            from _ in Operator("-")
            from num in NumberParser()
            select new UnaryOperatorNode(UnaryOperatorType.Minus, num),
            from _ in Operator("+")
            from num in NumberParser()
            select new UnaryOperatorNode(UnaryOperatorType.Plus, num),
            NumberParser(),
            CharParser(),
            TrueParser(),
            FalseParser()
        );
    }

    /// <summary>
    /// ConstDeclaration -> Identifier = ConstValue
    /// </summary>
    /// <returns>A parser producing a <see cref="ConstantNode"/>.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> ConstDeclarationParser()
    {
        return from identifier in IdentifierParser()
            from _ in Operator("=")
            from node in ConstValueParser()
            select new ConstantNode(identifier, node);
    }

    /// <summary>
    /// ConstDeclarations -> const ConstDeclaration { ; ConstDeclaration } [ ; ]
    /// The whole section is wrapped in <c>Try</c> with an empty <see cref="BlockNode"/> as the
    /// fallback value (presumably used when the section is absent; confirm against the library).
    /// </summary>
    /// <returns>A parser producing a <see cref="BlockNode"/> of constant declarations.</returns>
    public static IParser<LexicalToken, BlockNode> ConstDeclarationsParser()
    {
        return (from _ in Keyword("const")
            from tokens in ConstDeclarationParser().SeparatedOrEndBy1(Delimiter(";"))
            select new BlockNode(tokens)).Try(new BlockNode([]));
    }

    /// <summary>
    /// ArrayType -> array [ Range { , Range } ] of BasicType, where Range -> Integer .. Integer
    /// </summary>
    /// <returns>A parser producing a <see cref="TypeNode"/> carrying the element type and ranges.</returns>
    public static IParser<LexicalToken, TypeNode> ArrayTypeParser()
    {
        // One or more "low .. high" ranges separated by commas.
        IParser<LexicalToken, IEnumerable<ArrayRange>> arrayRangeParser = (
            from left in IntegerParser()
            from _ in Delimiter("..")
            from right in IntegerParser()
            select new ArrayRange(left.Value, right.Value)).SeparatedBy1(Delimiter(","));

        return from _ in Keyword("array")
            from _1 in Delimiter("[")
            from ranges in arrayRangeParser
            from _2 in Delimiter("]")
            from _3 in Keyword("of")
            from typeToken in BasicTypeParser()
            select new TypeNode(typeToken, ranges);
    }

    /// <summary>
    /// Type -> ArrayType | BasicType
    /// </summary>
    /// <returns>A parser producing a <see cref="TypeNode"/>, exposed as <see cref="SyntaxNodeBase"/>.</returns>
    public static IParser<LexicalToken, SyntaxNodeBase> TypeParser()
    {
        return Choice(ArrayTypeParser(),
            from token in BasicTypeParser()
            select new TypeNode(token));
    }

    /// <summary>
    /// VariableDeclaration -> Identifier { , Identifier } : Type
    /// </summary>
    /// <returns>A parser producing a <see cref="VariableDeclarationNode"/>.</returns>
    public static IParser<LexicalToken, VariableDeclarationNode> VariableDeclarationParser()
    {
        return from tokens in IdentifierParser().SeparatedBy1(Delimiter(","))
            from _1 in Delimiter(":")
            from type in TypeParser()
            select new VariableDeclarationNode(tokens, type.Convert<TypeNode>());
    }

    /// <summary>
    /// VariableDeclarations -> var VariableDeclaration { ; VariableDeclaration } [ ; ]
    /// The whole section is wrapped in <c>Try</c> with an empty <see cref="BlockNode"/> as the
    /// fallback value (presumably used when the section is absent; confirm against the library).
    /// </summary>
    /// <returns>A parser producing a <see cref="BlockNode"/> of variable declarations.</returns>
    public static IParser<LexicalToken, BlockNode> VariableDeclarationsParser()
    {
        return (from _ in Keyword("var")
            from nodes in VariableDeclarationParser().SeparatedOrEndBy1(Delimiter(";"))
            select new BlockNode(nodes)).Try(new BlockNode([]));
    }

    /// <summary>
    /// Parameter -> var Identifier { , Identifier } : Type | Identifier { , Identifier } : Type
    /// </summary>
    /// <returns>
    /// A parser producing one <see cref="Parameter"/> per identifier in the group; the first
    /// constructor argument is <c>true</c> for the <c>var</c> form and <c>false</c> otherwise.
    /// </returns>
    public static IParser<LexicalToken, IEnumerable<Parameter>> ParameterParser()
    {
        return Choice(
            from _ in Keyword("var")
            from tokens in IdentifierParser().SeparatedBy1(Delimiter(","))
            from _1 in Delimiter(":")
            from typeToken in TypeParser()
            select tokens.Select(x => new Parameter(true, x, typeToken)),
            from tokens in IdentifierParser().SeparatedBy1(Delimiter(","))
            from _ in Delimiter(":")
            from typeToken in TypeParser()
            select tokens.Select(x => new Parameter(false, x, typeToken))
        );
    }

    /// <summary>
    /// FormalParameter -> ( Parameter { ; Parameter } ), flattened into a single parameter list.
    /// Wrapped in <c>Try</c> with an empty list as the fallback value.
    /// </summary>
    /// <returns>A parser producing all parameters of the subprogram in declaration order.</returns>
    public static IParser<LexicalToken, IEnumerable<Parameter>> FormalParameterParser()
    {
        return (from _ in Delimiter("(")
            from parameters in ParameterParser().SeparatedBy(Delimiter(";"))
            from _1 in Delimiter(")")
            // Each group contributes several parameters; concatenate the groups into one list.
            select parameters.SelectMany(group => group).ToList()).Try([]);
    }

    /// <summary>
    /// SubprogramHead -> procedure Identifier FormalParameter
    ///                 | function Identifier FormalParameter : Type
    /// </summary>
    /// <returns>A parser producing a <see cref="SubprogramHead"/>.</returns>
    public static IParser<LexicalToken, SubprogramHead> SubprogramHeadParser()
    {
        return Choice(
            from _ in Keyword("procedure")
            from identifier in IdentifierParser()
            from parameters in FormalParameterParser()
            select new SubprogramHead(identifier, parameters),
            from _ in Keyword("function")
            from identifier in IdentifierParser()
            from parameters in FormalParameterParser()
            from _1 in Delimiter(":")
            from typeToken in TypeParser()
            select new SubprogramHead(identifier, parameters, typeToken)
        );
    }

    /// <summary>
    /// SubprogramBody -> ConstDeclarations VariableDeclarations CompoundStatement
    /// </summary>
    /// <returns>A parser producing a <see cref="SubprogramBody"/>.</returns>
    public static IParser<LexicalToken, SubprogramBody> SubprogramBodyParser()
    {
        return from constant in ConstDeclarationsParser()
            from variables in VariableDeclarationsParser()
            from block in CompoundStatementParser()
            select new SubprogramBody(constant, variables, block);
    }

    /// <summary>
    /// Subprogram -> SubprogramHead ; SubprogramBody
    /// </summary>
    /// <returns>A parser producing a <see cref="Subprogram"/>.</returns>
    public static IParser<LexicalToken, Subprogram> SubprogramParser()
    {
        return from head in SubprogramHeadParser()
            from _ in Delimiter(";")
            from body in SubprogramBodyParser()
            select new Subprogram(head, body);
    }

    /// <summary>
    /// ProgramBody -> ConstDeclarations VariableDeclarations Subprograms CompoundStatement,
    /// where the subprograms are delimited by ";" and collected into a <see cref="BlockNode"/>.
    /// </summary>
    /// <returns>A parser producing a <see cref="ProgramBody"/>.</returns>
    public static IParser<LexicalToken, ProgramBody> ProgramBodyParser()
    {
        return from constant in ConstDeclarationsParser()
            from variables in VariableDeclarationsParser()
            from subprograms in SubprogramParser().SeparatedOrEndBy(Delimiter(";"))
                .Map(x => new BlockNode(x))
            from block in CompoundStatementParser()
            select new ProgramBody(constant, variables, subprograms, block);
    }

    /// <summary>
    /// ProgramHead -> program Identifier
    /// </summary>
    /// <returns>A parser producing a <see cref="ProgramHead"/>.</returns>
    public static IParser<LexicalToken, ProgramHead> ProgramHeadParser()
    {
        return from _ in Keyword("program")
            from token in IdentifierParser()
            select new ProgramHead(token);
    }

    /// <summary>
    /// Program -> ProgramHead ; ProgramBody .
    /// </summary>
    /// <returns>A parser producing the root <see cref="Program"/> node.</returns>
    public static IParser<LexicalToken, Program> ProgramParser()
    {
        return from head in ProgramHeadParser()
            from _1 in Delimiter(";")
            from body in ProgramBodyParser()
            from _2 in Delimiter(".")
            select new Program(head, body);
    }
}
|
70
CanonSharp.Pascal/Parser/GrammarParserBase.cs
Normal file
70
CanonSharp.Pascal/Parser/GrammarParserBase.cs
Normal file
@@ -0,0 +1,70 @@
|
||||
using CanonSharp.Combinator.Abstractions;
|
||||
using CanonSharp.Combinator.Extensions;
|
||||
using CanonSharp.Pascal.Scanner;
|
||||
using CanonSharp.Pascal.SyntaxTree;
|
||||
using static CanonSharp.Combinator.ParserBuilder;
|
||||
|
||||
namespace CanonSharp.Pascal.Parser;
|
||||
|
||||
/// <summary>
/// Base class providing the token-level parsers (keywords, operators, delimiters,
/// literals, identifiers) on which the grammar parsers are built.
/// </summary>
public abstract class GrammarParserBuilder
{
    /// <summary>
    /// Matches a single keyword token whose literal equals <paramref name="value"/>,
    /// ignoring case.
    /// </summary>
    /// <param name="value">The keyword text to match.</param>
    protected static IParser<LexicalToken, LexicalToken> Keyword(string value)
        => Satisfy<LexicalToken>(token =>
            token.TokenType == LexicalTokenType.Keyword &&
            token.LiteralValue.Equals(value, StringComparison.OrdinalIgnoreCase));

    /// <summary>
    /// Matches a single operator token whose literal is exactly <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The operator text to match.</param>
    protected static IParser<LexicalToken, LexicalToken> Operator(string value)
        => Satisfy<LexicalToken>(token =>
            token.TokenType == LexicalTokenType.Operator && token.LiteralValue == value);

    /// <summary>
    /// Matches a single delimiter token whose literal is exactly <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The delimiter text to match.</param>
    protected static IParser<LexicalToken, LexicalToken> Delimiter(string value)
        => Satisfy<LexicalToken>(token =>
            token.TokenType == LexicalTokenType.Delimiter && token.LiteralValue == value);

    /// <summary>
    /// Matches the keyword <c>true</c> and produces a <see cref="BooleanValueNode"/> holding <c>true</c>.
    /// </summary>
    protected static IParser<LexicalToken, BooleanValueNode> TrueParser()
    {
        return from _ in Keyword("true")
            select new BooleanValueNode(true);
    }

    /// <summary>
    /// Matches the keyword <c>false</c> and produces a <see cref="BooleanValueNode"/> holding <c>false</c>.
    /// </summary>
    protected static IParser<LexicalToken, BooleanValueNode> FalseParser()
    {
        return from _ in Keyword("false")
            select new BooleanValueNode(false);
    }

    /// <summary>
    /// Matches an integer-constant token and produces an <see cref="IntegerValueNode"/>.
    /// </summary>
    /// <remarks>
    /// The literal is parsed with the invariant culture: source-code literals must not
    /// depend on the regional settings of the machine running the compiler.
    /// </remarks>
    protected static IParser<LexicalToken, IntegerValueNode> IntegerParser()
    {
        return from token in Satisfy<LexicalToken>(x => x.TokenType == LexicalTokenType.ConstInteger)
            select new IntegerValueNode(
                int.Parse(token.LiteralValue, System.Globalization.CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// Number -> Integer | Float. Produces an <see cref="IntegerValueNode"/> or a
    /// <see cref="FloatValueNode"/> depending on the token type.
    /// </summary>
    /// <remarks>
    /// The float literal is parsed with the invariant culture, so "." is always the
    /// decimal separator regardless of the current culture.
    /// </remarks>
    protected static IParser<LexicalToken, SyntaxNodeBase> NumberParser()
    {
        return Choice<LexicalToken, SyntaxNodeBase>(
            IntegerParser(),
            from token in Satisfy<LexicalToken>(x => x.TokenType == LexicalTokenType.ConstFloat)
            select new FloatValueNode(
                double.Parse(token.LiteralValue, System.Globalization.CultureInfo.InvariantCulture))
        );
    }

    /// <summary>
    /// Matches a character token and produces a <see cref="CharValueNode"/>.
    /// </summary>
    /// <remarks>
    /// Uses <see cref="char.Parse(string)"/>, which requires the literal to be exactly one
    /// character long. NOTE(review): this assumes the scanner strips the quotes; confirm.
    /// </remarks>
    protected static IParser<LexicalToken, CharValueNode> CharParser()
    {
        return Satisfy<LexicalToken>(token => token.TokenType == LexicalTokenType.Character)
            .Map(x => new CharValueNode(char.Parse(x.LiteralValue)));
    }

    /// <summary>
    /// BasicType -> integer | real | boolean | char. Produces the matched keyword token.
    /// </summary>
    protected static IParser<LexicalToken, LexicalToken> BasicTypeParser()
    {
        return Choice(
            Keyword("integer"),
            Keyword("real"),
            Keyword("boolean"),
            Keyword("char")
        );
    }

    /// <summary>
    /// Matches a single identifier token and produces it unchanged.
    /// </summary>
    protected static IParser<LexicalToken, LexicalToken> IdentifierParser()
    {
        return Satisfy<LexicalToken>(token => token.TokenType == LexicalTokenType.Identifier);
    }
}
|
53
CanonSharp.Pascal/Parser/LexicalTokenReadState.cs
Normal file
53
CanonSharp.Pascal/Parser/LexicalTokenReadState.cs
Normal file
@@ -0,0 +1,53 @@
|
||||
using CanonSharp.Combinator.Abstractions;
|
||||
using CanonSharp.Pascal.Scanner;
|
||||
|
||||
namespace CanonSharp.Pascal.Parser;
|
||||
|
||||
/// <summary>
/// Immutable read position over a list of <see cref="LexicalToken"/>s. Advancing via
/// <see cref="Next"/> creates a new state that shares the same underlying token list.
/// </summary>
public sealed class LexicalTokenReadState : IReadState<LexicalToken, LexicalTokenReadState>
{
    private readonly List<LexicalToken> _tokens;
    private readonly int _pos;

    /// <summary>
    /// The token at the current position. Indexes the list directly, so reading it when
    /// <see cref="HasValue"/> is <c>false</c> throws <see cref="ArgumentOutOfRangeException"/>.
    /// </summary>
    public LexicalToken Current => _tokens[_pos];

    /// <summary>Whether the current position is still inside the token list.</summary>
    public bool HasValue => _pos < _tokens.Count;

    /// <summary>A new state positioned one token further, sharing the same token list.</summary>
    public LexicalTokenReadState Next => new(_tokens, _pos + 1);

    private LexicalTokenReadState(List<LexicalToken> tokens, int pos)
    {
        _tokens = tokens;
        _pos = pos;
    }

    /// <summary>
    /// Creates a state positioned at the first token of a private copy of <paramref name="tokens"/>.
    /// </summary>
    /// <param name="tokens">The token sequence to read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="tokens"/> is <c>null</c>.</exception>
    public LexicalTokenReadState(IEnumerable<LexicalToken> tokens)
    {
        ArgumentNullException.ThrowIfNull(tokens);

        _tokens = tokens.ToList();
        _pos = 0;
    }

    /// <summary>
    /// Two states are equal when they are at the same position over token lists of the
    /// same length whose tokens are pairwise equal.
    /// </summary>
    /// <param name="other">The state to compare with.</param>
    public bool Equals(LexicalTokenReadState? other)
    {
        if (other is null)
        {
            return false;
        }

        // Cheap checks first: a differing position or length decides without touching tokens.
        if (_pos != other._pos || _tokens.Count != other._tokens.Count)
        {
            return false;
        }

        // States derived from one another through Next share the same list instance,
        // so the element-wise comparison can be skipped (assumes token equality is reflexive).
        if (ReferenceEquals(_tokens, other._tokens))
        {
            return true;
        }

        for (int i = 0; i < _tokens.Count; i++)
        {
            if (!_tokens[i].Equals(other._tokens[i]))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Untyped equality, kept consistent with <see cref="Equals(LexicalTokenReadState?)"/>.</summary>
    public override bool Equals(object? obj) => obj is LexicalTokenReadState other && Equals(other);

    /// <summary>
    /// Hash over position and token count only: both are part of equality, so equal
    /// states always hash alike, and the token list is not walked.
    /// </summary>
    public override int GetHashCode() => HashCode.Combine(_pos, _tokens.Count);

    /// <summary>The current token's text, or a fixed marker once the input is exhausted.</summary>
    public override string ToString() => HasValue ? Current.ToString() : "End of input stream.";
}
|
Reference in New Issue
Block a user