From e278a5dcc629e14da2bb8a3bced3714e27e3e93a Mon Sep 17 00:00:00 2001 From: jackfiled Date: Fri, 20 Sep 2024 17:46:12 +0800 Subject: [PATCH] add: grammar parsers up to statement. --- src/parser/grammar_parser.rs | 384 ++++++++++++++++++++++++++++++++++- src/parser/syntax_tree.rs | 123 +++++++++++ 2 files changed, 504 insertions(+), 3 deletions(-) diff --git a/src/parser/grammar_parser.rs b/src/parser/grammar_parser.rs index 72ebefc..c3ef659 100644 --- a/src/parser/grammar_parser.rs +++ b/src/parser/grammar_parser.rs @@ -1,13 +1,14 @@ use crate::parser::syntax_tree::{BinaryNodeType, SyntaxNode, UnaryNodeType}; use crate::tokenizer::{LexicalTokenSpan, LexicalTokenType}; use nom::branch::alt; -use nom::combinator::map; -use nom::multi::many0; +use nom::combinator::{map, opt}; +use nom::multi::{many0, separated_list0}; use nom::sequence::tuple; use nom::IResult; use std::cell::RefCell; use std::rc::Rc; +/// 匹配词法令牌所用的宏展开 macro_rules! lexical { (Integer) => { nom::bytes::complete::tag( @@ -86,12 +87,61 @@ fn identifier_parser(cursor: LexicalTokenSpan) -> IResult = (Rc>, Vec<(LexicalTokenSpan<'a>, Rc>, LexicalTokenSpan<'a>)>); + +fn left_value_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + identifier_parser, + many0(tuple(( + lexical!(Delimiter("[")), + expression_parser, + lexical!(Delimiter("]")) + ))) + )), + |(first, second): LeftValueParseType| { + let mut node = first; + + for (_, index_expression, _) in second { + node = SyntaxNode::binary_node(BinaryNodeType::Index, &node, &index_expression); + } + + node + }, + )(cursor) +} + +/// 处理没有任何后缀(即数组下标)的基本表达式 fn primary_parser(curser: LexicalTokenSpan) -> IResult>> { + // primary_parser -> '(' expression_parser ')' | identifier '(' [ expression_parser (',' expression_parser)* ] ')' | + // left_value_parser | identifier_parser | integer_node_parser | float_node_parser | literal_string_node_parser alt(( + map( + tuple(( + lexical!(Delimiter ("(")), + expression_parser, + lexical!(Delimiter (")")) + )), + |(_, expression, _)| { + expression + }, + ), + map( + tuple(( + identifier_parser, + lexical!(Delimiter("(")), + separated_list0(lexical!(Delimiter (",")), expression_parser), + lexical!(Delimiter(")")) + )), + |(identifier, _, arguments, _)| { + SyntaxNode::function_call(&identifier, &arguments) + }, + ), + left_value_parser, identifier_parser, integer_node_parser, float_node_parser, - literal_string_node_parser + literal_string_node_parser, ))(curser) } @@ -328,6 +378,148 @@ fn condition_parser(cursor: LexicalTokenSpan) -> IResult IResult>> { + add_parser(cursor) +} + +fn assign_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + left_value_parser, + lexical!(Operator("=")), + expression_parser, + lexical!(Delimiter(";")) + )), + |(left_value, _, expresison, _)| { + SyntaxNode::assign_statement(&left_value, &expresison) + }, + )(cursor) +} + +fn expression_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + opt(expression_parser), + lexical!(Delimiter(";")) + )), + |(expression, _)| { + match expression { + Some(expression) => SyntaxNode::expression_statement(&expression), + None => SyntaxNode::expression_statement(&SyntaxNode::unit()) + } + }, + )(cursor) +} + +fn if_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + lexical!(Keyword("if")), + lexical!(Delimiter("(")), + condition_parser, + lexical!(Delimiter(")")), + statement_parser, + opt( + tuple(( + lexical!(Keyword("else")), + statement_parser + )) + ) + )), + |(_, _, condition, _, if_statement, else_part)| { + let else_statement = match else_part { + Some(else_part) => else_part.1, + None => SyntaxNode::unit() + }; + + SyntaxNode::if_statement(&condition, &if_statement, &else_statement) + }, + )(cursor) +} + +fn while_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + lexical!(Keyword("while")), + lexical!(Delimiter("(")), + condition_parser, + lexical!(Delimiter(")")), + statement_parser + )), + |(_, _, condition, _, statement)| { + SyntaxNode::while_statement(&condition, &statement) + }, + )(cursor) +} + +fn break_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + lexical!(Keyword("break")), + lexical!(Delimiter(";")) + )), + |_| { + SyntaxNode::break_statement() + }, + )(cursor) +} + +fn continue_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + lexical!(Keyword("continue")), + lexical!(Delimiter(";")) + )), + |_| { + SyntaxNode::continue_statement() + }, + )(cursor) +} + +fn return_statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + lexical!(Keyword("return")), + opt(expression_parser), + lexical!(Delimiter(";")) + )), + |(_, expression, _)| { + match expression { + None => SyntaxNode::return_statement(&SyntaxNode::unit()), + Some(expression) => SyntaxNode::return_statement(&expression) + } + }, + )(cursor) +} + +fn statement_parser(cursor: LexicalTokenSpan) -> IResult>> { + alt(( + if_statement_parser, + while_statement_parser, + break_statement_parser, + continue_statement_parser, + return_statement_parser, + block_parser, + assign_statement_parser, + expression_statement_parser, + ))(cursor) +} + +fn block_parser(cursor: LexicalTokenSpan) -> IResult>> { + map( + tuple(( + lexical!(Delimiter("{")), + many0(alt(( + statement_parser, + ))), + lexical!(Delimiter("}")) + )), + |(_, statements, _): (LexicalTokenSpan, Vec>>, LexicalTokenSpan)| { + SyntaxNode::block(&statements) + }, + )(cursor) +} + #[cfg(test)] mod test { use super::*; @@ -511,4 +703,190 @@ mod test { validate_syntax_node("i != 1 && i != 2", &node.borrow().node_type, condition_parser); } + + #[test] + fn expression_with_parethness_test() { + let node = SyntaxNode::binary_node(BinaryNodeType::Multiply, + &SyntaxNode::const_integer(3), + &SyntaxNode::binary_node(BinaryNodeType::Subtract, + &SyntaxNode::const_integer(5), + &SyntaxNode::const_integer(2))); + + validate_syntax_node("3 * (5 - 2)", &node.borrow().node_type, expression_parser); + } + + #[test] + fn array_index_expression_test() { + let node = SyntaxNode::binary_node(BinaryNodeType::Index, + &SyntaxNode::identifier("array".to_owned()), + &SyntaxNode::const_integer(0)); + + validate_syntax_node("array[0]", &node.borrow().node_type, expression_parser); + + let node = SyntaxNode::binary_node(BinaryNodeType::Index, + &SyntaxNode::binary_node(BinaryNodeType::Index, + &SyntaxNode::identifier("array".to_owned()), + &SyntaxNode::const_integer(0)), + &SyntaxNode::const_integer(0)); + + validate_syntax_node("array[0][0]", &node.borrow().node_type, expression_parser); + + let node = SyntaxNode::binary_node(BinaryNodeType::Index, + &SyntaxNode::identifier("array".to_owned()), + &SyntaxNode::binary_node(BinaryNodeType::Add, + &SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(1))); + + validate_syntax_node("array[i + 1]", &node.borrow().node_type, expression_parser); + } + + #[test] + fn function_call_expression_test() { + let node = SyntaxNode::function_call(&SyntaxNode::identifier("empty_function".to_owned()), + &vec![]); + + validate_syntax_node("empty_function()", &node.borrow().node_type, expression_parser); + + let node = SyntaxNode::function_call(&SyntaxNode::identifier("add".to_owned()), + &vec![SyntaxNode::const_integer(1), SyntaxNode::const_integer(1)]); + + validate_syntax_node("add(1, 1)", &node.borrow().node_type, expression_parser); + } + + #[test] + fn assign_statement_test() { + let node = SyntaxNode::assign_statement( + &SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(1), + ); + + validate_syntax_node("i = 1;", &node.borrow().node_type, assign_statement_parser); + validate_syntax_node("i = 1;", &node.borrow().node_type, statement_parser); + + let node = SyntaxNode::assign_statement( + &SyntaxNode::binary_node(BinaryNodeType::Index, + &SyntaxNode::identifier("array".to_owned()), + &SyntaxNode::const_integer(10)), + &SyntaxNode::identifier("a".to_owned()), + ); + + validate_syntax_node("array[10] = a;", &node.borrow().node_type, assign_statement_parser); + validate_syntax_node("array[10] = a;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn expression_statement_test() { + let node = SyntaxNode::expression_statement(&SyntaxNode::unit()); + + validate_syntax_node(";", &node.borrow().node_type, expression_statement_parser); + validate_syntax_node(";", &node.borrow().node_type, statement_parser); + + let node = SyntaxNode::expression_statement(&SyntaxNode::binary_node( + BinaryNodeType::Add, + &SyntaxNode::const_integer(1), + &SyntaxNode::const_integer(1), + )); + + validate_syntax_node("1 + 1;", &node.borrow().node_type, expression_statement_parser); + validate_syntax_node("1 + 1;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn if_statement_test() { + let node = SyntaxNode::if_statement( + &SyntaxNode::binary_node(BinaryNodeType::Equal, + &SyntaxNode::identifier("i".to_owned()), &SyntaxNode::const_integer(1)), + &SyntaxNode::assign_statement(&SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(2)), + &SyntaxNode::assign_statement(&SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(1)), + ); + + validate_syntax_node("if (i == 1) i = 2; else i = 1;", &node.borrow().node_type, if_statement_parser); + validate_syntax_node("if (i == 1) i = 2; else i = 1;", &node.borrow().node_type, statement_parser); + + let node = SyntaxNode::if_statement( + &SyntaxNode::binary_node(BinaryNodeType::Equal, + &SyntaxNode::identifier("i".to_owned()), &SyntaxNode::const_integer(1)), + &SyntaxNode::assign_statement(&SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(2)), + &SyntaxNode::unit() + ); + + validate_syntax_node("if (i == 1) i = 2;", &node.borrow().node_type, if_statement_parser); + validate_syntax_node("if (i == 1) i = 2;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn multiplx_if_statement_test() { + let node = SyntaxNode::if_statement( + &SyntaxNode::binary_node(BinaryNodeType::Equal, + &SyntaxNode::identifier("i".to_owned()), &SyntaxNode::const_integer(1)), + &SyntaxNode::if_statement( + &SyntaxNode::binary_node(BinaryNodeType::Equal, + &SyntaxNode::identifier("i".to_owned()), &SyntaxNode::const_integer(2)), + &SyntaxNode::assign_statement(&SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(2)), + &SyntaxNode::assign_statement(&SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(1)) + ), + &SyntaxNode::unit() + ); + + validate_syntax_node("if (i == 1) if (i == 2) i = 2; else i = 1;", &node.borrow().node_type, if_statement_parser); + validate_syntax_node("if (i == 1) if (i == 2) i = 2; else i = 1;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn while_statement_test() { + let node = SyntaxNode::while_statement( + &SyntaxNode::binary_node(BinaryNodeType::NotEqual, + &SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(10)), + &SyntaxNode::assign_statement( + &SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::binary_node(BinaryNodeType::Add, + &SyntaxNode::identifier("i".to_owned()), + &SyntaxNode::const_integer(1)) + ) + ); + + validate_syntax_node("while (i != 10 ) i = i + 1;", &node.borrow().node_type, while_statement_parser); + validate_syntax_node("while (i != 10 ) i = i + 1;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn break_statement_test() { + let node = SyntaxNode::break_statement(); + + validate_syntax_node("break;", &node.borrow().node_type, break_statement_parser); + validate_syntax_node("break;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn continue_statement_test() { + let node = SyntaxNode::continue_statement(); + + validate_syntax_node("continue;", &node.borrow().node_type, continue_statement_parser); + validate_syntax_node("continue;", &node.borrow().node_type, statement_parser); + } + + #[test] + fn return_statement_test() { + let node = SyntaxNode::return_statement( + &SyntaxNode::binary_node(BinaryNodeType::Add, + &SyntaxNode::const_integer(1), + &SyntaxNode::const_integer(1)) + ); + + validate_syntax_node("return 1 + 1;", &node.borrow().node_type, return_statement_parser); + validate_syntax_node("return 1 + 1;", &node.borrow().node_type, statement_parser); + + let node = SyntaxNode::return_statement( + &SyntaxNode::unit() + ); + + validate_syntax_node("return;", &node.borrow().node_type, return_statement_parser); + validate_syntax_node("return;", &node.borrow().node_type, statement_parser); + } } \ No newline at end of file diff --git a/src/parser/syntax_tree.rs b/src/parser/syntax_tree.rs index 04ee9a7..e3bbff4 100644 --- a/src/parser/syntax_tree.rs +++ b/src/parser/syntax_tree.rs @@ -1,5 +1,6 @@ use std::cell::RefCell; use std::rc::Rc; +use nom::IResult; /// 单元表达式类型 #[derive(Debug, PartialEq)] @@ -32,6 +33,7 @@ pub enum BinaryNodeType { NotEqual, And, Or, + Index, } /// 双元表达式节点 @@ -42,15 +44,59 @@ pub struct BinaryNode { pub right: Rc>, } +#[derive(Debug, PartialEq)] +pub struct FunctionCallNode { + pub function: Rc>, + pub arguments: Vec>>, +} + +#[derive(Debug, PartialEq)] +pub enum BasicTypes { + Void, + Integer, + Float, +} + +#[derive(Debug, PartialEq)] +pub struct IfStatementNode { + pub condition: Rc>, + pub if_statement: Rc>, + pub else_statement: Rc>, +} + +#[derive(Debug, PartialEq)] +pub struct WhileStatmentNode { + pub condition: Rc>, + pub statement: Rc>, +} + +#[derive(Debug, PartialEq)] +pub struct AssignStatmentNode { + pub left_value: Rc>, + pub expression: Rc>, +} + /// 语法分析树节点类型 #[derive(Debug, PartialEq)] pub enum SyntaxNodeType { + /// 空白的语法分析树节点 + Unit, + BreakNode, + ContinueNode, ConstIntegerNode(u32), ConstFloatNode(f32), LiteralStringNode(String), IdentifierNode(String), UnaryExpression(UnaryNode), BinaryExpression(BinaryNode), + FunctionCall(FunctionCallNode), + IfStatement(IfStatementNode), + WhileStatement(WhileStatmentNode), + AssignStatement(AssignStatmentNode), + ExpressionStatement(Rc>), + ReturnStatement(Rc>), + Block(Vec>>), + BasicTypeNode(BasicTypes), } /// 语法分析器节点 @@ -104,4 +150,81 @@ impl SyntaxNode { }) })) } + + pub fn function_call(function: &Rc>, arguments: &Vec>>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::FunctionCall(FunctionCallNode { + function: Rc::clone(function), + arguments: arguments.iter().map(|item| Rc::clone(item)).collect(), + }) + })) + } + + pub fn assign_statement(left_value: &Rc>, expression: &Rc>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::AssignStatement(AssignStatmentNode { + left_value: Rc::clone(left_value), + expression: Rc::clone(expression), + }) + })) + } + + pub fn expression_statement(expression: &Rc>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::ExpressionStatement(Rc::clone(expression)) + })) + } + + pub fn return_statement(expression: &Rc>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::ReturnStatement(Rc::clone(expression)) + })) + } + + pub fn unit() -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::Unit + })) + } + + pub fn break_statement() -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::BreakNode + })) + } + + pub fn continue_statement() -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::ContinueNode + })) + } + + pub fn if_statement(condition: &Rc>, + if_statement: &Rc>, + else_statement: &Rc>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::IfStatement(IfStatementNode { + condition: Rc::clone(condition), + if_statement: Rc::clone(if_statement), + else_statement: Rc::clone(else_statement) + }) + })) + } + + pub fn while_statement(condition: &Rc>, statement: &Rc>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::WhileStatement(WhileStatmentNode { + condition: Rc::clone(condition), + statement: Rc::clone(statement) + }) + })) + } + + pub fn block(statements: &Vec>>) -> Rc> { + Rc::new(RefCell::new(Self { + node_type: SyntaxNodeType::Block(statements.iter().map(|x| Rc::clone(x)).collect()) + })) + } + + } \ No newline at end of file