From e51d02dd6fc4c7862955995930e18ce1c82c0fa2 Mon Sep 17 00:00:00 2001 From: jackfiled Date: Wed, 11 Sep 2024 18:11:06 +0800 Subject: [PATCH] add: integer_node_parser --- src/lib.rs | 3 +- src/parser.rs | 3 + src/parser/grammar_parser.rs | 32 ++++ src/parser/syntax_tree.rs | 22 +++ src/tokenizer.rs | 12 +- .../{parser.rs => lexical_parser.rs} | 0 src/tokenizer/lexical_token.rs | 143 ++++++++++++++++++ tests/lexical_tests.rs | 11 +- 8 files changed, 221 insertions(+), 5 deletions(-) create mode 100644 src/parser.rs create mode 100644 src/parser/grammar_parser.rs create mode 100644 src/parser/syntax_tree.rs rename src/tokenizer/{parser.rs => lexical_parser.rs} (100%) create mode 100644 src/tokenizer/lexical_token.rs diff --git a/src/lib.rs b/src/lib.rs index 5768976..178698f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ -pub mod tokenizer; \ No newline at end of file +pub mod tokenizer; +pub mod parser; \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..3f4c1db --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,3 @@ +mod syntax_tree; +mod grammar_parser; + diff --git a/src/parser/grammar_parser.rs b/src/parser/grammar_parser.rs new file mode 100644 index 0000000..af7288a --- /dev/null +++ b/src/parser/grammar_parser.rs @@ -0,0 +1,32 @@ +use crate::parser::syntax_tree::SyntaxNode; +use crate::tokenizer::{LexicalTokenSpan, LexicalTokenType}; +use nom::bytes::complete::tag; +use nom::combinator::map; +use nom::{IResult}; + +fn integer_node_parser(input: LexicalTokenSpan) -> IResult { + map(tag(LexicalTokenType::ConstInteger(0)), + |t: LexicalTokenSpan| { + if let LexicalTokenType::ConstInteger(number) = t.span[0].token_type { + SyntaxNode::const_integer(number, t) + } else { + panic!("Illegal integer constant: {}", t.as_str()) + } + })(input) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::parser::syntax_tree::SyntaxNodeType; + use crate::tokenizer::lexical_parser; + + #[test] + fn 
number_parser_test() { + let (_, input) = lexical_parser("123").unwrap(); + dbg!(&input); + let (_, node) = integer_node_parser((&input).into()).unwrap(); + + assert_eq!(SyntaxNodeType::ConstIntegerNode(123), node.node_type); + } +} \ No newline at end of file diff --git a/src/parser/syntax_tree.rs b/src/parser/syntax_tree.rs new file mode 100644 index 0000000..215c8c7 --- /dev/null +++ b/src/parser/syntax_tree.rs @@ -0,0 +1,22 @@ +use crate::tokenizer::LexicalTokenSpan; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum SyntaxNodeType { + ConstIntegerNode(u32), + ConstFloatNode(f32), +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct SyntaxNode<'a> { + pub node_type: SyntaxNodeType, + pub lexical_tokens: LexicalTokenSpan<'a> +} + +impl<'a> SyntaxNode<'a> { + pub fn const_integer(number: u32, span: LexicalTokenSpan<'a>) -> Self { + SyntaxNode { + node_type: SyntaxNodeType::ConstIntegerNode(number), + lexical_tokens: span + } + } +} \ No newline at end of file diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 197b016..7f11946 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1,9 +1,10 @@ use nom::IResult; -use crate::tokenizer::parser::{combine_parser, junk_parser}; +use crate::tokenizer::lexical_parser::{combine_parser, junk_parser}; -mod parser; +mod lexical_parser; +mod lexical_token; -#[derive(PartialEq, Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone)] pub enum LexicalTokenType { Identifier, ConstInteger(u32), @@ -19,6 +20,11 @@ pub struct LexicalToken<'a> { pub literal_value: &'a str, } +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct LexicalTokenSpan<'a> { + pub span: &'a [LexicalToken<'a>] +} + pub fn lexical_parser(mut input: &str) -> IResult<&str, Vec> { let mut array = vec![]; diff --git a/src/tokenizer/parser.rs b/src/tokenizer/lexical_parser.rs similarity index 100% rename from src/tokenizer/parser.rs rename to src/tokenizer/lexical_parser.rs diff --git a/src/tokenizer/lexical_token.rs b/src/tokenizer/lexical_token.rs new 
file mode 100644 index 0000000..e09016f --- /dev/null +++ b/src/tokenizer/lexical_token.rs @@ -0,0 +1,143 @@ +use crate::tokenizer::{LexicalToken, LexicalTokenSpan, LexicalTokenType}; +use nom::{Compare, CompareResult, InputLength, InputTake}; + +impl<'a> LexicalTokenSpan<'a> { + pub fn new(span: &'a [LexicalToken]) -> Self { + Self { + span + } + } +} + +impl<'a> From<&'a [LexicalToken<'a>]> for LexicalTokenSpan<'a> { + fn from(value: &'a [LexicalToken<'a>]) -> Self { + LexicalTokenSpan::new(value) + } +} + +impl<'a> From<&'a Vec>> for LexicalTokenSpan<'a> { + fn from(value: &'a Vec>) -> Self { + LexicalTokenSpan::new(value.as_slice()) + } +} + +impl LexicalTokenSpan<'_> { + pub fn as_str(&self) -> &str { + self.span[0].literal_value + } +} + +impl PartialEq for LexicalTokenType { + fn eq(&self, other: &Self) -> bool { + match self { + LexicalTokenType::ConstInteger(_) => { + match other { + LexicalTokenType::ConstInteger(_) => true, + _ => false + } + } + LexicalTokenType::ConstFloat(_) => { + match other { + LexicalTokenType::ConstFloat(_) => true, + _ => false + } + } + LexicalTokenType::Keyword => { + match other { + LexicalTokenType::Keyword => true, + _ => false + } + } + LexicalTokenType::Identifier => { + match other { + LexicalTokenType::Identifier => true, + _ => false + } + } + LexicalTokenType::Delimiter => { + match other { + LexicalTokenType::Delimiter => true, + _ => false + } + } + LexicalTokenType::Operator => { + match other { + LexicalTokenType::Operator => true, + _ => false + } + } + } + } +} + +impl InputLength for LexicalTokenType { + fn input_len(&self) -> usize { + 1 + } +} + +impl InputLength for LexicalToken<'_> { + fn input_len(&self) -> usize { + 1 + } +} + +impl Compare for LexicalTokenSpan<'_> { + fn compare(&self, t: LexicalTokenType) -> CompareResult { + if self.span.is_empty() { + CompareResult::Incomplete + } else if self.span[0].token_type == t { + CompareResult::Ok + } else { + CompareResult::Error + } + } + + fn 
compare_no_case(&self, t: LexicalTokenType) -> CompareResult { + self.compare(t) + } +} + +impl<'a> Compare<LexicalToken<'a>> for LexicalTokenSpan<'a> { + fn compare(&self, t: LexicalToken) -> CompareResult { + if self.span.is_empty() { + CompareResult::Incomplete + } else if self.span[0] == t { + CompareResult::Ok + } else { + CompareResult::Error + } + } + + fn compare_no_case(&self, t: LexicalToken) -> CompareResult { + self.compare(t) + } +} + +impl InputTake for LexicalTokenSpan<'_> { + fn take(&self, count: usize) -> Self { + LexicalTokenSpan::new(&self.span[..count]) + } + + fn take_split(&self, count: usize) -> (Self, Self) { + // nom's take_split returns (remaining, consumed): suffix first, then prefix + // NOTE: the reversed order is easy to get wrong — keep suffix before prefix + (LexicalTokenSpan::new(suffix), LexicalTokenSpan::new(prefix)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lexical_token_type_test() { + assert_eq!(LexicalTokenType::Identifier, LexicalTokenType::Identifier); + assert_eq!(LexicalTokenType::ConstInteger(1), LexicalTokenType::ConstInteger(2)); + assert_eq!(LexicalTokenType::ConstFloat(3.0), LexicalTokenType::ConstFloat(3.0)); + + assert_ne!(LexicalTokenType::Identifier, LexicalTokenType::Keyword); + assert_ne!(LexicalTokenType::ConstInteger(1), LexicalTokenType::Operator); + } +} \ No newline at end of file diff --git a/tests/lexical_tests.rs b/tests/lexical_tests.rs index 2e3add0..78dca08 100644 --- a/tests/lexical_tests.rs +++ b/tests/lexical_tests.rs @@ -14,10 +14,12 @@ fn validate_tokens(input: &'static str, tokens: Vec) { #[test] fn main_test() { - validate_tokens("int main { return 0; }", vec![ + validate_tokens("int main() { return 0; }", vec![ Keyword, Identifier, Delimiter, + Delimiter, + Delimiter, Keyword, ConstInteger(0), Delimiter, @@ -25,6 +27,13 @@ ]); } +#[test] +fn number_test() { + validate_tokens("123", vec![ + ConstInteger(123) + ]) +} + #[test] fn hexadecimal_test() { validate_tokens("// test hexadecimal define