zero-parser/tests/tokenizer/nom_parsers.rs

use crate::tokenizer::{LexicalToken, LexicalTokenType};
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::{
    alpha1, alphanumeric1, digit0, digit1, hex_digit1, multispace1, oct_digit0, one_of,
};
use nom::combinator::{map, not, peek, recognize, value};
use nom::multi::many0_count;
use nom::sequence::{delimited, pair, tuple};
use nom::IResult;
use std::str::FromStr;

/// Parses a reserved keyword. The trailing `not(...)` rejects the match when
/// the keyword is immediately followed by an identifier character (e.g. `intx`
/// or `int_`), leaving such input to `identifier_parser`.
fn keyword_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        recognize(tuple((
            alt((
                tag("break"),
                tag("const"),
                tag("continue"),
                tag("else"),
                tag("float"),
                tag("if"),
                tag("int"),
                tag("return"),
                tag("void"),
                tag("while"),
            )),
            not(alt((peek(alphanumeric1), tag("_")))),
        ))),
        |x| LexicalToken {
            token_type: LexicalTokenType::Keyword,
            literal_value: x,
        },
    )(input)
}

fn delimiter_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        alt((
            tag(","),
            tag(";"),
            tag("("),
            tag(")"),
            tag("["),
            tag("]"),
            tag("{"),
            tag("}"),
        )),
        |x| LexicalToken {
            token_type: LexicalTokenType::Delimiter,
            literal_value: x,
        },
    )(input)
}

/// Parses an operator. Two-character operators are listed before their
/// one-character prefixes so that e.g. `>=` is not lexed as `>`.
fn operator_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        alt((
            tag(">="),
            tag("<="),
            tag("=="),
            tag("!="),
            tag("&&"),
            tag("||"),
            tag("="),
            tag("+"),
            tag("-"),
            tag("!"),
            tag("*"),
            tag("/"),
            tag("%"),
            tag(">"),
            tag("<"),
        )),
        |x| LexicalToken {
            token_type: LexicalTokenType::Operator,
            literal_value: x,
        },
    )(input)
}

/// Parses an identifier: a letter or `_`, followed by any number of letters,
/// digits, or `_`.
fn identifier_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        recognize(pair(
            alt((alpha1, tag("_"))),
            many0_count(alt((alphanumeric1, tag("_")))),
        )),
        |s| LexicalToken {
            token_type: LexicalTokenType::Identifier,
            literal_value: s,
        },
    )(input)
}

/// Parses a decimal integer literal starting with a non-zero digit.
fn decimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(recognize(pair(one_of("123456789"), digit0)), |x| {
        let number = u32::from_str_radix(x, 10).unwrap();
        LexicalToken {
            token_type: LexicalTokenType::ConstInteger(number),
            literal_value: x,
        }
    })(input)
}

/// Parses an octal integer literal: `0` followed by octal digits. A lone `0`
/// also matches here and parses as zero.
fn octal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(recognize(pair(tag("0"), oct_digit0)), |x| {
        let number = u32::from_str_radix(x, 8).unwrap();
        LexicalToken {
            token_type: LexicalTokenType::ConstInteger(number),
            literal_value: x,
        }
    })(input)
}

/// Parses a hexadecimal integer literal: `0x`/`0X` followed by at least one
/// hex digit. Using `hex_digit1` (rather than `hex_digit0`) makes a bare `0x`
/// fail to match instead of panicking on an empty digit string below.
fn hexadecimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        recognize(pair(alt((tag("0x"), tag("0X"))), hex_digit1)),
        |x: &str| {
            let number = u32::from_str_radix(&x[2..], 16).unwrap();
            LexicalToken {
                token_type: LexicalTokenType::ConstInteger(number),
                literal_value: x,
            }
        },
    )(input)
}

/// Parses any integer literal. Hexadecimal is tried first so the leading `0`
/// of `0x…` is not consumed by the octal parser.
fn integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    alt((
        hexadecimal_integer_parser,
        octal_integer_parser,
        decimal_integer_parser,
    ))(input)
}

/// Parses a floating-point literal of the form `digits.digits`.
fn float_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(recognize(tuple((digit1, tag("."), digit1))), |x| {
        let number = f32::from_str(x).unwrap();
        LexicalToken {
            token_type: LexicalTokenType::ConstFloat(number),
            literal_value: x,
        }
    })(input)
}

/// Parses a double-quoted string literal; the contents are taken verbatim up
/// to the closing quote (no escape-sequence handling).
fn literal_string_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(delimited(tag("\""), take_until("\""), tag("\"")), |s| {
        LexicalToken {
            token_type: LexicalTokenType::LiteralString,
            literal_value: s,
        }
    })(input)
}

/// Consumes a `//` line comment (through the terminating newline) or a
/// `/* … */` block comment.
fn comments_parser(input: &str) -> IResult<&str, ()> {
    alt((
        value((), tuple((tag("//"), take_until("\n"), tag("\n")))),
        value((), tuple((tag("/*"), take_until("*/"), tag("*/")))),
    ))(input)
}

/// Consumes one run of insignificant input: whitespace or a comment.
pub fn junk_parser(input: &str) -> IResult<&str, ()> {
    alt((value((), multispace1), comments_parser))(input)
}
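
// Illustrative sketch (not part of the original file): a quick check that
// `junk_parser` consumes a line comment and leaves the following code intact.
#[test]
fn junk_parser_skips_a_line_comment() {
    let (rest, ()) = junk_parser("// note\nint x;").unwrap();
    assert_eq!(rest, "int x;");
}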

/// Parses the next token. Floats are tried before integers so `1.5` is not
/// split at the dot, and keywords before identifiers so `int` is not lexed as
/// an identifier.
pub fn combine_parser(input: &str) -> IResult<&str, LexicalToken> {
    alt((
        float_parser,
        integer_parser,
        literal_string_parser,
        keyword_parser,
        identifier_parser,
        delimiter_parser,
        operator_parser,
    ))(input)
}
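
// Illustrative sketch (not part of the original file): `combine_parser` on a
// couple of inputs. Only the `LexicalToken` fields and `LexicalTokenType`
// variants used above are assumed; no extra derives are relied on, hence
// `matches!` rather than `assert_eq!` on the token type.
#[test]
fn combine_parser_lexes_keywords_and_hex_integers() {
    let (rest, token) = combine_parser("int x").unwrap();
    assert!(matches!(token.token_type, LexicalTokenType::Keyword));
    assert_eq!(token.literal_value, "int");
    assert_eq!(rest, " x");

    let (rest, token) = combine_parser("0x1F + 2").unwrap();
    assert!(matches!(token.token_type, LexicalTokenType::ConstInteger(31)));
    assert_eq!(token.literal_value, "0x1F");
    assert_eq!(rest, " + 2");
}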