use crate::tokenizer::{LexicalToken, LexicalTokenType};
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::{
    alpha1, alphanumeric1, digit0, digit1, hex_digit1, multispace1, oct_digit0, one_of,
};
use nom::combinator::{map, not, peek, recognize, value};
use nom::multi::many0_count;
use nom::sequence::{delimited, pair, tuple};
use nom::IResult;
use std::str::FromStr;

/// Matches a reserved keyword, but only when it is not immediately followed by
/// an identifier character (so `integer` does not lex as the keyword `int`).
fn keyword_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        recognize(tuple((
            alt((
                tag("break"),
                tag("const"),
                tag("continue"),
                tag("else"),
                tag("float"),
                tag("if"),
                tag("int"),
                tag("return"),
                tag("void"),
                tag("while"),
            )),
            not(alt((peek(alphanumeric1), tag("_")))),
        ))),
        |x| LexicalToken {
            token_type: LexicalTokenType::Keyword,
            literal_value: x,
        },
    )(input)
}

/// Matches a single-character delimiter: `,` `;` `(` `)` `[` `]` `{` `}`.
fn delimiter_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        alt((
            tag(","),
            tag(";"),
            tag("("),
            tag(")"),
            tag("["),
            tag("]"),
            tag("{"),
            tag("}"),
        )),
        |x| LexicalToken {
            token_type: LexicalTokenType::Delimiter,
            literal_value: x,
        },
    )(input)
}

/// Matches an operator. Two-character operators are listed first so that, for
/// example, `>=` is not split into `>` followed by `=`.
fn operator_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        alt((
            tag(">="),
            tag("<="),
            tag("=="),
            tag("!="),
            tag("&&"),
            tag("||"),
            tag("="),
            tag("+"),
            tag("-"),
            tag("!"),
            tag("*"),
            tag("/"),
            tag("%"),
            tag(">"),
            tag("<"),
        )),
        |x| LexicalToken {
            token_type: LexicalTokenType::Operator,
            literal_value: x,
        },
    )(input)
}

/// Matches an identifier: a letter or underscore followed by any number of
/// letters, digits, or underscores.
fn identifier_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        recognize(pair(
            alt((alpha1, tag("_"))),
            many0_count(alt((alphanumeric1, tag("_")))),
        )),
        |s| LexicalToken {
            token_type: LexicalTokenType::Identifier,
            literal_value: s,
        },
    )(input)
}

/// Matches a decimal integer literal: a non-zero digit followed by digits.
fn decimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(recognize(pair(one_of("123456789"), digit0)), |x| {
        let number = u32::from_str_radix(x, 10).unwrap();
        LexicalToken {
            token_type: LexicalTokenType::ConstInteger(number),
            literal_value: x,
        }
    })(input)
}

/// Matches an octal integer literal (`0` followed by octal digits); a bare `0`
/// is also accepted here.
fn octal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(recognize(pair(tag("0"), oct_digit0)), |x| {
        let number = u32::from_str_radix(x, 8).unwrap();
        LexicalToken {
            token_type: LexicalTokenType::ConstInteger(number),
            literal_value: x,
        }
    })(input)
}

/// Matches a hexadecimal integer literal (`0x` or `0X` followed by hex digits).
/// `hex_digit1` requires at least one digit, so the `unwrap` below never sees
/// an empty string; a lone `0x` instead falls through to the other parsers.
fn hexadecimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(
        recognize(pair(alt((tag("0x"), tag("0X"))), hex_digit1)),
        |x: &str| {
            let number = u32::from_str_radix(&x[2..], 16).unwrap();
            LexicalToken {
                token_type: LexicalTokenType::ConstInteger(number),
                literal_value: x,
            }
        },
    )(input)
}

/// Matches any integer literal. Hexadecimal is tried before octal so that the
/// leading `0` of `0x...` is not consumed as an octal literal.
fn integer_parser(input: &str) -> IResult<&str, LexicalToken> {
    alt((
        hexadecimal_integer_parser,
        octal_integer_parser,
        decimal_integer_parser,
    ))(input)
}

/// Matches a floating-point literal of the form `digits.digits`.
fn float_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(recognize(tuple((digit1, tag("."), digit1))), |x| {
        let number = f32::from_str(x).unwrap();
        LexicalToken {
            token_type: LexicalTokenType::ConstFloat(number),
            literal_value: x,
        }
    })(input)
}

/// Matches a double-quoted string literal (no escape-sequence handling).
fn literal_string_parser(input: &str) -> IResult<&str, LexicalToken> {
    map(delimited(tag("\""), take_until("\""), tag("\"")), |s| {
        LexicalToken {
            token_type: LexicalTokenType::LiteralString,
            literal_value: s,
        }
    })(input)
}

/// Matches a `//` line comment or a `/* ... */` block comment, discarding it.
fn comments_parser(input: &str) -> IResult<&str, ()> {
    alt((
        value((), tuple((tag("//"), take_until("\n"), tag("\n")))),
        value((), tuple((tag("/*"), take_until("*/"), tag("*/")))),
    ))(input)
}

/// Matches material the tokenizer should skip: whitespace or comments.
pub fn junk_parser(input: &str) -> IResult<&str, ()> {
    alt((value((), multispace1), comments_parser))(input)
}

/// Matches a single token. Order matters: floats are tried before integers so
/// `1.5` is not split at the dot, and keywords before identifiers so `int` is
/// not lexed as an identifier.
pub fn combine_parser(input: &str) -> IResult<&str, LexicalToken> {
    alt((
        float_parser,
        integer_parser,
        literal_string_parser,
        keyword_parser,
        identifier_parser,
        delimiter_parser,
        operator_parser,
    ))(input)
}
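
// A minimal usage sketch, not part of the original module: it shows how
// `junk_parser` and `combine_parser` are expected to be alternated by the
// surrounding tokenizer loop. It assumes `LexicalTokenType` implements
// `PartialEq` and `Debug` (not shown in this file), which `assert_eq!` needs.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_a_small_snippet() {
        let mut input = "int x = 0x1F; // answer\n";
        let mut tokens = Vec::new();
        while !input.is_empty() {
            // Skip whitespace and comments first, then read one token.
            if let Ok((rest, ())) = junk_parser(input) {
                input = rest;
                continue;
            }
            let (rest, token) = combine_parser(input).expect("unexpected character");
            tokens.push(token);
            input = rest;
        }

        // Expected tokens: `int`, `x`, `=`, `0x1F`, `;`.
        assert_eq!(tokens.len(), 5);
        assert_eq!(tokens[0].token_type, LexicalTokenType::Keyword);
        assert_eq!(tokens[0].literal_value, "int");
        assert_eq!(tokens[3].token_type, LexicalTokenType::ConstInteger(31));
        assert_eq!(tokens[3].literal_value, "0x1F");
    }
}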