181 lines
4.7 KiB
Rust
181 lines
4.7 KiB
Rust
|
use crate::tokenizer::{LexicalToken, LexicalTokenType};
|
||
|
use nom::branch::alt;
|
||
|
use nom::bytes::complete::{tag, take_until};
|
||
|
use nom::character::complete::{
|
||
|
alpha1, alphanumeric1, digit0, digit1, hex_digit0, multispace1, oct_digit0, one_of,
|
||
|
};
|
||
|
use nom::combinator::{map, not, peek, recognize, value};
|
||
|
use nom::multi::many0_count;
|
||
|
use nom::sequence::{delimited, pair, tuple};
|
||
|
use nom::IResult;
|
||
|
use std::str::FromStr;
|
||
|
|
||
|
fn keyword_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(
|
||
|
recognize(tuple((
|
||
|
alt((
|
||
|
tag("break"),
|
||
|
tag("const"),
|
||
|
tag("continue"),
|
||
|
tag("else"),
|
||
|
tag("float"),
|
||
|
tag("if"),
|
||
|
tag("int"),
|
||
|
tag("return"),
|
||
|
tag("void"),
|
||
|
tag("while"),
|
||
|
)),
|
||
|
not(alt((peek(alphanumeric1), tag("_")))),
|
||
|
))),
|
||
|
|x| LexicalToken {
|
||
|
token_type: LexicalTokenType::Keyword,
|
||
|
literal_value: x,
|
||
|
},
|
||
|
)(input)
|
||
|
}
|
||
|
|
||
|
fn delimiter_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(
|
||
|
alt((
|
||
|
tag(","),
|
||
|
tag(";"),
|
||
|
tag("("),
|
||
|
tag(")"),
|
||
|
tag("["),
|
||
|
tag("]"),
|
||
|
tag("{"),
|
||
|
tag("}"),
|
||
|
)),
|
||
|
|x| LexicalToken {
|
||
|
token_type: LexicalTokenType::Delimiter,
|
||
|
literal_value: x,
|
||
|
},
|
||
|
)(input)
|
||
|
}
|
||
|
|
||
|
fn operator_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(
|
||
|
alt((
|
||
|
tag(">="),
|
||
|
tag("<="),
|
||
|
tag("=="),
|
||
|
tag("!="),
|
||
|
tag("&&"),
|
||
|
tag("||"),
|
||
|
tag("="),
|
||
|
tag("+"),
|
||
|
tag("-"),
|
||
|
tag("!"),
|
||
|
tag("*"),
|
||
|
tag("/"),
|
||
|
tag("%"),
|
||
|
tag(">"),
|
||
|
tag("<"),
|
||
|
)),
|
||
|
|x| LexicalToken {
|
||
|
token_type: LexicalTokenType::Operator,
|
||
|
literal_value: x,
|
||
|
},
|
||
|
)(input)
|
||
|
}
|
||
|
|
||
|
fn identifier_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(
|
||
|
recognize(pair(
|
||
|
alt((alpha1, tag("_"))),
|
||
|
many0_count(alt((alphanumeric1, tag("_")))),
|
||
|
)),
|
||
|
|s| LexicalToken {
|
||
|
token_type: LexicalTokenType::Identifier,
|
||
|
literal_value: s,
|
||
|
},
|
||
|
)(input)
|
||
|
}
|
||
|
|
||
|
fn decimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(recognize(pair(one_of("123456789"), digit0)), |x| {
|
||
|
let number = u32::from_str_radix(x, 10).unwrap();
|
||
|
|
||
|
LexicalToken {
|
||
|
token_type: LexicalTokenType::ConstInteger(number),
|
||
|
literal_value: x,
|
||
|
}
|
||
|
})(input)
|
||
|
}
|
||
|
|
||
|
fn octal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(recognize(pair(tag("0"), oct_digit0)), |x| {
|
||
|
let number = u32::from_str_radix(x, 8).unwrap();
|
||
|
|
||
|
LexicalToken {
|
||
|
token_type: LexicalTokenType::ConstInteger(number),
|
||
|
literal_value: x,
|
||
|
}
|
||
|
})(input)
|
||
|
}
|
||
|
|
||
|
fn hexadecimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(
|
||
|
recognize(pair(alt((tag("0x"), tag("0X"))), hex_digit0)),
|
||
|
|x: &str| {
|
||
|
let number = u32::from_str_radix(&x[2..], 16).unwrap();
|
||
|
|
||
|
LexicalToken {
|
||
|
token_type: LexicalTokenType::ConstInteger(number),
|
||
|
literal_value: x,
|
||
|
}
|
||
|
},
|
||
|
)(input)
|
||
|
}
|
||
|
|
||
|
fn integer_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
alt((
|
||
|
hexadecimal_integer_parser,
|
||
|
octal_integer_parser,
|
||
|
decimal_integer_parser,
|
||
|
))(input)
|
||
|
}
|
||
|
|
||
|
fn float_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(recognize(tuple((digit1, tag("."), digit1))), |x| {
|
||
|
let number = f32::from_str(x).unwrap();
|
||
|
|
||
|
LexicalToken {
|
||
|
token_type: LexicalTokenType::ConstFloat(number),
|
||
|
literal_value: x,
|
||
|
}
|
||
|
})(input)
|
||
|
}
|
||
|
|
||
|
fn literal_string_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
map(delimited(tag("\""), take_until("\""), tag("\"")), |s| {
|
||
|
LexicalToken {
|
||
|
token_type: LexicalTokenType::LiteralString,
|
||
|
literal_value: s,
|
||
|
}
|
||
|
})(input)
|
||
|
}
|
||
|
|
||
|
fn comments_parser(input: &str) -> IResult<&str, ()> {
|
||
|
alt((
|
||
|
value((), tuple((tag("//"), take_until("\n"), tag("\n")))),
|
||
|
value((), tuple((tag("/*"), take_until("*/"), tag("*/")))),
|
||
|
))(input)
|
||
|
}
|
||
|
|
||
|
pub fn junk_parser(input: &str) -> IResult<&str, ()> {
|
||
|
alt((value((), multispace1), comments_parser))(input)
|
||
|
}
|
||
|
|
||
|
pub fn combine_parser(input: &str) -> IResult<&str, LexicalToken> {
|
||
|
alt((
|
||
|
float_parser,
|
||
|
integer_parser,
|
||
|
literal_string_parser,
|
||
|
keyword_parser,
|
||
|
identifier_parser,
|
||
|
delimiter_parser,
|
||
|
operator_parser,
|
||
|
))(input)
|
||
|
}
|