267 lines
7.0 KiB
Rust
267 lines
7.0 KiB
Rust
use crate::tokenizer::{LexicalTokenType, NewLexicalToken};
|
|
use nom::AsChar;
|
|
use std::cell::RefCell;
|
|
use std::rc::Rc;
|
|
use std::str::FromStr;
|
|
use zero_parser::combinators::{quote, take_till, tuple, ParserExt};
|
|
use zero_parser::parser::{any, Parser, ParserContext, ParserResult};
|
|
use zero_parser::text::{char_parser, one_of, string_parser};
|
|
use zero_parser::{alternate, parser::satisfy};
|
|
|
|
pub fn keyword_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
tuple((
|
|
alternate!(
|
|
string_parser("break"),
|
|
string_parser("const"),
|
|
string_parser("continue"),
|
|
string_parser("else"),
|
|
string_parser("float"),
|
|
string_parser("if"),
|
|
string_parser("int"),
|
|
string_parser("return"),
|
|
string_parser("void"),
|
|
string_parser("while")
|
|
),
|
|
alternate!(satisfy(|c: &char| c.is_alphanumeric()), char_parser('_'))
|
|
.look_ahead()
|
|
.reverse(()),
|
|
))
|
|
.literal()
|
|
.map(|x| NewLexicalToken {
|
|
token_type: LexicalTokenType::Keyword,
|
|
literal_value: x,
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
pub fn delimiter_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
alternate!(
|
|
char_parser(','),
|
|
char_parser(';'),
|
|
char_parser('('),
|
|
char_parser(')'),
|
|
char_parser('['),
|
|
char_parser(']'),
|
|
char_parser('{'),
|
|
char_parser('}')
|
|
)
|
|
.literal()
|
|
.map(|x| NewLexicalToken {
|
|
token_type: LexicalTokenType::Delimiter,
|
|
literal_value: x,
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
/// Parses a single operator token.
///
/// The order inside `alternate!` is significant: every two-character
/// operator (`>=`, `<=`, `==`, `!=`, `&&`, `||`) is tried before the
/// one-character operators, so that e.g. `>=` is not lexed as `>`
/// followed by `=`.
pub fn operator_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    alternate!(
        // Two-character operators first (longest match must win).
        string_parser(">="),
        string_parser("<="),
        string_parser("=="),
        string_parser("!="),
        string_parser("&&"),
        string_parser("||"),
        // One-character operators.
        string_parser("="),
        string_parser("+"),
        string_parser("-"),
        string_parser("!"),
        string_parser("*"),
        string_parser("/"),
        string_parser("%"),
        string_parser(">"),
        string_parser("<")
    )
    .literal()
    .map(|x| NewLexicalToken {
        token_type: LexicalTokenType::Operator,
        literal_value: x,
    })
    .parse(context, input)
}
|
|
|
|
pub fn identifier_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
tuple((
|
|
alternate!(satisfy(|c: &char| c.is_alphabetic()), char_parser('_')),
|
|
alternate!(satisfy(|c: &char| c.is_alphanumeric()), char_parser('_')).many(),
|
|
))
|
|
.literal()
|
|
.map(|x| NewLexicalToken {
|
|
token_type: LexicalTokenType::Identifier,
|
|
literal_value: x,
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
pub fn decimal_integer_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
tuple((
|
|
one_of("123456789"),
|
|
satisfy(|c: &char| c.is_ascii_digit()).many(),
|
|
))
|
|
.literal()
|
|
.map(|x| {
|
|
let word: String = x.iter().map(|x| x.clone()).collect();
|
|
let number = u32::from_str_radix(word.as_str(), 10).unwrap();
|
|
|
|
NewLexicalToken {
|
|
token_type: LexicalTokenType::ConstInteger(number),
|
|
literal_value: x,
|
|
}
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
pub fn octal_integer_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
tuple((
|
|
char_parser('0'),
|
|
satisfy(|c: &char| c.is_oct_digit()).many(),
|
|
))
|
|
.literal()
|
|
.map(|x| {
|
|
let word: String = x.iter().collect();
|
|
let number = u32::from_str_radix(word.as_str(), 8).unwrap();
|
|
|
|
NewLexicalToken {
|
|
token_type: LexicalTokenType::ConstInteger(number),
|
|
literal_value: x,
|
|
}
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
pub fn hexadecimal_integer_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
tuple((
|
|
alternate!(string_parser("0x"), string_parser("0X")),
|
|
satisfy(|c: &char| c.is_hex_digit()).many(),
|
|
))
|
|
.literal()
|
|
.map(|x| {
|
|
let word: String = (&x[2..]).iter().collect();
|
|
let number = u32::from_str_radix(word.as_str(), 16).unwrap();
|
|
|
|
NewLexicalToken {
|
|
token_type: LexicalTokenType::ConstInteger(number),
|
|
literal_value: x,
|
|
}
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
/// Parses any integer literal by trying hexadecimal, then octal, then
/// decimal.
///
/// The order is significant: `0x…` must be tried before octal (whose
/// leading `0` would otherwise match first), and octal before decimal
/// (which rejects a leading `0`).
pub fn integer_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    alternate!(
        hexadecimal_integer_parser,
        octal_integer_parser,
        decimal_integer_parser
    )
    .parse(context, input)
}
|
|
|
|
pub fn float_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
tuple((
|
|
satisfy(|c: &char| c.is_ascii_digit()).many1(),
|
|
char_parser('.'),
|
|
satisfy(|c: &char| c.is_ascii_digit()).many1(),
|
|
))
|
|
.literal()
|
|
.map(|x| {
|
|
let word: String = x.iter().map(|c| c.clone()).collect();
|
|
let number = f32::from_str(word.as_str()).unwrap();
|
|
|
|
NewLexicalToken {
|
|
token_type: LexicalTokenType::ConstFloat(number),
|
|
literal_value: x,
|
|
}
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
pub fn literal_string_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, NewLexicalToken> {
|
|
quote(char_parser('"'), any(), char_parser('"'))
|
|
.literal()
|
|
.map(|x| {
|
|
let length = x.len();
|
|
NewLexicalToken {
|
|
token_type: LexicalTokenType::LiteralString,
|
|
literal_value: &x[1..length - 1],
|
|
}
|
|
})
|
|
.parse(context, input)
|
|
}
|
|
|
|
/// Consumes a comment, producing no token.
///
/// Two forms are accepted:
/// * line comments: `//` up to and including the next newline;
/// * block comments: `/*` up to and including the closing `*/`.
///
/// NOTE(review): a `//` comment on the last line of input with no trailing
/// newline fails to parse, because the terminating `'\n'` is mandatory —
/// confirm that inputs are always newline-terminated.
pub fn comments_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, ()> {
    alternate!(
        // `//` line comment, terminated by a newline.
        tuple((
            string_parser("//"),
            take_till(char_parser('\n')),
            char_parser('\n')
        ))
        .map(|_| ()),
        // `/* ... */` block comment, terminated by the first `*/`.
        tuple((
            string_parser("/*"),
            take_till(string_parser("*/")),
            string_parser("*/")
        ))
        .map(|_| ())
    )
    .parse(context, input)
}
|
|
|
|
pub fn junk_parser(
|
|
context: Rc<RefCell<ParserContext<char, ()>>>,
|
|
input: &[char],
|
|
) -> ParserResult<char, ()> {
|
|
alternate!(
|
|
comments_parser,
|
|
satisfy(|c: &char| c.is_whitespace()).many1().map(|_| ())
|
|
)
|
|
.parse(context, input)
|
|
}
|
|
|
|
/// Parses the next lexical token by trying each token parser in turn.
///
/// The order is significant:
/// * `float_parser` before `integer_parser`, so `1.5` is not lexed as the
///   integer `1` followed by `.5`;
/// * `keyword_parser` before `identifier_parser`, so reserved words are not
///   lexed as identifiers (the keyword parser's look-ahead guard keeps
///   identifiers like `intx` from matching as keywords).
pub fn combine_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    alternate!(
        float_parser,
        integer_parser,
        literal_string_parser,
        keyword_parser,
        identifier_parser,
        delimiter_parser,
        operator_parser
    )
    .parse(context, input)
}
|