//! zero-parser/tests/tokenizer/zero_parsers.rs
//!
//! Lexical-token parsers for the tokenizer, built from the
//! `zero_parser` combinator library.
use crate::tokenizer::{LexicalTokenType, NewLexicalToken};
use nom::AsChar;
use std::cell::RefCell;
use std::rc::Rc;
use std::str::FromStr;
use zero_parser::combinators::{quote, take_till, tuple, ParserExt};
use zero_parser::parser::{any, Parser, ParserContext, ParserResult};
use zero_parser::text::{char_parser, one_of, string_parser};
use zero_parser::{alternate, parser::satisfy};
/// Parses a reserved keyword (`break`, `const`, `continue`, `else`,
/// `float`, `if`, `int`, `return`, `void`, `while`).
///
/// A trailing look-ahead rejects matches that are merely a prefix of a
/// longer identifier (e.g. `int` inside `intx`): the keyword only
/// matches when the next character is not `[A-Za-z0-9_]`.
pub fn keyword_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    // `look_ahead` peeks without consuming; `reverse` inverts the
    // outcome, so this succeeds only when NO identifier character
    // immediately follows the keyword.
    let no_identifier_tail =
        alternate!(satisfy(|c: &char| c.is_alphanumeric()), char_parser('_'))
            .look_ahead()
            .reverse(());
    let keyword = alternate!(
        string_parser("break"),
        string_parser("const"),
        string_parser("continue"),
        string_parser("else"),
        string_parser("float"),
        string_parser("if"),
        string_parser("int"),
        string_parser("return"),
        string_parser("void"),
        string_parser("while")
    );
    tuple((keyword, no_identifier_tail))
        .literal()
        .map(|lexeme| NewLexicalToken {
            token_type: LexicalTokenType::Keyword,
            literal_value: lexeme,
        })
        .parse(context, input)
}
/// Parses a single-character delimiter token:
/// `,` `;` `(` `)` `[` `]` `{` `}`.
pub fn delimiter_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    let delimiter = alternate!(
        char_parser(','),
        char_parser(';'),
        char_parser('('),
        char_parser(')'),
        char_parser('['),
        char_parser(']'),
        char_parser('{'),
        char_parser('}')
    );
    delimiter
        .literal()
        .map(|lexeme| NewLexicalToken {
            token_type: LexicalTokenType::Delimiter,
            literal_value: lexeme,
        })
        .parse(context, input)
}
/// Parses an operator token.
///
/// Two-character operators (`>=`, `<=`, `==`, `!=`, `&&`, `||`) are
/// listed BEFORE their one-character prefixes (`>`, `<`, `=`, `!`) so
/// the alternation commits to the longest match first.
pub fn operator_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    let operator = alternate!(
        string_parser(">="),
        string_parser("<="),
        string_parser("=="),
        string_parser("!="),
        string_parser("&&"),
        string_parser("||"),
        string_parser("="),
        string_parser("+"),
        string_parser("-"),
        string_parser("!"),
        string_parser("*"),
        string_parser("/"),
        string_parser("%"),
        string_parser(">"),
        string_parser("<")
    );
    operator
        .literal()
        .map(|lexeme| NewLexicalToken {
            token_type: LexicalTokenType::Operator,
            literal_value: lexeme,
        })
        .parse(context, input)
}
/// Parses an identifier: a letter or `_`, followed by any number of
/// letters, digits, or `_`.
pub fn identifier_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    let leading = alternate!(satisfy(|c: &char| c.is_alphabetic()), char_parser('_'));
    let trailing = alternate!(satisfy(|c: &char| c.is_alphanumeric()), char_parser('_')).many();
    tuple((leading, trailing))
        .literal()
        .map(|lexeme| NewLexicalToken {
            token_type: LexicalTokenType::Identifier,
            literal_value: lexeme,
        })
        .parse(context, input)
}
/// Parses a decimal integer constant: a non-zero digit followed by any
/// number of decimal digits. (A literal starting with `0` is handled
/// by `octal_integer_parser` instead.)
pub fn decimal_integer_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    tuple((
        one_of("123456789"),
        satisfy(|c: &char| c.is_ascii_digit()).many(),
    ))
    .literal()
    .map(|x| {
        // `char` is `Copy`; `.copied()` replaces the manual clone closure.
        let word: String = x.iter().copied().collect();
        // Base-10 `from_str_radix` is just `parse` (clippy: from_str_radix_10).
        // NOTE(review): unwrap panics on literals larger than u32::MAX —
        // confirm overflow is rejected elsewhere or acceptable here.
        let number = word.parse::<u32>().unwrap();
        NewLexicalToken {
            token_type: LexicalTokenType::ConstInteger(number),
            literal_value: x,
        }
    })
    .parse(context, input)
}
/// Parses an octal integer constant: `0` followed by octal digits.
/// A bare `0` also matches here and evaluates to zero.
pub fn octal_integer_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    let digits = satisfy(|c: &char| c.is_oct_digit()).many();
    tuple((char_parser('0'), digits))
        .literal()
        .map(|lexeme| {
            // Keeping the leading `0` in the converted text is harmless:
            // e.g. "017" in base 8 is still 15.
            let text: String = lexeme.iter().collect();
            let value = u32::from_str_radix(&text, 8).unwrap();
            NewLexicalToken {
                token_type: LexicalTokenType::ConstInteger(value),
                literal_value: lexeme,
            }
        })
        .parse(context, input)
}
/// Parses a hexadecimal integer constant: `0x` or `0X` followed by at
/// least one hexadecimal digit.
pub fn hexadecimal_integer_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    tuple((
        alternate!(string_parser("0x"), string_parser("0X")),
        // BUG FIX: this was `.many()`, which accepted a bare "0x" with
        // zero digits and then panicked below in
        // `u32::from_str_radix("", 16).unwrap()`. `many1()` demands at
        // least one digit, so a lone "0x" no longer matches here (the
        // `0` still tokenizes via the octal parser).
        satisfy(|c: &char| c.is_hex_digit()).many1(),
    ))
    .literal()
    .map(|x| {
        // Skip the two-character "0x"/"0X" prefix before converting.
        let word: String = x[2..].iter().collect();
        let number = u32::from_str_radix(word.as_str(), 16).unwrap();
        NewLexicalToken {
            token_type: LexicalTokenType::ConstInteger(number),
            literal_value: x,
        }
    })
    .parse(context, input)
}
/// Parses any integer constant.
///
/// Order matters: hexadecimal is tried before octal (both begin with
/// `0`, but `0x…` must not stop at the bare `0`), and octal before
/// decimal.
pub fn integer_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    let any_integer = alternate!(
        hexadecimal_integer_parser,
        octal_integer_parser,
        decimal_integer_parser
    );
    any_integer.parse(context, input)
}
/// Parses a floating-point constant of the form `digits.digits`; both
/// the integer part and the fractional part are required (no `1.` or
/// `.5`).
pub fn float_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    tuple((
        satisfy(|c: &char| c.is_ascii_digit()).many1(),
        char_parser('.'),
        satisfy(|c: &char| c.is_ascii_digit()).many1(),
    ))
    .literal()
    .map(|x| {
        // `char` is `Copy`; `.copied()` replaces the manual clone closure.
        let word: String = x.iter().copied().collect();
        // The matched text is always `digits.digits`, which `f32`'s
        // `FromStr` accepts, so the unwrap cannot fire.
        let number = f32::from_str(word.as_str()).unwrap();
        NewLexicalToken {
            token_type: LexicalTokenType::ConstFloat(number),
            literal_value: x,
        }
    })
    .parse(context, input)
}
/// Parses a double-quoted string literal token.
///
/// NOTE(review): this relies on `quote` stopping the inner `any()` at
/// the closing `"` and on `.literal()` including BOTH quote characters
/// in the matched slice — verify against the combinator definitions.
/// Escape sequences (e.g. `\"`) are not handled here.
pub fn literal_string_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
quote(char_parser('"'), any(), char_parser('"'))
.literal()
.map(|x| {
// Strip the surrounding quotes from the stored literal value;
// assumes the match always contains at least the two quotes,
// so `1..length - 1` cannot underflow.
let length = x.len();
NewLexicalToken {
token_type: LexicalTokenType::LiteralString,
literal_value: &x[1..length - 1],
}
})
.parse(context, input)
}
/// Skips a single comment, producing `()` (no token is emitted).
///
/// Two forms are recognized:
/// - `//` line comments, consumed up to and including the newline.
///   NOTE(review): the branch requires a trailing `'\n'`, so a `//`
///   comment on the very last line of input (no newline) fails to
///   parse — confirm inputs are newline-terminated or handle EOF.
/// - `/* ... */` block comments, consumed through the closing `*/`.
pub fn comments_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, ()> {
alternate!(
// Line comment: "//" then everything until (and including) '\n'.
tuple((
string_parser("//"),
take_till(char_parser('\n')),
char_parser('\n')
))
.map(|_| ()),
// Block comment: "/*" then everything until (and including) "*/".
tuple((
string_parser("/*"),
take_till(string_parser("*/")),
string_parser("*/")
))
.map(|_| ())
)
.parse(context, input)
}
pub fn junk_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, ()> {
alternate!(
comments_parser,
satisfy(|c: &char| c.is_whitespace()).many1().map(|_| ())
)
.parse(context, input)
}
/// Parses any single lexical token by trying each token class in turn.
///
/// Ordering is significant: floats before integers (so `1.5` is not
/// split at the dot), and keywords before identifiers (the keyword
/// parser itself rejects keyword-prefixed identifiers via look-ahead).
pub fn combine_parser(
    context: Rc<RefCell<ParserContext<char, ()>>>,
    input: &[char],
) -> ParserResult<char, NewLexicalToken> {
    let token = alternate!(
        float_parser,
        integer_parser,
        literal_string_parser,
        keyword_parser,
        identifier_parser,
        delimiter_parser,
        operator_parser
    );
    token.parse(context, input)
}