add: tests for zero-parser.

2024-11-19 19:08:05 +08:00
parent a282ad8f24
commit 91c6c42a02
227 changed files with 7797 additions and 0 deletions

tests/tokenizer/mod.rs

@@ -0,0 +1,133 @@
use nom::IResult;
use std::cell::RefCell;
use std::rc::Rc;
use zero_parser::parser::{ParserContext, ParserResult};
mod nom_parsers;
pub mod zero_parsers;
/// The kind of a lexical token.
#[derive(Debug, Copy, Clone)]
pub enum LexicalTokenType {
Identifier,
/// Integer constant.
/// The constant's numeric value is already computed during lexical analysis.
ConstInteger(u32),
/// Floating-point constant.
ConstFloat(f32),
Keyword,
Delimiter,
Operator,
LiteralString,
}
/// A lexical token.
#[derive(PartialEq, Debug, Copy, Clone)]
pub struct LexicalToken<'a> {
pub token_type: LexicalTokenType,
pub literal_value: &'a str,
}
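/// A lexical token borrowed from `&[char]` input, produced by the
/// zero_parser-based tokenizer.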
#[derive(PartialEq, Debug, Copy, Clone)]
pub struct NewLexicalToken<'a> {
pub token_type: LexicalTokenType,
pub literal_value: &'a [char],
}
/// Hand-written equality for lexical token types.
/// The override exists because integer (and floating-point) constants with
/// different payload values should still count as the same kind of token.
///
/// That is, the following assertions hold:
/// ```
/// use rustic_sysy::tokenizer::LexicalTokenType;
///
/// assert_eq!(LexicalTokenType::ConstInteger(0), LexicalTokenType::ConstInteger(2));
/// assert_eq!(LexicalTokenType::ConstFloat(0f32), LexicalTokenType::ConstFloat(2f32));
/// ```
impl PartialEq for LexicalTokenType {
fn eq(&self, other: &Self) -> bool {
// Equality is variant-based only; the payloads carried by
// ConstInteger and ConstFloat are deliberately ignored.
std::mem::discriminant(self) == std::mem::discriminant(other)
}
}
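/// A legacy `&str`-based token equals a new `&[char]`-based token when both
/// the token type and the literal text agree.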
impl<'a, 'b> PartialEq<NewLexicalToken<'a>> for LexicalToken<'b> {
fn eq(&self, other: &NewLexicalToken) -> bool {
// Compare the literal text char by char, without allocating two Strings.
self.token_type == other.token_type
&& self.literal_value.chars().eq(other.literal_value.iter().copied())
}
}
pub fn nom_lexical_parser(mut input: &str) -> IResult<&str, Vec<LexicalToken>> {
let mut array = vec![];
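// Alternate between skipping junk (whitespace/comments) and demanding
// exactly one token per iteration.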
while !input.is_empty() {
if let Ok((i, _)) = nom_parsers::junk_parser(input) {
if i.is_empty() {
break;
}
input = i;
continue;
}
let (i, token) = nom_parsers::combine_parser(input)?;
input = i;
array.push(token);
}
Ok((input, array))
}
pub fn zero_lexical_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
mut input: &[char],
) -> ParserResult<char, Vec<NewLexicalToken>> {
let mut array = vec![];
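// Same skip-junk-then-tokenize loop as nom_lexical_parser, over &[char].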
while !input.is_empty() {
if let Ok((i, _)) = zero_parsers::junk_parser(context.clone(), input) {
if i.is_empty() {
break;
}
input = i;
continue;
}
let (i, token) = zero_parsers::combine_parser(context.clone(), input)?;
input = i;
array.push(token);
}
Ok((input, array))
}
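A minimal sketch of how the two entry points line up, assuming zero_parser
exposes a `ParserContext::new` constructor (a hypothetical name; the real
constructor may differ):

    #[test]
    fn lexers_agree_on_simple_source() {
        let source = "int main() { return 0; }";
        let chars: Vec<char> = source.chars().collect();

        let (rest, old_tokens) = nom_lexical_parser(source).unwrap();
        assert!(rest.is_empty());

        // ParserContext::new(()) is an assumed constructor, not a confirmed API.
        let context = Rc::new(RefCell::new(ParserContext::new(())));
        let (rest, new_tokens) = zero_lexical_parser(context, &chars).unwrap();
        assert!(rest.is_empty());

        // Pairwise comparison uses PartialEq<NewLexicalToken> for LexicalToken.
        assert_eq!(old_tokens.len(), new_tokens.len());
        for (old, new) in old_tokens.iter().zip(new_tokens.iter()) {
            assert_eq!(old, new);
        }
    }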

tests/tokenizer/nom_parsers.rs

@@ -0,0 +1,180 @@
use crate::tokenizer::{LexicalToken, LexicalTokenType};
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::{
alpha1, alphanumeric1, digit0, digit1, hex_digit1, multispace1, oct_digit0, one_of,
};
use nom::combinator::{map, not, peek, recognize, value};
use nom::multi::many0_count;
use nom::sequence::{delimited, pair, tuple};
use nom::IResult;
use std::str::FromStr;
fn keyword_parser(input: &str) -> IResult<&str, LexicalToken> {
map(
recognize(tuple((
alt((
tag("break"),
tag("const"),
tag("continue"),
tag("else"),
tag("float"),
tag("if"),
tag("int"),
tag("return"),
tag("void"),
tag("while"),
)),
not(alt((peek(alphanumeric1), tag("_")))),
))),
|x| LexicalToken {
token_type: LexicalTokenType::Keyword,
literal_value: x,
},
)(input)
}
fn delimiter_parser(input: &str) -> IResult<&str, LexicalToken> {
map(
alt((
tag(","),
tag(";"),
tag("("),
tag(")"),
tag("["),
tag("]"),
tag("{"),
tag("}"),
)),
|x| LexicalToken {
token_type: LexicalTokenType::Delimiter,
literal_value: x,
},
)(input)
}
fn operator_parser(input: &str) -> IResult<&str, LexicalToken> {
map(
alt((
tag(">="),
tag("<="),
tag("=="),
tag("!="),
tag("&&"),
tag("||"),
tag("="),
tag("+"),
tag("-"),
tag("!"),
tag("*"),
tag("/"),
tag("%"),
tag(">"),
tag("<"),
)),
|x| LexicalToken {
token_type: LexicalTokenType::Operator,
literal_value: x,
},
)(input)
}
fn identifier_parser(input: &str) -> IResult<&str, LexicalToken> {
map(
recognize(pair(
alt((alpha1, tag("_"))),
many0_count(alt((alphanumeric1, tag("_")))),
)),
|s| LexicalToken {
token_type: LexicalTokenType::Identifier,
literal_value: s,
},
)(input)
}
fn decimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
map(recognize(pair(one_of("123456789"), digit0)), |x| {
let number = u32::from_str_radix(x, 10).unwrap();
LexicalToken {
token_type: LexicalTokenType::ConstInteger(number),
literal_value: x,
}
})(input)
}
fn octal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
map(recognize(pair(tag("0"), oct_digit0)), |x| {
let number = u32::from_str_radix(x, 8).unwrap();
LexicalToken {
token_type: LexicalTokenType::ConstInteger(number),
literal_value: x,
}
})(input)
}
fn hexadecimal_integer_parser(input: &str) -> IResult<&str, LexicalToken> {
map(
// hex_digit1, not hex_digit0: a bare "0x" must not reach the unwrap below.
recognize(pair(alt((tag("0x"), tag("0X"))), hex_digit1)),
|x: &str| {
let number = u32::from_str_radix(&x[2..], 16).unwrap();
LexicalToken {
token_type: LexicalTokenType::ConstInteger(number),
literal_value: x,
}
},
)(input)
}
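// Order matters: "0x10" must be claimed by the hexadecimal parser before
// the octal parser can consume its leading '0' and stop at the 'x'.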
fn integer_parser(input: &str) -> IResult<&str, LexicalToken> {
alt((
hexadecimal_integer_parser,
octal_integer_parser,
decimal_integer_parser,
))(input)
}
fn float_parser(input: &str) -> IResult<&str, LexicalToken> {
map(recognize(tuple((digit1, tag("."), digit1))), |x| {
let number = f32::from_str(x).unwrap();
LexicalToken {
token_type: LexicalTokenType::ConstFloat(number),
literal_value: x,
}
})(input)
}
fn literal_string_parser(input: &str) -> IResult<&str, LexicalToken> {
map(delimited(tag("\""), take_until("\""), tag("\"")), |s| {
LexicalToken {
token_type: LexicalTokenType::LiteralString,
literal_value: s,
}
})(input)
}
fn comments_parser(input: &str) -> IResult<&str, ()> {
alt((
value((), tuple((tag("//"), take_until("\n"), tag("\n")))),
value((), tuple((tag("/*"), take_until("*/"), tag("*/")))),
))(input)
}
pub fn junk_parser(input: &str) -> IResult<&str, ()> {
alt((value((), multispace1), comments_parser))(input)
}
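// Order matters: float before integer so "1.5" is not split at the dot,
// and keyword before identifier so reserved words are not lexed as names.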
pub fn combine_parser(input: &str) -> IResult<&str, LexicalToken> {
alt((
float_parser,
integer_parser,
literal_string_parser,
keyword_parser,
identifier_parser,
delimiter_parser,
operator_parser,
))(input)
}
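The negative lookahead in keyword_parser is what keeps keywords from
swallowing the front of identifiers; a quick sketch of the behavior it buys:

    // "int" followed by a non-identifier character parses as a keyword...
    assert!(keyword_parser("int x").is_ok());
    // ...while "intx" fails here and falls through to identifier_parser.
    assert!(keyword_parser("intx").is_err());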

tests/tokenizer/zero_parsers.rs

@@ -0,0 +1,266 @@
use crate::tokenizer::{LexicalTokenType, NewLexicalToken};
use nom::AsChar;
use std::cell::RefCell;
use std::rc::Rc;
use std::str::FromStr;
use zero_parser::combinators::{quote, take_till, tuple, ParserExt};
use zero_parser::parser::{any, Parser, ParserContext, ParserResult};
use zero_parser::text::{char_parser, one_of, string_parser};
use zero_parser::{alternate, parser::satisfy};
pub fn keyword_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
tuple((
alternate!(
string_parser("break"),
string_parser("const"),
string_parser("continue"),
string_parser("else"),
string_parser("float"),
string_parser("if"),
string_parser("int"),
string_parser("return"),
string_parser("void"),
string_parser("while")
),
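// Negative lookahead, the zero_parser analog of nom's not(peek(...)):
// the keyword must not be followed by an identifier character.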
alternate!(satisfy(|c: &char| c.is_alphanumeric()), char_parser('_'))
.look_ahead()
.reverse(()),
))
.literal()
.map(|x| NewLexicalToken {
token_type: LexicalTokenType::Keyword,
literal_value: x,
})
.parse(context, input)
}
pub fn delimiter_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
alternate!(
char_parser(','),
char_parser(';'),
char_parser('('),
char_parser(')'),
char_parser('['),
char_parser(']'),
char_parser('{'),
char_parser('}')
)
.literal()
.map(|x| NewLexicalToken {
token_type: LexicalTokenType::Delimiter,
literal_value: x,
})
.parse(context, input)
}
pub fn operator_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
alternate!(
string_parser(">="),
string_parser("<="),
string_parser("=="),
string_parser("!="),
string_parser("&&"),
string_parser("||"),
string_parser("="),
string_parser("+"),
string_parser("-"),
string_parser("!"),
string_parser("*"),
string_parser("/"),
string_parser("%"),
string_parser(">"),
string_parser("<")
)
.literal()
.map(|x| NewLexicalToken {
token_type: LexicalTokenType::Operator,
literal_value: x,
})
.parse(context, input)
}
pub fn identifier_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
tuple((
alternate!(satisfy(|c: &char| c.is_alphabetic()), char_parser('_')),
alternate!(satisfy(|c: &char| c.is_alphanumeric()), char_parser('_')).many(),
))
.literal()
.map(|x| NewLexicalToken {
token_type: LexicalTokenType::Identifier,
literal_value: x,
})
.parse(context, input)
}
pub fn decimal_integer_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
tuple((
one_of("123456789"),
satisfy(|c: &char| c.is_ascii_digit()).many(),
))
.literal()
.map(|x| {
let word: String = x.iter().collect();
let number = u32::from_str_radix(word.as_str(), 10).unwrap();
NewLexicalToken {
token_type: LexicalTokenType::ConstInteger(number),
literal_value: x,
}
})
.parse(context, input)
}
pub fn octal_integer_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
tuple((
char_parser('0'),
satisfy(|c: &char| c.is_oct_digit()).many(),
))
.literal()
.map(|x| {
let word: String = x.iter().collect();
let number = u32::from_str_radix(word.as_str(), 8).unwrap();
NewLexicalToken {
token_type: LexicalTokenType::ConstInteger(number),
literal_value: x,
}
})
.parse(context, input)
}
pub fn hexadecimal_integer_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
tuple((
alternate!(string_parser("0x"), string_parser("0X")),
// many1, not many: a bare "0x"/"0X" must not reach the unwrap below.
satisfy(|c: &char| c.is_hex_digit()).many1(),
))
.literal()
.map(|x| {
let word: String = (&x[2..]).iter().collect();
let number = u32::from_str_radix(word.as_str(), 16).unwrap();
NewLexicalToken {
token_type: LexicalTokenType::ConstInteger(number),
literal_value: x,
}
})
.parse(context, input)
}
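// As in nom_parsers: hexadecimal first, so "0x10" is not consumed as the
// octal literal "0" followed by the identifier "x10".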
pub fn integer_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
alternate!(
hexadecimal_integer_parser,
octal_integer_parser,
decimal_integer_parser
)
.parse(context, input)
}
pub fn float_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
tuple((
satisfy(|c: &char| c.is_ascii_digit()).many1(),
char_parser('.'),
satisfy(|c: &char| c.is_ascii_digit()).many1(),
))
.literal()
.map(|x| {
let word: String = x.iter().collect();
let number = f32::from_str(word.as_str()).unwrap();
NewLexicalToken {
token_type: LexicalTokenType::ConstFloat(number),
literal_value: x,
}
})
.parse(context, input)
}
pub fn literal_string_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
quote(char_parser('"'), any(), char_parser('"'))
.literal()
.map(|x| {
// Drop both the opening and the closing quote from the matched span.
let length = x.len();
NewLexicalToken {
token_type: LexicalTokenType::LiteralString,
literal_value: &x[1..length - 1],
}
})
.parse(context, input)
}
pub fn comments_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, ()> {
alternate!(
tuple((
string_parser("//"),
take_till(char_parser('\n')),
char_parser('\n')
))
.map(|_| ()),
tuple((
string_parser("/*"),
take_till(string_parser("*/")),
string_parser("*/")
))
.map(|_| ())
)
.parse(context, input)
}
pub fn junk_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, ()> {
alternate!(
comments_parser,
satisfy(|c: &char| c.is_whitespace()).many1().map(|_| ())
)
.parse(context, input)
}
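// Same ordering as nom_parsers::combine_parser: float before integer,
// keyword before identifier.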
pub fn combine_parser(
context: Rc<RefCell<ParserContext<char, ()>>>,
input: &[char],
) -> ParserResult<char, NewLexicalToken> {
alternate!(
float_parser,
integer_parser,
literal_string_parser,
keyword_parser,
identifier_parser,
delimiter_parser,
operator_parser
)
.parse(context, input)
}