CanonSharp/CanonSharp.Common/Scanner/LexicalScanner.cs
jackfiled 3ed8bf5d36
All checks were successful
Run unit test / Unit-Test (push) Successful in 41s
feat: Parser Combinator库和词法分析器 (#2)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/2
Co-authored-by: jackfiled <xcrenchangjun@outlook.com>
Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
2024-08-13 14:46:11 +08:00

151 lines
5.6 KiB
C#

using CanonSharp.Combinator;
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Combinator.Extensions;
using static CanonSharp.Combinator.Text.TextParserBuilder;
using static CanonSharp.Combinator.ParserBuilder;
namespace CanonSharp.Common.Scanner;
/// <summary>
/// Lexical scanner built from parser combinators. Every token category is exposed as a
/// static parser factory, and <see cref="PascalParser"/> combines them into a parser that
/// yields a sequence of <see cref="LexicalToken"/>.
/// </summary>
public sealed class LexicalScanner
{
    // The combined parser is built once per scanner instance and reused by Tokenize.
    private readonly Parser<char, IEnumerable<LexicalToken>> _parser = PascalParser();

    /// <summary>
    /// Runs the combined parser over <paramref name="state"/> and returns the tokens it produced.
    /// </summary>
    /// <typeparam name="TState">Type of the character read state.</typeparam>
    /// <param name="state">Input position to start scanning from.</param>
    /// <returns>The <c>Value</c> of the parse result.</returns>
    /// <remarks>
    /// NOTE(review): what <c>Value</c> does when the parse result is a failure is not visible
    /// in this file - confirm against the combinator library.
    /// </remarks>
    public IEnumerable<LexicalToken> Tokenize<TState>(TState state)
        where TState : IReadState<char, TState>
    {
        return _parser.Parse(state).Value;
    }

    /// <summary>
    /// Parser for reserved words, matched case-insensitively. A keyword only matches when the
    /// next character cannot continue an identifier, so <c>iffy</c> is not read as <c>if</c>.
    /// </summary>
    /// <returns>A parser producing a <see cref="LexicalTokenType.Keyword"/> token.</returns>
    public static Parser<char, LexicalToken> KeywordParser()
    {
        // NOTE(review): standard Pascal spells integer division "div"; "divide" is kept
        // unchanged here because the intended dialect is not visible - confirm.
        return from word in Choice(
                StringIgnoreCase("program"),
                StringIgnoreCase("const"),
                StringIgnoreCase("var"),
                StringIgnoreCase("procedure"),
                StringIgnoreCase("function"),
                StringIgnoreCase("begin"),
                StringIgnoreCase("end"),
                StringIgnoreCase("array"),
                StringIgnoreCase("of"),
                StringIgnoreCase("if"),
                StringIgnoreCase("then"),
                StringIgnoreCase("else"),
                StringIgnoreCase("for"),
                StringIgnoreCase("to"),
                StringIgnoreCase("do"),
                StringIgnoreCase("integer"),
                StringIgnoreCase("real"),
                StringIgnoreCase("boolean"),
                StringIgnoreCase("char"),
                StringIgnoreCase("divide"),
                StringIgnoreCase("not"),
                StringIgnoreCase("mod"),
                StringIgnoreCase("and"),
                StringIgnoreCase("or"),
                StringIgnoreCase("true"),
                StringIgnoreCase("false"),
                StringIgnoreCase("while"))
            from _ in IdentifierPartParser().LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Keyword, word);
    }

    /// <summary>
    /// Parser for delimiters: <c>, ; ( ) [ ] ..</c> plus a lone <c>:</c> and a lone <c>.</c>.
    /// </summary>
    /// <returns>A parser producing a <see cref="LexicalTokenType.Delimiter"/> token.</returns>
    public static Parser<char, LexicalToken> DelimiterParser()
    {
        // ':' is a delimiter only when it does not start the ':=' operator.
        Parser<char, LexicalToken> colonParser = from colon in Char(':')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, colon.ToString());

        // '.' is a delimiter only when it is not the first half of '..'.
        Parser<char, LexicalToken> periodParser = from period in Char('.')
            from _ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, ".");

        // Delimiters that can be matched as fixed strings without any lookahead.
        Parser<char, LexicalToken> fixedDelimiterParser = from text in Choice(
                String(","),
                String(";"),
                String("("),
                String(")"),
                String("["),
                String("]"),
                String(".."))
            select new LexicalToken(LexicalTokenType.Delimiter, text);

        return fixedDelimiterParser | colonParser | periodParser;
    }

    /// <summary>
    /// Parser for operators: <c>= != &lt;= &gt;= + - * / :=</c> plus a lone <c>&lt;</c> and
    /// a lone <c>&gt;</c>.
    /// </summary>
    /// <returns>A parser producing a <see cref="LexicalTokenType.Operator"/> token.</returns>
    public static Parser<char, LexicalToken> OperatorParser()
    {
        // '<' and '>' only match on their own when no '=' follows.
        Parser<char, LexicalToken> lessParser = from less in Char('<')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, "<");

        Parser<char, LexicalToken> greaterParser = from greater in Char('>')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, ">");

        // NOTE(review): inequality is spelled "!=" here, whereas standard Pascal uses "<>".
        // With the current set "<>" is read as "<" followed by ">" - confirm this is intended.
        Parser<char, LexicalToken> fixedOperatorParser = from text in Choice(
                String("="),
                String("!="),
                String("<="),
                String(">="),
                String("+"),
                String("-"),
                String("*"),
                String("/"),
                String(":="))
            select new LexicalToken(LexicalTokenType.Operator, text);

        return fixedOperatorParser | lessParser | greaterParser;
    }

    /// <summary>
    /// Parser for integer literals: one or more ASCII digits that are not the integer part
    /// of a float literal.
    /// </summary>
    /// <remarks>
    /// Only a <c>.</c> that is itself followed by a digit blocks the match. Rejecting every
    /// trailing <c>.</c> made the lower bound of a subrange such as <c>1..10</c> impossible
    /// to tokenize, because <see cref="ConstFloatParser"/> also fails on that input.
    /// </remarks>
    /// <returns>A parser producing a <see cref="LexicalTokenType.ConstInteger"/> token.</returns>
    public static Parser<char, LexicalToken> ConstIntegerParser()
    {
        // Matches the start of a fractional part: '.' immediately followed by a digit.
        Parser<char, char> fractionStart = from dot in Char('.')
            from digit in AsciiDigit()
            select digit;

        return from digits in AsciiDigit().Many1()
            from _ in fractionStart.LookAhead().Not()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(digits.ToArray()));
    }

    /// <summary>
    /// Parser for float literals of the form <c>digits.digits</c>; both sides of the point
    /// need at least one digit.
    /// </summary>
    /// <returns>A parser producing a <see cref="LexicalTokenType.ConstFloat"/> token.</returns>
    public static Parser<char, LexicalToken> ConstFloatParser()
    {
        return from whole in AsciiDigit().Many1()
            from _ in Char('.')
            from fraction in AsciiDigit().Many1()
            select new LexicalToken(LexicalTokenType.ConstFloat,
                new string(whole.ToArray()) + '.' + new string(fraction.ToArray()));
    }

    /// <summary>
    /// Parser for identifiers: an ASCII letter or underscore followed by any number of
    /// ASCII letters, digits or underscores.
    /// </summary>
    /// <returns>A parser producing a <see cref="LexicalTokenType.Identifier"/> token.</returns>
    public static Parser<char, LexicalToken> IdentifierParser()
    {
        return from head in AsciiLetter() | Char('_')
            from tail in IdentifierPartParser().Many()
            select new LexicalToken(LexicalTokenType.Identifier, head + new string(tail.ToArray()));
    }

    /// <summary>
    /// Parser for a brace comment, built by quoting <c>Any&lt;char&gt;()</c> between
    /// <c>{</c> and <c>}</c>; the comment text is discarded.
    /// </summary>
    /// <returns>A parser producing <see cref="Unit"/>.</returns>
    public static Parser<char, Unit> CommentParser()
    {
        return Any<char>().Quote(Char('{'), Char('}')).Map(_ => Unit.Instance);
    }

    /// <summary>
    /// Parser for input that carries no token: a space, a line break, or a comment.
    /// </summary>
    /// <returns>A parser producing <see cref="Unit"/>.</returns>
    public static Parser<char, Unit> JunkParser()
    {
        Parser<char, Unit> spaceParser = Space().Map(_ => Unit.Instance);
        Parser<char, Unit> lineBreakParser = LineBreak().Map(_ => Unit.Instance);

        return spaceParser | lineBreakParser | CommentParser();
    }

    /// <summary>
    /// Parser for text quoted with single quotes. Content of length zero or one becomes a
    /// <see cref="LexicalTokenType.Character"/> token; anything longer becomes a
    /// <see cref="LexicalTokenType.String"/> token.
    /// </summary>
    /// <returns>A parser producing a character or string token.</returns>
    public static Parser<char, LexicalToken> CharParser()
    {
        return from text in Any<char>().Quote(Char('\'')).Map(chars => new string(chars.ToArray()))
            select new LexicalToken(
                text.Length <= 1 ? LexicalTokenType.Character : LexicalTokenType.String,
                text);
    }

    /// <summary>
    /// Builds the full scanner: junk is skipped up to the next token, and this repeats for
    /// as long as a token can be read.
    /// </summary>
    /// <remarks>
    /// The order of alternatives matters: keywords are tried before identifiers, and
    /// delimiters before operators (a lone <c>:</c> is told apart from <c>:=</c> by lookahead).
    /// </remarks>
    /// <returns>A parser producing the token sequence.</returns>
    public static Parser<char, IEnumerable<LexicalToken>> PascalParser()
    {
        var tokenParser = Choice(
            KeywordParser(),
            DelimiterParser(),
            OperatorParser(),
            ConstIntegerParser(),
            ConstFloatParser(),
            CharParser(),
            IdentifierParser());

        return JunkParser().SkipTill(tokenParser).Many();
    }

    // Characters that may continue an identifier: ASCII letter, ASCII digit or underscore.
    private static Parser<char, char> IdentifierPartParser()
    {
        return AsciiLetter() | AsciiDigit() | Char('_');
    }
}