CanonSharp/CanonSharp.Pascal/Scanner/LexicalScanner.cs
jackfiled cf19f8197e feat: Grammar Parser (#3)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/3
Co-authored-by: jackfiled <xcrenchangjun@outlook.com>
Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
2024-08-18 12:01:27 +08:00

156 lines
5.8 KiB
C#

using CanonSharp.Combinator;
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Combinator.Extensions;
using static CanonSharp.Combinator.Text.TextParserBuilder;
using static CanonSharp.Combinator.ParserBuilder;
namespace CanonSharp.Pascal.Scanner;
/// <summary>
/// Lexical scanner for Pascal source text, assembled from parser combinators.
/// Each public <c>*Parser</c> factory recognises one token category and
/// <see cref="PascalParser"/> combines them into the parser that
/// <see cref="Tokenize{TState}"/> runs.
/// </summary>
public sealed class LexicalScanner
{
    private readonly IParser<char, IEnumerable<LexicalToken>> _parser = PascalParser();

    /// <summary>
    /// Runs the combined token parser over <paramref name="state"/>.
    /// </summary>
    /// <typeparam name="TState">Character read-state type handed to the parser.</typeparam>
    /// <param name="state">The read state the parser is applied to.</param>
    /// <returns>The <c>Value</c> of the parse result.</returns>
    /// <remarks>
    /// NOTE(review): what <c>Value</c> does when the parse did not succeed is not
    /// visible from this file - confirm against the combinator library.
    /// </remarks>
    public IEnumerable<LexicalToken> Tokenize<TState>(TState state)
        where TState : IReadState<char, TState>
    {
        return _parser.Parse(state).Value;
    }

    /// <summary>
    /// Builds the parser for reserved words. Candidates are matched with
    /// <c>StringIgnoreCase</c> and produce a <see cref="LexicalTokenType.Keyword"/> token.
    /// </summary>
    /// <returns>A parser yielding a keyword token.</returns>
    public static IParser<char, LexicalToken> KeywordParser()
    {
        return from keyword in Choice(
                   StringIgnoreCase("program"),
                   StringIgnoreCase("const"),
                   StringIgnoreCase("var"),
                   StringIgnoreCase("procedure"),
                   StringIgnoreCase("function"),
                   StringIgnoreCase("begin"),
                   StringIgnoreCase("end"),
                   StringIgnoreCase("array"),
                   StringIgnoreCase("of"),
                   StringIgnoreCase("if"),
                   StringIgnoreCase("then"),
                   StringIgnoreCase("else"),
                   StringIgnoreCase("for"),
                   StringIgnoreCase("to"),
                   StringIgnoreCase("do"),
                   StringIgnoreCase("integer"),
                   StringIgnoreCase("real"),
                   StringIgnoreCase("boolean"),
                   StringIgnoreCase("char"),
                   StringIgnoreCase("div"),
                   StringIgnoreCase("not"),
                   StringIgnoreCase("mod"),
                   StringIgnoreCase("and"),
                   StringIgnoreCase("or"),
                   StringIgnoreCase("true"),
                   StringIgnoreCase("false"),
                   StringIgnoreCase("while"))
               // A keyword must not run straight into another identifier character;
               // otherwise a word such as "dog" would be cut after the keyword "do".
               from _ in IdentifierPartParser().LookAhead().Not()
               select new LexicalToken(LexicalTokenType.Keyword, keyword);
    }

    /// <summary>
    /// Builds the parser for delimiters: <c>,</c> <c>;</c> <c>(</c> <c>)</c> <c>[</c>
    /// <c>]</c> <c>..</c>, a <c>:</c> that is not followed by <c>=</c>, and a <c>.</c>
    /// that is not followed by another <c>.</c>.
    /// </summary>
    /// <returns>A parser yielding a delimiter token.</returns>
    public static IParser<char, LexicalToken> DelimiterParser()
    {
        IParser<char, LexicalToken> fixedTextParser = from text in Choice(
                String(","),
                String(";"),
                String("("),
                String(")"),
                String("["),
                String("]"),
                String(".."))
            select new LexicalToken(LexicalTokenType.Delimiter, text);

        // ":=" is listed by OperatorParser, so a ':' followed by '=' is rejected here.
        IParser<char, LexicalToken> colonParser = from text in SymbolNotFollowedBy(':', '=')
            select new LexicalToken(LexicalTokenType.Delimiter, text);

        IParser<char, LexicalToken> periodParser = from text in SymbolNotFollowedBy('.', '.')
            select new LexicalToken(LexicalTokenType.Delimiter, text);

        return fixedTextParser | colonParser | periodParser;
    }

    /// <summary>
    /// Builds the parser for operators: <c>=</c> <c>!=</c> <c>&lt;=</c> <c>&gt;=</c>
    /// <c>+</c> <c>-</c> <c>*</c> <c>/</c> <c>:=</c>, plus <c>&lt;</c> and <c>&gt;</c>
    /// when they are not followed by <c>=</c>.
    /// </summary>
    /// <returns>A parser yielding an operator token.</returns>
    /// <remarks>
    /// NOTE(review): inequality is spelled <c>!=</c> here and <c>&lt;&gt;</c> is not in the
    /// list, so <c>&lt;&gt;</c> would be read as two separate tokens - confirm this is intended.
    /// </remarks>
    public static IParser<char, LexicalToken> OperatorParser()
    {
        IParser<char, LexicalToken> fixedTextParser = from text in Choice(
                String("="),
                String("!="),
                String("<="),
                String(">="),
                String("+"),
                String("-"),
                String("*"),
                String("/"),
                String(":="))
            select new LexicalToken(LexicalTokenType.Operator, text);

        IParser<char, LexicalToken> lessParser = from text in SymbolNotFollowedBy('<', '=')
            select new LexicalToken(LexicalTokenType.Operator, text);

        IParser<char, LexicalToken> greaterParser = from text in SymbolNotFollowedBy('>', '=')
            select new LexicalToken(LexicalTokenType.Operator, text);

        return fixedTextParser | lessParser | greaterParser;
    }

    /// <summary>
    /// Builds the parser for integer literals: one or more ASCII digits that are either
    /// followed by <c>..</c> or not followed by <c>.</c> at all.
    /// </summary>
    /// <returns>A parser yielding a <see cref="LexicalTokenType.ConstInteger"/> token.</returns>
    public static IParser<char, LexicalToken> ConstIntegerParser()
    {
        // Digits directly in front of ".." (as in "1..10"): the ".." is only
        // looked at, not consumed.
        IParser<char, LexicalToken> beforeRangeParser = from digits in AsciiDigit().Many1()
            from _ in String("..").LookAhead()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(digits.ToArray()));

        // Digits with no '.' after them; "digits '.' digits" is left to ConstFloatParser.
        IParser<char, LexicalToken> plainParser = from digits in AsciiDigit().Many1()
            from _ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(digits.ToArray()));

        return Choice(beforeRangeParser, plainParser);
    }

    /// <summary>
    /// Builds the parser for real literals of the form <c>digits '.' digits</c>,
    /// with at least one digit on each side of the point.
    /// </summary>
    /// <returns>A parser yielding a <see cref="LexicalTokenType.ConstFloat"/> token.</returns>
    public static IParser<char, LexicalToken> ConstFloatParser()
    {
        return from wholePart in AsciiDigit().Many1()
               from _ in Char('.')
               from fractionPart in AsciiDigit().Many1()
               select new LexicalToken(
                   LexicalTokenType.ConstFloat,
                   $"{new string(wholePart.ToArray())}.{new string(fractionPart.ToArray())}");
    }

    /// <summary>
    /// Builds the parser for identifiers: an ASCII letter or underscore followed by
    /// any number of ASCII letters, ASCII digits or underscores.
    /// </summary>
    /// <returns>A parser yielding a <see cref="LexicalTokenType.Identifier"/> token.</returns>
    public static IParser<char, LexicalToken> IdentifierParser()
    {
        return from head in AsciiLetter() | Char('_')
               from tail in IdentifierPartParser().Many()
               select new LexicalToken(LexicalTokenType.Identifier, head + new string(tail.ToArray()));
    }

    /// <summary>
    /// Builds the parser for a <c>{ ... }</c> comment. The comment content is discarded.
    /// </summary>
    /// <returns>A parser yielding <see cref="Unit"/>.</returns>
    public static IParser<char, Unit> CommentParser()
    {
        return Any<char>().Quote(Char('{'), Char('}')).Map(_ => Unit.Instance);
    }

    /// <summary>
    /// Builds the parser for input that produces no token: whatever <c>Space()</c>
    /// or <c>LineBreak()</c> accepts, or a comment.
    /// </summary>
    /// <returns>A parser yielding <see cref="Unit"/>.</returns>
    public static IParser<char, Unit> JunkParser()
    {
        return Space().Map(_ => Unit.Instance) | LineBreak().Map(_ => Unit.Instance) | CommentParser();
    }

    /// <summary>
    /// Builds the parser for single-quoted literals. Content of length 0 or 1 becomes a
    /// <see cref="LexicalTokenType.Character"/> token, longer content a
    /// <see cref="LexicalTokenType.String"/> token; the quotes are not part of the token text.
    /// </summary>
    /// <returns>A parser yielding a character or string token.</returns>
    public static IParser<char, LexicalToken> CharParser()
    {
        return from text in Any<char>().Quote(Char('\'')).Map(content => new string(content.ToArray()))
               select text.Length <= 1
                   ? new LexicalToken(LexicalTokenType.Character, text)
                   : new LexicalToken(LexicalTokenType.String, text);
    }

    /// <summary>
    /// Builds the complete scanner: repeatedly skips junk up to the next token and
    /// collects the tokens.
    /// </summary>
    /// <returns>A parser yielding the sequence of recognised tokens.</returns>
    public static IParser<char, IEnumerable<LexicalToken>> PascalParser()
    {
        // Order matters: keywords are tried before identifiers, and real literals
        // before integer literals.
        var tokenParser = Choice(
            KeywordParser(),
            DelimiterParser(),
            OperatorParser(),
            ConstFloatParser(),
            ConstIntegerParser(),
            CharParser(),
            IdentifierParser());

        return JunkParser().SkipTill(tokenParser).Many();
    }

    /// <summary>
    /// Parser for one character that may appear inside an identifier after its first
    /// character: an ASCII letter, an ASCII digit or an underscore.
    /// </summary>
    private static IParser<char, char> IdentifierPartParser()
    {
        return AsciiLetter() | AsciiDigit() | Char('_');
    }

    /// <summary>
    /// Parser that accepts <paramref name="symbol"/> only when the next character is not
    /// <paramref name="excludedNext"/>, and yields the symbol as a one-character string.
    /// </summary>
    private static IParser<char, string> SymbolNotFollowedBy(char symbol, char excludedNext)
    {
        return from matched in Char(symbol)
               from _ in Char(excludedNext).LookAhead().Not()
               select matched.ToString();
    }
}