using CanonSharp.Combinator;
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Combinator.Extensions;
using static CanonSharp.Combinator.Text.TextParserBuilder;
using static CanonSharp.Combinator.ParserBuilder;

namespace CanonSharp.Pascal.Scanner;

/// <summary>
/// Lexical scanner for Pascal source text, assembled from parser combinators.
/// Every token category has its own public static parser factory; <see cref="PascalParser"/>
/// combines them into the parser used by <see cref="Tokenize{TState}"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments in this file were reconstructed from usage
/// (char input, <see cref="LexicalToken"/> / <see cref="Unit"/> results). Confirm them,
/// in particular the <c>IReadState</c> constraint on <see cref="Tokenize{TState}"/>,
/// against CanonSharp.Combinator.Abstractions.
/// </remarks>
public sealed class LexicalScanner
{
    // Built once per scanner instance from the static factory below.
    private readonly IParser<char, IEnumerable<LexicalToken>> _parser = PascalParser();

    /// <summary>
    /// Runs the combined Pascal parser over <paramref name="state"/> and returns the
    /// <c>Value</c> of the parse result.
    /// </summary>
    /// <typeparam name="TState">Type of the read state supplying the input characters.</typeparam>
    /// <param name="state">The read state to start scanning from.</param>
    /// <returns>The tokens produced by the parser.</returns>
    public IEnumerable<LexicalToken> Tokenize<TState>(TState state)
        where TState : IReadState<char, TState> // NOTE(review): constraint arguments reconstructed - confirm.
    {
        return _parser.Parse(state).Value;
    }

    /// <summary>
    /// Parses one of the reserved words (case-insensitively) and yields a
    /// <see cref="LexicalTokenType.Keyword"/> token.
    /// </summary>
    /// <remarks>
    /// The trailing negative look-ahead rejects the match when the word is directly
    /// followed by a letter, digit or underscore, so that a longer identifier which merely
    /// starts with a keyword is not split.
    /// </remarks>
    public static IParser<char, LexicalToken> KeywordParser()
    {
        return from value in Choice(
                StringIgnoreCase("program"),
                StringIgnoreCase("const"),
                StringIgnoreCase("var"),
                StringIgnoreCase("procedure"),
                StringIgnoreCase("function"),
                StringIgnoreCase("begin"),
                StringIgnoreCase("end"),
                StringIgnoreCase("array"),
                StringIgnoreCase("of"),
                StringIgnoreCase("if"),
                StringIgnoreCase("then"),
                StringIgnoreCase("else"),
                StringIgnoreCase("for"),
                StringIgnoreCase("to"),
                StringIgnoreCase("do"),
                StringIgnoreCase("integer"),
                StringIgnoreCase("real"),
                StringIgnoreCase("boolean"),
                StringIgnoreCase("char"),
                StringIgnoreCase("divide"),
                StringIgnoreCase("not"),
                StringIgnoreCase("mod"),
                StringIgnoreCase("and"),
                StringIgnoreCase("or"),
                StringIgnoreCase("true"),
                StringIgnoreCase("false"),
                StringIgnoreCase("while"))
            from _ in (AsciiLetter() | AsciiDigit() | Char('_')).LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Keyword, value);
    }

    /// <summary>
    /// Parses a delimiter: one of <c>, ; ( ) [ ] ..</c>, a <c>:</c> that does not start
    /// <c>:=</c>, or a <c>.</c> that does not start <c>..</c>.
    /// </summary>
    public static IParser<char, LexicalToken> DelimiterParser()
    {
        // ':' is a delimiter only when it is not the first half of the ":=" operator.
        IParser<char, LexicalToken> colonParser = from token in Char(':')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, token.ToString());

        // A lone '.' is a delimiter; ".." is handled by the fixed-text alternatives below.
        IParser<char, LexicalToken> periodParser = from token in Char('.')
            from _ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, ".");

        IParser<char, LexicalToken> fixedTextParser = from token in Choice(
                String(","),
                String(";"),
                String("("),
                String(")"),
                String("["),
                String("]"),
                String(".."))
            select new LexicalToken(LexicalTokenType.Delimiter, token);

        return fixedTextParser | colonParser | periodParser;
    }

    /// <summary>
    /// Parses an operator: one of <c>= != &lt;= &gt;= + - * / :=</c>, or a <c>&lt;</c> /
    /// <c>&gt;</c> that is not followed by <c>=</c>.
    /// </summary>
    public static IParser<char, LexicalToken> OperatorParser()
    {
        IParser<char, LexicalToken> lessParser = from token in Char('<')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, "<");

        IParser<char, LexicalToken> greaterParser = from token in Char('>')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, ">");

        IParser<char, LexicalToken> otherParsers = from token in Choice(
                String("="),
                String("!="),
                String("<="),
                String(">="),
                String("+"),
                String("-"),
                String("*"),
                String("/"),
                String(":="))
            select new LexicalToken(LexicalTokenType.Operator, token);

        return otherParsers | lessParser | greaterParser;
    }

    /// <summary>
    /// Parses one or more ASCII digits as a <see cref="LexicalTokenType.ConstInteger"/> token.
    /// </summary>
    /// <remarks>
    /// The digits are rejected only when they are followed by <c>.</c> AND a digit, i.e. when
    /// they are the integer part of a literal that <see cref="ConstFloatParser"/> accepts.
    /// Rejecting on a bare <c>.</c> would make a subrange such as <c>1..10</c> match neither
    /// the integer nor the float parser.
    /// </remarks>
    public static IParser<char, LexicalToken> ConstIntegerParser()
    {
        // '.' immediately followed by a digit: the start of a fractional part.
        var fractionStart = from dot in Char('.')
            from digit in AsciiDigit()
            select digit;

        return from nums in AsciiDigit().Many1()
            from _ in fractionStart.LookAhead().Not()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(nums.ToArray()));
    }

    /// <summary>
    /// Parses <c>digits '.' digits</c> as a <see cref="LexicalTokenType.ConstFloat"/> token
    /// whose text is the integer part, a dot, and the fractional part.
    /// </summary>
    public static IParser<char, LexicalToken> ConstFloatParser()
    {
        return from integer in AsciiDigit().Many1()
            from _ in Char('.')
            from fraction in AsciiDigit().Many1()
            select new LexicalToken(LexicalTokenType.ConstFloat,
                new string(integer.ToArray()) + '.' + new string(fraction.ToArray()));
    }

    /// <summary>
    /// Parses an identifier: an ASCII letter or underscore followed by any number of ASCII
    /// letters, digits or underscores.
    /// </summary>
    public static IParser<char, LexicalToken> IdentifierParser()
    {
        return from first in AsciiLetter() | Char('_')
            from second in (AsciiLetter() | AsciiDigit() | Char('_')).Many()
            select new LexicalToken(LexicalTokenType.Identifier, first + new string(second.ToArray()));
    }

    /// <summary>
    /// Parses a <c>{ ... }</c> comment and discards its content.
    /// </summary>
    public static IParser<char, Unit> CommentParser()
    {
        return Any().Quote(Char('{'), Char('}')).Map(_ => Unit.Instance);
    }

    /// <summary>
    /// Parses one piece of input that carries no token: a space, a line break, or a comment.
    /// </summary>
    public static IParser<char, Unit> JunkParser()
    {
        return Space().Map(_ => Unit.Instance) | LineBreak().Map(_ => Unit.Instance) | CommentParser();
    }

    /// <summary>
    /// Parses text enclosed in single quotes. Content of length 0 or 1 yields a
    /// <see cref="LexicalTokenType.Character"/> token; longer content yields a
    /// <see cref="LexicalTokenType.String"/> token.
    /// </summary>
    public static IParser<char, LexicalToken> CharParser()
    {
        return from str in Any().Quote(Char('\'')).Map(x => new string(x.ToArray()))
            select str.Length <= 1
                ? new LexicalToken(LexicalTokenType.Character, str)
                : new LexicalToken(LexicalTokenType.String, str);
    }

    /// <summary>
    /// Builds the complete scanner: repeatedly skips junk until one of the token parsers
    /// matches, collecting the matched tokens.
    /// </summary>
    /// <remarks>
    /// The alternatives are listed with keywords before identifiers and the integer parser
    /// before the float parser; this order is preserved from the original implementation.
    /// </remarks>
    public static IParser<char, IEnumerable<LexicalToken>> PascalParser()
    {
        return JunkParser().SkipTill(Choice(
            KeywordParser(),
            DelimiterParser(),
            OperatorParser(),
            ConstIntegerParser(),
            ConstFloatParser(),
            CharParser(),
            IdentifierParser())).Many();
    }
}