using CanonSharp.Combinator;
using CanonSharp.Combinator.Abstractions;
using CanonSharp.Combinator.Extensions;
using static CanonSharp.Combinator.Text.TextParserBuilder;
using static CanonSharp.Combinator.ParserBuilder;

namespace CanonSharp.Common.Scanner;

// NOTE(review): the generic type arguments were missing from the text under review
// (e.g. "Parser> _parser", "Tokenize(TState state) where TState : IReadState").
// They are restored below as Parser<char, T> and IReadState<char, TState>, inferred from
// how the parsers are used (character input, LexicalToken / Unit results).
// Confirm against the declarations in CanonSharp.Combinator before merging.

/// <summary>
/// Lexical scanner for Pascal source text, assembled from parser combinators.
/// </summary>
public sealed class LexicalScanner
{
    private readonly Parser<char, IEnumerable<LexicalToken>> _parser = PascalParser();

    /// <summary>
    /// Runs the scanner built by <see cref="PascalParser"/> on the given input state.
    /// </summary>
    /// <typeparam name="TState">Type of the input read state.</typeparam>
    /// <param name="state">Input state at which scanning starts.</param>
    /// <returns>The tokens carried by the parse result.</returns>
    public IEnumerable<LexicalToken> Tokenize<TState>(TState state) where TState : IReadState<char, TState>
    {
        return _parser.Parse(state).Value;
    }

    /// <summary>
    /// Builds the parser for reserved words (matched through <c>StringIgnoreCase</c>).
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Keyword"/> tokens.</returns>
    public static Parser<char, LexicalToken> KeywordParser()
    {
        return from value in Choice(StringIgnoreCase("program"),
                StringIgnoreCase("const"),
                StringIgnoreCase("var"),
                StringIgnoreCase("procedure"),
                StringIgnoreCase("function"),
                StringIgnoreCase("begin"),
                StringIgnoreCase("end"),
                StringIgnoreCase("array"),
                StringIgnoreCase("of"),
                StringIgnoreCase("if"),
                StringIgnoreCase("then"),
                StringIgnoreCase("else"),
                StringIgnoreCase("for"),
                StringIgnoreCase("to"),
                StringIgnoreCase("do"),
                StringIgnoreCase("integer"),
                StringIgnoreCase("real"),
                StringIgnoreCase("boolean"),
                StringIgnoreCase("char"),
                StringIgnoreCase("divide"),
                StringIgnoreCase("not"),
                StringIgnoreCase("mod"),
                StringIgnoreCase("and"),
                StringIgnoreCase("or"),
                StringIgnoreCase("true"),
                StringIgnoreCase("false"),
                StringIgnoreCase("while"))
            // A keyword must not be directly followed by an identifier character;
            // otherwise the text is left for IdentifierParser (e.g. "ending").
            from _ in (AsciiLetter() | AsciiDigit() | Char('_')).LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Keyword, value);
    }

    /// <summary>
    /// Builds the parser for delimiters: <c>, ; ( ) [ ] ..</c>, a <c>:</c> that is not
    /// followed by <c>=</c>, and a <c>.</c> that is not followed by another <c>.</c>.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Delimiter"/> tokens.</returns>
    public static Parser<char, LexicalToken> DelimiterParser()
    {
        // ':' is a delimiter only when it does not begin the ":=" operator.
        Parser<char, LexicalToken> colonParser = from token in Char(':')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, token.ToString());

        // A lone '.' is a delimiter only when it is not the first half of "..".
        Parser<char, LexicalToken> periodParser = from token in Char('.')
            from _ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, ".");

        // Delimiters with a fixed spelling that need no look-ahead.
        Parser<char, LexicalToken> fixedTokenParser = from token in Choice(
                String(","),
                String(";"),
                String("("),
                String(")"),
                String("["),
                String("]"),
                String(".."))
            select new LexicalToken(LexicalTokenType.Delimiter, token);

        return fixedTokenParser | colonParser | periodParser;
    }

    /// <summary>
    /// Builds the parser for operators: <c>= != &lt;= &gt;= + - * / :=</c>, plus
    /// <c>&lt;</c> and <c>&gt;</c> when they are not followed by <c>=</c>.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Operator"/> tokens.</returns>
    public static Parser<char, LexicalToken> OperatorParser()
    {
        Parser<char, LexicalToken> lessParser = from token in Char('<')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, "<");

        Parser<char, LexicalToken> greaterParser = from token in Char('>')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, ">");

        Parser<char, LexicalToken> otherParsers = from token in Choice(
                String("="),
                String("!="),
                String("<="),
                String(">="),
                String("+"),
                String("-"),
                String("*"),
                String("/"),
                String(":="))
            select new LexicalToken(LexicalTokenType.Operator, token);

        return otherParsers | lessParser | greaterParser;
    }

    /// <summary>
    /// Builds the parser for integer literals: one or more ASCII digits that are not the
    /// integral part of a real literal.
    /// </summary>
    /// <remarks>
    /// Only a <c>.</c> that is itself followed by a digit disqualifies the digits. Rejecting
    /// every trailing <c>.</c> made a subrange such as <c>1..10</c> unscannable: neither this
    /// parser nor <see cref="ConstFloatParser"/> could accept the leading <c>1</c>.
    /// </remarks>
    /// <returns>A parser producing <see cref="LexicalTokenType.ConstInteger"/> tokens.</returns>
    public static Parser<char, LexicalToken> ConstIntegerParser()
    {
        // ". digit" marks the start of a fraction, i.e. the input is a real literal.
        Parser<char, char> fractionStart = from dot in Char('.')
            from digit in AsciiDigit()
            select digit;

        return from nums in AsciiDigit().Many1()
            from _ in fractionStart.LookAhead().Not()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(nums.ToArray()));
    }

    /// <summary>
    /// Builds the parser for real literals: digits, a <c>.</c>, then digits.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.ConstFloat"/> tokens.</returns>
    public static Parser<char, LexicalToken> ConstFloatParser()
    {
        return from integer in AsciiDigit().Many1()
            from _ in Char('.')
            from fraction in AsciiDigit().Many1()
            select new LexicalToken(LexicalTokenType.ConstFloat,
                new string(integer.ToArray()) + '.' + new string(fraction.ToArray()));
    }

    /// <summary>
    /// Builds the parser for identifiers: an ASCII letter or <c>_</c> followed by any number
    /// of ASCII letters, ASCII digits or <c>_</c>.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Identifier"/> tokens.</returns>
    public static Parser<char, LexicalToken> IdentifierParser()
    {
        return from first in AsciiLetter() | Char('_')
            from second in (AsciiLetter() | AsciiDigit() | Char('_')).Many()
            select new LexicalToken(LexicalTokenType.Identifier, first + new string(second.ToArray()));
    }

    /// <summary>
    /// Builds the parser for a <c>{ ... }</c> comment; the comment text is discarded.
    /// </summary>
    /// <returns>A parser whose result is <see cref="Unit.Instance"/>.</returns>
    public static Parser<char, Unit> CommentParser()
    {
        return Any().Quote(Char('{'), Char('}')).Map(_ => Unit.Instance);
    }

    /// <summary>
    /// Builds the parser for input that yields no token: a space, a line break or a comment.
    /// </summary>
    /// <returns>A parser whose result is <see cref="Unit.Instance"/>.</returns>
    public static Parser<char, Unit> JunkParser()
    {
        return Space().Map(_ => Unit.Instance) | LineBreak().Map(_ => Unit.Instance) | CommentParser();
    }

    /// <summary>
    /// Builds the parser for text quoted with single quotes. Quoted text of length 0 or 1
    /// becomes a <see cref="LexicalTokenType.Character"/> token, longer text a
    /// <see cref="LexicalTokenType.String"/> token.
    /// </summary>
    /// <returns>A parser producing character or string tokens.</returns>
    public static Parser<char, LexicalToken> CharParser()
    {
        // NOTE(review): an empty literal ('') is classified as Character - confirm this is intended.
        return from str in Any().Quote(Char('\'')).Map(x => new string(x.ToArray()))
            select str.Length <= 1
                ? new LexicalToken(LexicalTokenType.Character, str)
                : new LexicalToken(LexicalTokenType.String, str);
    }

    /// <summary>
    /// Builds the complete scanner: junk is skipped ahead of each token, and the token
    /// alternatives are tried in the order keyword, delimiter, operator, integer, real,
    /// character/string, identifier. The whole step is repeated with <c>Many()</c>.
    /// </summary>
    /// <returns>A parser producing the sequence of scanned tokens.</returns>
    public static Parser<char, IEnumerable<LexicalToken>> PascalParser()
    {
        return JunkParser().SkipTill(Choice(KeywordParser(),
            DelimiterParser(),
            OperatorParser(),
            ConstIntegerParser(),
            ConstFloatParser(),
            CharParser(),
            IdentifierParser())).Many();
    }
}