// Listing metadata: 156 lines, 5.8 KiB, C#
using CanonSharp.Combinator;
|
|
using CanonSharp.Combinator.Abstractions;
|
|
using CanonSharp.Combinator.Extensions;
|
|
using static CanonSharp.Combinator.Text.TextParserBuilder;
|
|
using static CanonSharp.Combinator.ParserBuilder;
|
|
|
|
namespace CanonSharp.Pascal.Scanner;
|
|
|
|
/// <summary>
/// Lexical scanner for Pascal source text, assembled from parser combinators.
/// Every token category is exposed as its own static parser factory, and
/// <see cref="PascalParser"/> combines them into the parser that
/// <see cref="Tokenize{TState}"/> runs.
/// </summary>
public sealed class LexicalScanner
{
    // Reserved words, listed in the order in which they are offered to Choice.
    private static readonly string[] ReservedWords =
    {
        "program", "const", "var", "procedure", "function", "begin", "end",
        "array", "of", "if", "then", "else", "for", "to", "do",
        "integer", "real", "boolean", "char",
        "div", "not", "mod", "and", "or",
        "true", "false", "while"
    };

    // Built once per scanner instance and reused by every Tokenize call on it.
    private readonly IParser<char, IEnumerable<LexicalToken>> _parser = PascalParser();

    /// <summary>
    /// Runs the complete scanner over <paramref name="state"/>.
    /// </summary>
    /// <typeparam name="TState">Concrete read-state type supplying the characters.</typeparam>
    /// <param name="state">The input to scan.</param>
    /// <returns>The <c>Value</c> of the parse result, i.e. the scanned tokens.</returns>
    public IEnumerable<LexicalToken> Tokenize<TState>(TState state)
        where TState : IReadState<char, TState>
        => _parser.Parse(state).Value;

    /// <summary>
    /// Builds the parser for reserved words. A word is matched with
    /// <c>StringIgnoreCase</c> and is only accepted when the negated look-ahead for an
    /// ASCII letter, ASCII digit or underscore succeeds, so that a longer identifier
    /// starting with a reserved word is not split.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Keyword"/> tokens.</returns>
    public static IParser<char, LexicalToken> KeywordParser()
    {
        IParser<char, string>[] candidates = ReservedWords
            .Select(word => StringIgnoreCase(word))
            .ToArray();

        return from word in Choice(candidates)
            from _ in IdentifierPart().LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Keyword, word);
    }

    /// <summary>
    /// Builds the parser for delimiters: <c>, ; ( ) [ ] ..</c>, a <c>:</c> that is not
    /// followed by <c>=</c>, and a <c>.</c> that is not followed by another <c>.</c>.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Delimiter"/> tokens.</returns>
    public static IParser<char, LexicalToken> DelimiterParser()
    {
        // Delimiters that can be matched as plain literals; ".." is among them and is
        // offered before the lone-period alternative below.
        IParser<char, LexicalToken> literal =
            from text in Choice(
                String(","),
                String(";"),
                String("("),
                String(")"),
                String("["),
                String("]"),
                String(".."))
            select new LexicalToken(LexicalTokenType.Delimiter, text);

        // ":" on its own; ":=" is left for the operator parser.
        IParser<char, LexicalToken> colon =
            from symbol in Char(':')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, symbol.ToString());

        // "." on its own, i.e. not the first half of "..".
        IParser<char, LexicalToken> period =
            from _ in Char('.')
            from __ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, ".");

        return literal | colon | period;
    }

    /// <summary>
    /// Builds the parser for operators: <c>= != &lt;= &gt;= + - * / :=</c>, plus
    /// <c>&lt;</c> and <c>&gt;</c> when they are not followed by <c>=</c>.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Operator"/> tokens.</returns>
    public static IParser<char, LexicalToken> OperatorParser()
    {
        // Operators that can be matched as plain literals.
        IParser<char, LexicalToken> literal =
            from text in Choice(
                String("="),
                String("!="),
                String("<="),
                String(">="),
                String("+"),
                String("-"),
                String("*"),
                String("/"),
                String(":="))
            select new LexicalToken(LexicalTokenType.Operator, text);

        // Strict comparisons: the look-ahead rejects the "<=" / ">=" spellings.
        IParser<char, LexicalToken> less =
            from _ in Char('<')
            from __ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, "<");

        IParser<char, LexicalToken> greater =
            from _ in Char('>')
            from __ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, ">");

        return literal | less | greater;
    }

    /// <summary>
    /// Builds the parser for integer constants: one or more ASCII digits that are either
    /// followed by <c>..</c> (checked through look-ahead) or not followed by <c>.</c>.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.ConstInteger"/> tokens.</returns>
    public static IParser<char, LexicalToken> ConstIntegerParser()
    {
        // Digits followed by "..", as in "1..10"; the ".." is only looked at.
        IParser<char, LexicalToken> beforeRange =
            from digits in AsciiDigit().Many1()
            from _ in String("..").LookAhead()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(digits.ToArray()));

        // Digits with no "." directly after them.
        IParser<char, LexicalToken> standalone =
            from digits in AsciiDigit().Many1()
            from _ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(digits.ToArray()));

        return Choice(beforeRange, standalone);
    }

    /// <summary>
    /// Builds the parser for floating-point constants of the form
    /// <c>digits '.' digits</c>, with at least one digit on each side of the point.
    /// </summary>
    /// <returns>
    /// A parser producing <see cref="LexicalTokenType.ConstFloat"/> tokens whose text is
    /// the integer part, a <c>.</c>, and the fraction part.
    /// </returns>
    public static IParser<char, LexicalToken> ConstFloatParser()
    {
        return from whole in AsciiDigit().Many1()
            from _ in Char('.')
            from fraction in AsciiDigit().Many1()
            select new LexicalToken(
                LexicalTokenType.ConstFloat,
                $"{new string(whole.ToArray())}.{new string(fraction.ToArray())}");
    }

    /// <summary>
    /// Builds the parser for identifiers: an ASCII letter or underscore followed by any
    /// number of ASCII letters, ASCII digits or underscores.
    /// </summary>
    /// <returns>A parser producing <see cref="LexicalTokenType.Identifier"/> tokens.</returns>
    public static IParser<char, LexicalToken> IdentifierParser()
    {
        return from head in AsciiLetter() | Char('_')
            from tail in IdentifierPart().Many()
            select new LexicalToken(LexicalTokenType.Identifier, head + new string(tail.ToArray()));
    }

    /// <summary>
    /// Builds the parser for a comment: arbitrary characters quoted between <c>{</c> and
    /// <c>}</c>. The comment text itself is discarded.
    /// </summary>
    /// <returns>A parser yielding <see cref="Unit.Instance"/>.</returns>
    public static IParser<char, Unit> CommentParser()
    {
        return Any<char>()
            .Quote(Char('{'), Char('}'))
            .Map(_ => Unit.Instance);
    }

    /// <summary>
    /// Builds the parser for input that carries no token: a space, a line break, or a
    /// comment.
    /// </summary>
    /// <returns>A parser yielding <see cref="Unit.Instance"/>.</returns>
    public static IParser<char, Unit> JunkParser()
    {
        IParser<char, Unit> blank = Space().Map(_ => Unit.Instance);
        IParser<char, Unit> newline = LineBreak().Map(_ => Unit.Instance);

        return blank | newline | CommentParser();
    }

    /// <summary>
    /// Builds the parser for single-quoted literals. The token text is the content
    /// between the quotes.
    /// </summary>
    /// <returns>
    /// A parser producing a <see cref="LexicalTokenType.Character"/> token when the
    /// content has at most one character (the empty literal included), and a
    /// <see cref="LexicalTokenType.String"/> token otherwise.
    /// </returns>
    public static IParser<char, LexicalToken> CharParser()
    {
        IParser<char, string> quoted = Any<char>()
            .Quote(Char('\''))
            .Map(content => new string(content.ToArray()));

        return from text in quoted
            select new LexicalToken(
                text.Length <= 1 ? LexicalTokenType.Character : LexicalTokenType.String,
                text);
    }

    /// <summary>
    /// Builds the complete scanner: junk is skipped until one of the token parsers
    /// matches, and that step is repeated with <c>Many</c>.
    /// </summary>
    /// <remarks>
    /// The token parsers are offered in this order: keyword, delimiter, operator, float,
    /// integer, character/string literal, identifier.
    /// </remarks>
    /// <returns>A parser yielding the sequence of scanned tokens.</returns>
    public static IParser<char, IEnumerable<LexicalToken>> PascalParser()
    {
        IParser<char, LexicalToken> anyToken = Choice(
            KeywordParser(),
            DelimiterParser(),
            OperatorParser(),
            ConstFloatParser(),
            ConstIntegerParser(),
            CharParser(),
            IdentifierParser());

        return JunkParser().SkipTill(anyToken).Many();
    }

    // A character that may continue an identifier: ASCII letter, ASCII digit or underscore.
    private static IParser<char, char> IdentifierPart()
    {
        return AsciiLetter() | AsciiDigit() | Char('_');
    }
}
|