feat: Parser Combinator库和词法分析器 (#2)
All checks were successful
Run unit test / Unit-Test (push) Successful in 41s
All checks were successful
Run unit test / Unit-Test (push) Successful in 41s
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/2 Co-authored-by: jackfiled <xcrenchangjun@outlook.com> Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
This commit is contained in:
150
CanonSharp.Common/Scanner/LexicalScanner.cs
Normal file
150
CanonSharp.Common/Scanner/LexicalScanner.cs
Normal file
@@ -0,0 +1,150 @@
|
||||
using CanonSharp.Combinator;
|
||||
using CanonSharp.Combinator.Abstractions;
|
||||
using CanonSharp.Combinator.Extensions;
|
||||
using static CanonSharp.Combinator.Text.TextParserBuilder;
|
||||
using static CanonSharp.Combinator.ParserBuilder;
|
||||
|
||||
namespace CanonSharp.Common.Scanner;
|
||||
|
||||
public sealed class LexicalScanner
|
||||
{
|
||||
private readonly Parser<char, IEnumerable<LexicalToken>> _parser = PascalParser();
|
||||
|
||||
/// <summary>
/// Runs this scanner's token parser over <paramref name="state"/> and returns the
/// <c>Value</c> of the parse result.
/// </summary>
/// <typeparam name="TState">Input state type, constrained to <c>IReadState&lt;char, TState&gt;</c>.</typeparam>
/// <param name="state">The input state handed to the parser.</param>
/// <returns>The token sequence carried by the parse result.</returns>
/// <remarks>
/// NOTE(review): what <c>Value</c> does when the parse fails is not visible from this file;
/// confirm against the combinator library before relying on it.
/// </remarks>
public IEnumerable<LexicalToken> Tokenize<TState>(TState state)
    where TState : IReadState<char, TState>
    => _parser.Parse(state).Value;
|
||||
|
||||
/// <summary>
/// Builds the parser for reserved words, matched without regard to letter case.
/// </summary>
/// <remarks>
/// After a reserved word is matched, a negative look-ahead on "ASCII letter, ASCII digit
/// or underscore" is applied, so the parser fails when the word is immediately followed
/// by one of those characters.
/// NOTE(review): the list spells integer division as "divide"; standard Pascal uses
/// "div" - confirm which spelling the target dialect expects.
/// </remarks>
/// <returns>
/// A parser yielding a <see cref="LexicalTokenType.Keyword"/> token whose literal is the
/// value produced by the matching <c>StringIgnoreCase</c> alternative.
/// </returns>
public static Parser<char, LexicalToken> KeywordParser()
{
    // Alternatives are listed in the same order as before; none is a prefix of another.
    var reservedWord = Choice(
        StringIgnoreCase("program"), StringIgnoreCase("const"), StringIgnoreCase("var"),
        StringIgnoreCase("procedure"), StringIgnoreCase("function"),
        StringIgnoreCase("begin"), StringIgnoreCase("end"),
        StringIgnoreCase("array"), StringIgnoreCase("of"),
        StringIgnoreCase("if"), StringIgnoreCase("then"), StringIgnoreCase("else"),
        StringIgnoreCase("for"), StringIgnoreCase("to"), StringIgnoreCase("do"),
        StringIgnoreCase("integer"), StringIgnoreCase("real"),
        StringIgnoreCase("boolean"), StringIgnoreCase("char"),
        StringIgnoreCase("divide"), StringIgnoreCase("not"), StringIgnoreCase("mod"),
        StringIgnoreCase("and"), StringIgnoreCase("or"),
        StringIgnoreCase("true"), StringIgnoreCase("false"),
        StringIgnoreCase("while"));

    return from word in reservedWord
        from _ in (AsciiLetter() | AsciiDigit() | Char('_')).LookAhead().Not()
        select new LexicalToken(LexicalTokenType.Keyword, word);
}
|
||||
|
||||
/// <summary>
/// Builds the parser for delimiter tokens: "," ";" "(" ")" "[" "]" "..", a lone ":" and a lone ".".
/// </summary>
/// <returns>A parser yielding a <see cref="LexicalTokenType.Delimiter"/> token.</returns>
public static Parser<char, LexicalToken> DelimiterParser()
{
    // Delimiters that are matched as fixed text, including the two-character "..".
    Parser<char, LexicalToken> fixedText = from text in Choice(
            String(","),
            String(";"),
            String("("),
            String(")"),
            String("["),
            String("]"),
            String(".."))
        select new LexicalToken(LexicalTokenType.Delimiter, text);

    // ':' counts as a delimiter only when the next character is not '=' (that would be ":=").
    Parser<char, LexicalToken> colon = from symbol in Char(':')
        from _ in Char('=').LookAhead().Not()
        select new LexicalToken(LexicalTokenType.Delimiter, symbol.ToString());

    // '.' counts as a delimiter only when the next character is not another '.'.
    Parser<char, LexicalToken> period = from symbol in Char('.')
        from _ in Char('.').LookAhead().Not()
        select new LexicalToken(LexicalTokenType.Delimiter, ".");

    return fixedText | colon | period;
}
|
||||
|
||||
/// <summary>
/// Builds the parser for operator tokens: "=", "!=", "&lt;=", "&gt;=", "+", "-", "*", "/", ":=",
/// plus "&lt;" and "&gt;" when they are not followed by "=".
/// </summary>
/// <returns>A parser yielding a <see cref="LexicalTokenType.Operator"/> token.</returns>
public static Parser<char, LexicalToken> OperatorParser()
{
    // Operators matched as fixed text.
    Parser<char, LexicalToken> fixedText = from text in Choice(
            String("="),
            String("!="),
            String("<="),
            String(">="),
            String("+"),
            String("-"),
            String("*"),
            String("/"),
            String(":="))
        select new LexicalToken(LexicalTokenType.Operator, text);

    // A bare '<': the look-ahead refuses the match when '=' comes next.
    Parser<char, LexicalToken> lessThan = from symbol in Char('<')
        from _ in Char('=').LookAhead().Not()
        select new LexicalToken(LexicalTokenType.Operator, "<");

    // A bare '>': same guard against a following '='.
    Parser<char, LexicalToken> greaterThan = from symbol in Char('>')
        from _ in Char('=').LookAhead().Not()
        select new LexicalToken(LexicalTokenType.Operator, ">");

    return fixedText | lessThan | greaterThan;
}
|
||||
|
||||
/// <summary>
/// Builds the parser for an unsigned integer literal: one or more ASCII digits.
/// </summary>
/// <remarks>
/// The digits are refused only when they are followed by '.' and another digit, i.e. when
/// they form the integer part of a real literal that <see cref="ConstFloatParser"/> takes.
/// Refusing on any '.' would also refuse the lower bound of a subrange such as
/// <c>1..10</c>, which <see cref="ConstFloatParser"/> cannot accept either, leaving that
/// input without a matching token parser.
/// </remarks>
/// <returns>
/// A parser yielding a <see cref="LexicalTokenType.ConstInteger"/> token whose literal is the digit run.
/// </returns>
public static Parser<char, LexicalToken> ConstIntegerParser()
{
    // '.' followed by a digit is exactly what ConstFloatParser requires after the integer part.
    Parser<char, char> fractionStart = from dot in Char('.')
        from digit in AsciiDigit()
        select digit;

    return from digits in AsciiDigit().Many1()
        from _ in fractionStart.LookAhead().Not()
        select new LexicalToken(LexicalTokenType.ConstInteger, new string(digits.ToArray()));
}
|
||||
|
||||
/// <summary>
/// Builds the parser for a real literal: one or more ASCII digits, a '.', then one or
/// more ASCII digits.
/// </summary>
/// <returns>
/// A parser yielding a <see cref="LexicalTokenType.ConstFloat"/> token whose literal is
/// the integer digits, a '.', and the fraction digits joined together.
/// </returns>
public static Parser<char, LexicalToken> ConstFloatParser()
{
    return from whole in AsciiDigit().Many1()
        from _ in Char('.')
        from fractional in AsciiDigit().Many1()
        select new LexicalToken(
            LexicalTokenType.ConstFloat,
            $"{new string(whole.ToArray())}.{new string(fractional.ToArray())}");
}
|
||||
|
||||
/// <summary>
/// Builds the parser for identifiers: an ASCII letter or underscore, followed by any
/// number of ASCII letters, ASCII digits or underscores.
/// </summary>
/// <returns>
/// A parser yielding a <see cref="LexicalTokenType.Identifier"/> token whose literal is
/// the full matched name.
/// </returns>
public static Parser<char, LexicalToken> IdentifierParser()
{
    var leading = AsciiLetter() | Char('_');

    return from head in leading
        from tail in (AsciiLetter() | AsciiDigit() | Char('_')).Many()
        select new LexicalToken(LexicalTokenType.Identifier, $"{head}{new string(tail.ToArray())}");
}
|
||||
|
||||
/// <summary>
/// Builds the parser for a brace comment: <c>Any&lt;char&gt;()</c> quoted between '{' and '}',
/// with the parsed content replaced by <c>Unit.Instance</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably <c>Quote</c> stops at the first '}', so comments do not nest -
/// confirm against the combinator library.
/// </remarks>
public static Parser<char, Unit> CommentParser()
    => Any<char>()
        .Quote(Char('{'), Char('}'))
        .Map(_ => Unit.Instance);
|
||||
|
||||
/// <summary>
/// Builds the parser for input that carries no token: whatever <c>Space()</c> matches,
/// whatever <c>LineBreak()</c> matches, or a comment (see <see cref="CommentParser"/>).
/// Every alternative yields <c>Unit.Instance</c>.
/// </summary>
public static Parser<char, Unit> JunkParser()
{
    Parser<char, Unit> whitespace = Space().Map(_ => Unit.Instance);
    Parser<char, Unit> newline = LineBreak().Map(_ => Unit.Instance);

    return whitespace | newline | CommentParser();
}
|
||||
|
||||
/// <summary>
/// Builds the parser for text quoted with single quotes.
/// </summary>
/// <returns>
/// A parser yielding a <see cref="LexicalTokenType.Character"/> token when the quoted text
/// has at most one character, and a <see cref="LexicalTokenType.String"/> token otherwise.
/// The literal is the text between the quotes.
/// </returns>
/// <remarks>
/// NOTE(review): an empty literal ('') has length 0 and is therefore classified as
/// Character - confirm this is intended.
/// </remarks>
public static Parser<char, LexicalToken> CharParser()
{
    var quotedText = Any<char>().Quote(Char('\'')).Map(chars => new string(chars.ToArray()));

    return from text in quotedText
        select new LexicalToken(
            text.Length > 1 ? LexicalTokenType.String : LexicalTokenType.Character,
            text);
}
|
||||
|
||||
/// <summary>
/// Builds the full scanner: <c>JunkParser().SkipTill(token)</c> repeated with <c>Many()</c>,
/// where <c>token</c> is a choice between the individual token parsers.
/// </summary>
/// <remarks>
/// The choice lists the parsers in this order: keyword, delimiter, operator, integer,
/// real, quoted text, identifier.
/// </remarks>
/// <returns>A parser yielding the sequence of tokens.</returns>
public static Parser<char, IEnumerable<LexicalToken>> PascalParser()
{
    Parser<char, Unit> junk = JunkParser();

    var anyToken = Choice(
        KeywordParser(),
        DelimiterParser(),
        OperatorParser(),
        ConstIntegerParser(),
        ConstFloatParser(),
        CharParser(),
        IdentifierParser());

    return junk.SkipTill(anyToken).Many();
}
|
||||
}
|
29
CanonSharp.Common/Scanner/LexicalToken.cs
Normal file
29
CanonSharp.Common/Scanner/LexicalToken.cs
Normal file
@@ -0,0 +1,29 @@
|
||||
namespace CanonSharp.Common.Scanner;
|
||||
|
||||
/// <summary>
/// Categories of token carried by <see cref="LexicalToken"/>.
/// </summary>
public enum LexicalTokenType
{
    /// <summary>A reserved word.</summary>
    Keyword,

    /// <summary>An integer literal.</summary>
    ConstInteger,

    /// <summary>A real literal with a fractional part.</summary>
    ConstFloat,

    /// <summary>An operator symbol.</summary>
    Operator,

    /// <summary>A delimiter symbol.</summary>
    Delimiter,

    /// <summary>A name that is not a reserved word.</summary>
    Identifier,

    /// <summary>A single-quoted literal of at most one character.</summary>
    Character,

    /// <summary>A single-quoted literal of more than one character.</summary>
    String,
}
|
||||
|
||||
/// <summary>
/// A token produced by the lexical scanner: a category plus the text stored for it.
/// Two tokens are equal when both the category and the text are equal.
/// </summary>
/// <param name="type">Category of the token.</param>
/// <param name="literalValue">Text stored for the token.</param>
public sealed class LexicalToken(LexicalTokenType type, string literalValue) : IEquatable<LexicalToken>
{
    /// <summary>Category of this token.</summary>
    public LexicalTokenType TokenType { get; } = type;

    /// <summary>Text stored for this token.</summary>
    public string LiteralValue { get; } = literalValue;

    /// <summary>Compares category and text (ordinal string equality).</summary>
    public bool Equals(LexicalToken? other) =>
        other is not null && TokenType == other.TokenType && LiteralValue == other.LiteralValue;

    /// <inheritdoc />
    public override bool Equals(object? obj) => Equals(obj as LexicalToken);

    /// <summary>
    /// Hash consistent with <see cref="Equals(LexicalToken?)"/>.
    /// </summary>
    /// <remarks>
    /// HashCode.Combine tolerates a null literal (which Equals also tolerates) and mixes
    /// both fields, unlike a plain XOR of the two hashes.
    /// </remarks>
    public override int GetHashCode() => HashCode.Combine(TokenType, LiteralValue);

    /// <summary>Formats the token as <c>&lt;Type&gt;'literal'</c>.</summary>
    public override string ToString() => $"<{TokenType}>'{LiteralValue}'";
}
|
62
CanonSharp.Common/Scanner/StringReadState.cs
Normal file
62
CanonSharp.Common/Scanner/StringReadState.cs
Normal file
@@ -0,0 +1,62 @@
|
||||
using CanonSharp.Combinator.Abstractions;
|
||||
|
||||
namespace CanonSharp.Common.Scanner;
|
||||
|
||||
/// <summary>
/// Input-stream state over a string: the source text plus the index of the current character.
/// Instances are immutable; <see cref="Next"/> returns a new state one position further on.
/// </summary>
public sealed class StringReadState : IReadState<char, StringReadState>
{
    private readonly string _source;

    private readonly int _index;

    /// <summary>
    /// The character at the current index. Check <see cref="HasValue"/> first: reading past
    /// the end of the source indexes the string out of range.
    /// </summary>
    public char Current => _source[_index];

    /// <summary>True while the current index is inside the source string.</summary>
    public bool HasValue => _index < _source.Length;

    /// <summary>A new state over the same source, advanced by one character.</summary>
    public StringReadState Next => new(_source, _index + 1);

    private StringReadState(string source, int index)
    {
        _source = source;
        _index = index;
    }

    /// <summary>Creates a state positioned at the first character of <paramref name="source"/>.</summary>
    /// <param name="source">The text to read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public StringReadState(string source)
        // Fail at the boundary instead of with a NullReferenceException on first use.
        : this(source ?? throw new ArgumentNullException(nameof(source)), 0)
    {
    }

    /// <summary>Two states are equal when their source text and index are equal.</summary>
    public bool Equals(StringReadState? other) =>
        other is not null && _source == other._source && _index == other._index;

    /// <inheritdoc />
    public override bool Equals(object? obj) => Equals(obj as StringReadState);

    /// <summary>Hash consistent with <see cref="Equals(StringReadState?)"/>.</summary>
    public override int GetHashCode() => HashCode.Combine(_source, _index);

    /// <summary>
    /// Describes the current character (control characters in escaped form) followed by its
    /// code in hexadecimal, or reports the end of the string.
    /// </summary>
    public override string ToString()
    {
        if (!HasValue)
        {
            return "End of string.";
        }

        return $"{ToReadableString(Current)}<0x{(int)Current:X2}>";
    }

    /// <summary>Maps common control characters to their escape sequences; other characters are returned as-is.</summary>
    private static string ToReadableString(char token)
        => token switch
        {
            '\0' => "\\0",
            '\a' => "\\a",
            '\b' => "\\b",
            '\f' => "\\f",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            '\v' => "\\v",
            _ => token.ToString(),
        };
}
|
Reference in New Issue
Block a user