diff --git a/CanonSharp.Combinator/Abstractions/FailedResult.cs b/CanonSharp.Combinator/Abstractions/FailedResult.cs new file mode 100644 index 0000000..9fd8a42 --- /dev/null +++ b/CanonSharp.Combinator/Abstractions/FailedResult.cs @@ -0,0 +1,49 @@ +namespace CanonSharp.Combinator.Abstractions; + +/// +/// 失败解析结果基类 +/// +/// 输入流类型 +/// 解析结果类型 +public abstract class FailedResult : ParseResult +{ + public override T Value => throw Exception; + + /// + /// 当前读取到的状态 + /// + public abstract IReadState State { get; } + + /// + /// 解析失败的消息 + /// + public abstract string Message { get; } + + /// + /// 解析失败的异常 + /// + public virtual ParseException Exception => new(ToString()); + + /// + /// 转换该失败结果的类型 + /// + /// 转换之后的结果类型 + /// 转换之后的失败解析类型 + public abstract FailedResult Convert(); + + internal override ParseResult Next(Func> nextParser, + Func, ParseResult> continuation) + => continuation(Convert()); + + public override ParseResult Map(Func map) + => Convert(); + + public override TResult CaseOf(Func, TResult> successfulHandler, + Func, TResult> failedHandler) + => failedHandler(this); + + public override string ToString() + { + return $"Parse Failed: {Message}."; + } +} diff --git a/CanonSharp.Combinator/Abstractions/IReadState.cs b/CanonSharp.Combinator/Abstractions/IReadState.cs new file mode 100644 index 0000000..00b1074 --- /dev/null +++ b/CanonSharp.Combinator/Abstractions/IReadState.cs @@ -0,0 +1,26 @@ +namespace CanonSharp.Combinator.Abstractions; + +/// +/// 输入流的读取状态 +/// +/// 输入流元素类型 +public interface IReadState +{ + public TToken Current { get; } + + public bool HasValue { get; } +} + +/// +/// 输入流的读取状态 +/// +/// 输入流元素类型 +/// 下一个读取状态的类型 +public interface IReadState : IReadState, IEquatable + where TState : IReadState +{ + /// + /// 下一个读取状态 + /// + TState Next { get; } +} diff --git a/CanonSharp.Combinator/Abstractions/ParseResult.cs b/CanonSharp.Combinator/Abstractions/ParseResult.cs new file mode 100644 index 0000000..7f58852 --- /dev/null +++ 
b/CanonSharp.Combinator/Abstractions/ParseResult.cs @@ -0,0 +1,48 @@ +namespace CanonSharp.Combinator.Abstractions; + +/// +/// 解析器结果 +/// +/// 输入流类型 +/// 实际结果类型 +public abstract class ParseResult +{ + /// + /// 实际结果对象 + /// + public abstract T Value { get; } + + protected ParseResult() + { + + } + + /// + /// 在当前结果上应用下一个解析器 + /// + /// 下一个解析器的函数 + /// 处理解析结果的后继函数 + /// 下一个解析器函数返回的解析结果类型 + /// 最终的解析结果类型 + /// + internal abstract ParseResult Next(Func> nextParser, + Func, ParseResult> continuation); + + /// + /// 映射结果 + /// + /// 映射结果的函数 + /// 映射结果函数返回解析结果的类型 + /// 最终的解析结果 + public abstract ParseResult Map(Func map); + + /// + /// 在成功或者失败解析结果上应用不同的后继函数 + /// + /// 在成功解析结果上应用的函数 + /// 在失败解析结构上应用的函数 + /// 最后返回解析结果的类型 + /// 最后的解析结果 + public abstract TResult CaseOf(Func, TResult> successfulHandler, + Func, TResult> failedHandler); +} diff --git a/CanonSharp.Combinator/Abstractions/Parser.cs b/CanonSharp.Combinator/Abstractions/Parser.cs new file mode 100644 index 0000000..2159a95 --- /dev/null +++ b/CanonSharp.Combinator/Abstractions/Parser.cs @@ -0,0 +1,43 @@ +using CanonSharp.Combinator.Extensions; + +namespace CanonSharp.Combinator.Abstractions; + +/// +/// 解析器抽象基类 +/// +/// 输入流类型 +/// 解析结果类型 +public abstract class Parser +{ + /// + /// 解析器运行函数 + /// + /// 解析的输入流状态 + /// 运行之后的后继函数 + /// 输入流状态类型 + /// 后继函数运行之后的解析结果类型 + /// + internal abstract ParseResult Run(TState state, + Func, ParseResult> continuation) + where TState : IReadState; + + public ParseResult Parse(TState state) where TState : IReadState + { + return Run(state); + } + + private ParseResult Run(TState state) where TState : IReadState + { + try + { + return Run(state, result => result); + } + catch (Exception e) + { + return ParseResultBuilder.Fail(e, state); + } + } + + public static Parser operator |(Parser a, Parser b) + => a.Alternative(b); +} diff --git a/CanonSharp.Combinator/Abstractions/SuccessfulResult.cs b/CanonSharp.Combinator/Abstractions/SuccessfulResult.cs new file mode 100644 index 
0000000..fc76084 --- /dev/null +++ b/CanonSharp.Combinator/Abstractions/SuccessfulResult.cs @@ -0,0 +1,33 @@ +namespace CanonSharp.Combinator.Abstractions; + +/// +/// 成功解析结果基类 +/// +/// 实际的解析结果 +/// 输入流类型 +/// 实际的解析结果类型 +public abstract class SuccessfulResult(T value) : ParseResult +{ + public override T Value => value; + + /// + /// 运行下一个解析器 + /// + /// 下一个解析器 + /// 处理解析结果的后继函数 + /// 下一个解析器返回的结果类型 + /// 最终的结果类型 + /// 最终的结果 + protected abstract ParseResult RunNext(Parser parser, + Func, ParseResult> continuation); + + internal override ParseResult Next(Func> nextParser, + Func, ParseResult> continuation) + => RunNext(nextParser(Value), continuation); + + public override TResult CaseOf(Func, TResult> successfulHandler, + Func, TResult> failedHandler) + => successfulHandler(this); + + public override string ToString() => Value?.ToString() ?? string.Empty; +} diff --git a/CanonSharp.Combinator/CanonSharp.Combinator.csproj b/CanonSharp.Combinator/CanonSharp.Combinator.csproj new file mode 100644 index 0000000..3a63532 --- /dev/null +++ b/CanonSharp.Combinator/CanonSharp.Combinator.csproj @@ -0,0 +1,9 @@ + + + + net8.0 + enable + enable + + + diff --git a/CanonSharp.Combinator/Extensions/ParserExtensions.cs b/CanonSharp.Combinator/Extensions/ParserExtensions.cs new file mode 100644 index 0000000..1493e2b --- /dev/null +++ b/CanonSharp.Combinator/Extensions/ParserExtensions.cs @@ -0,0 +1,572 @@ +using System.Runtime.CompilerServices; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Parsers.Bases; +using CanonSharp.Combinator.Parsers.Modifiers; +using static CanonSharp.Combinator.ParserBuilder; + +namespace CanonSharp.Combinator.Extensions; + +public static class ParserExtensions +{ + #region BasesParser + + /// + /// 选择组合子 + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Alternative(this Parser first, Parser second) + => new AlternativeParser(first, second); + + /// + /// 选择组合子 + /// 
按照失败的解析结果选择第二个解析器 + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Alternative(this Parser parser, + Func, Parser> resume) + => new ResumeParser(parser, resume); + + /// + /// 单子解析器组合子 + /// + /// + /// 按照输出指定下一个解析器的函数 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Bind(this Parser parser, + Func> next) + => new BindParser(parser, next); + + /// + /// 映射解析器组合子 + /// + /// + /// 按照输出指定结果的函数 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Map(this Parser parser, Func map) + => new MapParser(parser, map); + + /// + /// 映射解析器组合子 + /// + /// + /// 最后的输出结果 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Map(this Parser parser, TResult result) + => parser.Map(_ => result); + + /// + /// 下一个解析器组合子 + /// + /// + /// 输入成功结果输出下一个解析器的函数 + /// 输入失败结果输出下一个解析器的函数 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Next(this Parser parser, + Func> next, + Func, Parser> failedNext) + => new NextParser(parser, next, failedNext); + + /// + /// 下一个解析器组合子 + /// + /// + /// 输入成功结果输出下一个解析器的函数 + /// 输出失败结果输出后续结果的函数 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Next(this Parser parser, + Func> next, Func, TResult> failedHandler) + => parser.Next(next, failedResult => Pure(failedHandler(failedResult))); + + /// + /// 下一个解析器组合子 + /// + /// + /// 输出成功结果输出下一个解析器的函数 + /// 如果失败之后返回该结果 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Next(this Parser parser, + Func> next, TResult failedResult) + => parser.Next(next, _ => Pure(failedResult)); + + /// + /// 下一个解析器组合子 + /// + /// + /// 输入成功结果返回新的结果 + /// 输入失败结果返回下一个解析器的函数 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Next(this 
Parser parser, + Func nextResult, Func, Parser> failedResume) + => parser.Next(x => Pure(nextResult(x)), failedResume); + + /// + /// 下一个解析器组合子 + /// + /// + /// 输入成功结果返回新的结果 + /// 输入失败结果返回新的结果 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Next(this Parser parser, + Func nextResult, Func, TResult> failedResult) + => new SuccessfulMapParser(parser, nextResult, failedResult); + + /// + /// 下一个解析器组合子 + /// + /// + /// 输入成功结果返回新结果的函数 + /// 返回的失败结果 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Next(this Parser parser, + Func successfulHandler, TResult failedResult) + => parser.Next(successfulHandler, _ => failedResult); + + #endregion + + #region ModifiedParser + + /// + /// 在解析结果上执行指定操作 + /// + /// + /// 成功结果上执行的操作 + /// 失败结果上执行的操作 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Do(this Parser parser, Action successfulAction, + Action> failedAction) + => new DoParser(parser, successfulAction, failedAction); + + /// + /// 在解析结果上执行指定的操作 + /// + /// + /// 成功结果上执行的操作 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Do(this Parser parser, Action successfulAction) + => parser.Do(successfulAction, _ => { }); + + /// + /// 向前看解析器 + /// 执行解析器的同时不消耗输入流 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser LookAhead(this Parser parser) + => new LookAheadParser(parser); + + /// + /// 翻转上游解析器的输出结果 + /// + /// + /// 翻转之后的输出结果 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Not(this Parser parser, T result) + => new ReverseParser(parser, result); + + /// + /// 翻转上游解析器的输出结果 + /// 输出结果默认为Unit + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Not(this Parser parser) + => parser.Not(Unit.Instance); + + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Try(this Parser parser, + Func, T> resume) + => new TryParser(parser, resume); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Try(this Parser parser, T result) + => parser.Try(_ => result); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Try(this Parser parser) + => parser.Next(_ => true, false).Try(false); + + #endregion + + #region Combinators + + /// + /// 连接两个解析器,返回左边解析器的结果 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Left(this Parser left, + Parser right) + => left.Bind(right.Map); + + /// + /// 连接两个解析器,返回右边解析器的结果 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Right(this Parser left, + Parser right) + => left.Bind(_ => right); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Parser> ManyRecursively(this Parser parser, + IEnumerable result) + => parser.Next(x => parser.ManyRecursively(result.Append(x)), result); + + /// + /// 将上游解析器运行零或若干次 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Many(this Parser parser) + => parser.ManyRecursively([]); + + /// + /// 将上游解析器运行若干次 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Many1(this Parser parser) + => parser.Bind(x => parser.ManyRecursively([x])); + + /// + /// 跳过执行上游解析器运行零或若干次 + /// 跳过执行不是不执行 + /// 而是不返回结果 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser SkipMany(this Parser parser) + => Fix(self => parser.Next(_ => self, Unit.Instance)); + + /// + /// 跳过执行上游解析器运行若干次 + /// 跳过执行不是不执行 + /// 而是不返回结果 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser SkipMany1(this Parser parser) + => 
parser.Right(parser.SkipMany()); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Parser ChainRecursively(Func> chain, T value) + => chain(value).Next(x => ChainRecursively(chain, x), value); + + /// + /// 链式解析器组合子 + /// 按照解析结果决定下一个解析器 + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Chain(this Parser parser, Func> chain) + => parser.Bind(x => ChainRecursively(chain, x)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Parser> ManyTillRecursively(this Parser parser, + Parser terminator, IEnumerable result) + => terminator.Next(_ => Pure>(result), + _ => parser.Bind(x => parser.ManyTillRecursively(terminator, result.Append(x)))); + + /// + /// 执行指定解析器直到终结解析器执行成功的组合子 + /// 指定解析器可以执行零次或者多次 + /// + /// 指定重复执行的解析器 + /// 判断是否终结的解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> ManyTill(this Parser parser, + Parser terminator) + => parser.ManyTillRecursively(terminator, []); + + /// + /// 执行指定解析器直到终结解析器执行成功的组合子 + /// 指定解析器至少执行一次 + /// + /// 指定重复执行的解析器 + /// 判断是否终结的解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Many1Till(this Parser parser, + Parser terminator) + => parser.Bind(x => parser.ManyTillRecursively(terminator, [x])); + + /// + /// 跳过指定解析器直到终结解析器执行成功的组合子 + /// 指定解析器可以执行零次或者若干次 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser SkipTill(this Parser parser, + Parser terminator) + => Fix(self => terminator | parser.Right(self)); + + /// + /// 跳过指定解析器直到终结解析器执行成功的组合子 + /// 指定解析器至少要执行一次 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Skip1Till(this Parser parser, + Parser terminator) + => parser.Right(parser.SkipTill(terminator)); + + /// + /// 解析直到指定的解析器识别成功 + /// + /// + /// + /// + /// + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Match(this Parser parser) + => SkipTill(Any(), parser); + + /// + /// 在左右两个解析器指定的范围内进行解析 + /// 解析类似于左右括号和左右引号类似的句式 + /// + /// + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Quote(this Parser parser, + Parser left, Parser right) + => left.Right(parser.ManyTill(right)); + + /// + /// 在同一个解析器指定的范围内进行解析 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Quote(this Parser parser, + Parser quotedParser) + => parser.Quote(quotedParser, quotedParser); + + /// + /// 解析由分隔符解析器分割的多个符号 + /// 例如a,b,c + /// 实际的解析器可以运行零次或者多次 + /// + /// 实际的解析器 + /// 分隔符解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> SeparatedBy1(this Parser parser, + Parser separator) + => parser.Bind(x => separator.Right(parser).ManyRecursively([x])); + + /// + /// 解析由分隔符解析器分割的多个符号 + /// 例如a,b,c + /// 实际的解析器可以运行多次 + /// + /// 实际的解析器 + /// 分隔符解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> SeparatedBy(this Parser parser, + Parser separator) + => parser.SeparatedBy1(separator).Try([]); + + /// + /// 解析直到使用分隔符解析器结束 + /// 例如abc. + /// 实际的解析器可以运行零次或者多次 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> EndBy(this Parser parser, + Parser separator) + => parser.Many().Left(separator); + + /// + /// 解析直到使用分隔符解析器结束 + /// 例如abc. 
+ /// 实际的解析器至少运行一次 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> EndBy1(this Parser parser, + Parser separator) + => parser.Many1().Left(separator); + + /// + /// Separated和End的综合体 + /// 形如a,b,c, + /// 实际的解析器至少运行一次 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> SeparatedOrEndBy1(this Parser parser, + Parser separator) + => parser.SeparatedBy1(separator).Left(separator.Try()); + + /// + /// Separated和End的综合体 + /// 形如a,b,c, + /// 实际的解析器可以运行零次或者多次 + /// + /// + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> SeparatedOrEndBy(this Parser parser, + Parser separator) + => parser.SeparatedOrEndBy1(separator).Try([]); + + #endregion + + #region LINQ + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Select(this Parser parser, + Func selector) + => parser.Map(selector); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser SelectMany(this Parser parser, + Func> selector, Func projector) + => parser.Bind(x => selector(x).Map(y => projector(x, y))); + + #endregion +} diff --git a/CanonSharp.Combinator/Extensions/ReadStateExtensions.cs b/CanonSharp.Combinator/Extensions/ReadStateExtensions.cs new file mode 100644 index 0000000..b0b44c2 --- /dev/null +++ b/CanonSharp.Combinator/Extensions/ReadStateExtensions.cs @@ -0,0 +1,18 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Extensions; + +public static class ReadStateExtensions +{ + public static IEnumerable AsEnumerable(this TState source) + where TState : IReadState + { + TState current = source; + + while (current.HasValue) + { + yield return current; + current = current.Next; + } + } +} diff --git a/CanonSharp.Combinator/ParseException.cs b/CanonSharp.Combinator/ParseException.cs new file mode 100644 index 0000000..d925dbd --- /dev/null +++ 
b/CanonSharp.Combinator/ParseException.cs @@ -0,0 +1,15 @@ +namespace CanonSharp.Combinator; + +/// +/// 解析过程中的异常 +/// +public class ParseException : Exception +{ + public ParseException(string message) : base(message) + { + } + + public ParseException(string message, Exception innerException) : base(message, innerException) + { + } +} diff --git a/CanonSharp.Combinator/ParseResultBuilder.cs b/CanonSharp.Combinator/ParseResultBuilder.cs new file mode 100644 index 0000000..3f09e0b --- /dev/null +++ b/CanonSharp.Combinator/ParseResultBuilder.cs @@ -0,0 +1,67 @@ +using System.Runtime.CompilerServices; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Results; + +namespace CanonSharp.Combinator; + +/// +/// 相关的扩展方法 +/// +public static class ParseResultBuilder +{ + /// + /// 生成解析成功的结果 + /// + /// 解析成功的对象 + /// 下一个输入流状态 + /// 输入流类型 + /// 输入流状态类型 + /// 解析成功的对象类型 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ParseResult Succeed(T value, TState state) + where TState : IReadState + => new InternalSuccessfulResult(value, state); + + /// + /// 生成错误类型的解析失败结果 + /// + /// 解析的输入流状态 + /// 输入流类型 + /// 输入流状态类型 + /// 解析成功的对象类型 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ParseResult Fail(TState state) + where TState : IReadState + => new FailedResultWithError(state); + + + /// + /// 生成消息类型的解析失败结果 + /// + /// 错误消息 + /// 输入流状态 + /// 输入流类型 + /// 输入流状态类型 + /// 解析成功的对象类型 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ParseResult Fail(string message, TState state) + where TState : IReadState + => new FailedResultWithMessage(message, state); + + /// + /// 生成异常类型的解析失败结果 + /// + /// 解析异常 + /// 输入流状态 + /// 输入流类型 + /// 输入流状态类型 + /// 解析成功的对象类型 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ParseResult Fail(Exception exception, TState state) + where TState : IReadState + => new FailedResultWithException(exception, state); +} diff --git 
a/CanonSharp.Combinator/ParserBuilder.cs b/CanonSharp.Combinator/ParserBuilder.cs new file mode 100644 index 0000000..5d84120 --- /dev/null +++ b/CanonSharp.Combinator/ParserBuilder.cs @@ -0,0 +1,223 @@ +using System.Runtime.CompilerServices; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; +using CanonSharp.Combinator.Parsers.Bases; +using CanonSharp.Combinator.Parsers.Primitives; + +namespace CanonSharp.Combinator; + +public static class ParserBuilder +{ + #region PrimitiveParser + + // 对应Parsers.Primitives命名空间下的Parser实现 + + /// + /// 直接成功的解析器 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Pure(T value) + => new PureParser(value); + + /// + /// 直接成功的解析器 + /// + /// 生成结果的函数 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Pure(Func, T> valueFunc) + => new DelayedPureParser(valueFunc); + + /// + /// 生成空结果的解析器 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Null() => Pure(Unit.Instance); + + /// + /// 失败的解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Fail() => new FailedParser(); + + /// + /// 失败的解析器 + /// + /// 失败的原因 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Fail(string message) => new FailedParserWithMessage(message); + + /// + /// 失败的解析器 + /// + /// 产生失败原因的函数 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Fail(Func, string> messageFunc) => + new FailedParserWithDelayedMessage(messageFunc); + + /// + /// 失败的解析器 + /// + /// 失败的异常 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Fail(Exception exception) => + new FailedParserWithException(exception); + + /// + /// 满足指定条件的解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Satisfy(Func 
predicate) + => new SatisfyParser(predicate); + + /// + /// 识别任何输入的解析器 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Any() => Satisfy(_ => true); + + /// + /// 识别指定输入元素的解析器 + /// + /// 识别的指定元素 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Token(TToken token) + => Satisfy(t => EqualityComparer.Default.Equals(t, token)); + + /// + /// 跳过指定数量输入元素的解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Skip(int count) => new SkipParser(count); + + /// + /// 识别指定数量输入元素的解析器 + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Take(int count) => new TakeParser(count); + + #endregion + + #region Bases + + /// + /// 按照给定的函数修改解析器的解析器 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Fix(Func, Parser> parserFix) + => new FixParser(parserFix); + + #endregion + + #region Combinators + + /// + /// 按照给定的解析器组依次尝试 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Choice(IEnumerable> parsers) + => parsers.Reverse().Aggregate((next, parser) => parser.Alternative(next)); + + /// + /// 按照给定的解析器组依次尝试 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Choice(params Parser[] parsers) + => Choice(parsers.AsEnumerable()); + + /// + /// 顺序应用所有输入的解析器 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Sequence(IEnumerable> parsers) + => parsers.Reverse().Aggregate(Pure>([]), + (next, parser) => parser.Bind( + x => next.Map(result => result.Prepend(x)))); + + /// + /// 顺序应用输入输入的解析器 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Sequence(params Parser[] parsers) + => Sequence(parsers.AsEnumerable()); + + /// + /// 识别输入令牌直到终止解析器运行成功 + 
/// 在终止解析器之前可以存在零个或者多个输入令牌 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> TakeTill(Parser terminator) + => Any().ManyTill(terminator); + + /// + /// 识别输入令牌直到终止解析器运行成功 + /// 在终止解析器之前至少存在一个输入令牌 + /// + /// + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser> Take1Till(Parser termintor) + => Any().Many1Till(termintor); + + + #endregion +} diff --git a/CanonSharp.Combinator/Parsers/Bases/AlternativeParser.cs b/CanonSharp.Combinator/Parsers/Bases/AlternativeParser.cs new file mode 100644 index 0000000..af68952 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Bases/AlternativeParser.cs @@ -0,0 +1,21 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Bases; + +/// +/// 选择解析器 +/// 如果第一个不成功则调用第二个 +/// +/// 第一个解析器 +/// 第二个解析器 +/// 输入流类型 +/// 解析器结果类型 +internal sealed class AlternativeParser(Parser first, Parser second) + : Parser +{ + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + { + return first.Run(state, result => result.CaseOf(continuation, _ => second.Run(state, continuation))); + } +} diff --git a/CanonSharp.Combinator/Parsers/Bases/BindParser.cs b/CanonSharp.Combinator/Parsers/Bases/BindParser.cs new file mode 100644 index 0000000..10be255 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Bases/BindParser.cs @@ -0,0 +1,20 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Bases; + +/// +/// 单子解析器 +/// +/// 上游解析器 +/// 下游解析器生成函数 +/// 输入流类型 +/// 上游解析器结果类型 +/// 下游解析器结果类型 +internal sealed class BindParser( + Parser parser, + Func> next) : Parser +{ + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + => parser.Run(state, result => result.Next(next, continuation)); +} diff --git a/CanonSharp.Combinator/Parsers/Bases/FixParser.cs b/CanonSharp.Combinator/Parsers/Bases/FixParser.cs new file mode 100644 index 0000000..a8f3b3a --- 
/dev/null +++ b/CanonSharp.Combinator/Parsers/Bases/FixParser.cs @@ -0,0 +1,33 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Bases; + +/// +/// 修正?解析器 +/// 感觉是一种递归的高级实现? +/// +/// +/// +/// +internal sealed class FixParser : Parser +{ + private readonly Parser _parser; + + public FixParser(Func, Parser> func) + { + _parser = func(this); + } + + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + => _parser.Run(state, continuation); +} + +internal sealed class FixParser( + Func>, TParameter, Parser> func, + TParameter parameter) : Parser +{ + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + => func(p => new FixParser(func, p), parameter).Run(state, continuation); +} diff --git a/CanonSharp.Combinator/Parsers/Bases/MapParser.cs b/CanonSharp.Combinator/Parsers/Bases/MapParser.cs new file mode 100644 index 0000000..0a94072 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Bases/MapParser.cs @@ -0,0 +1,21 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Bases; + +/// +/// 映射解析器 +/// 提供一个函数修改上游解析器返回的结果 +/// +/// 上游解析器 +/// 修改上游解析器返回结果的 +/// +/// +/// +internal sealed class MapParser( + Parser parser, + Func func) : Parser +{ + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + => parser.Run(state, result => continuation(result.Map(func))); +} diff --git a/CanonSharp.Combinator/Parsers/Bases/NextParser.cs b/CanonSharp.Combinator/Parsers/Bases/NextParser.cs new file mode 100644 index 0000000..67b4242 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Bases/NextParser.cs @@ -0,0 +1,26 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Bases; + +/// +/// 下一步解析器 +/// +/// 上游解析器 +/// 成功情况下的解析器函数 +/// 失败情况下的解析器函数 +/// 输入流类型 +/// 上游解析器结果类型 +/// 最终解析结果类型 +internal sealed class NextParser( + Parser parser, + Func> successfulParser, + Func, Parser> failedParser) 
: Parser +{ + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + { + return parser.Run(state, result => result.CaseOf( + successfulResult => successfulResult.Next(successfulParser, continuation), + failedResult => failedParser(failedResult).Run(state, continuation))); + } +} diff --git a/CanonSharp.Combinator/Parsers/Bases/ResumeParser.cs b/CanonSharp.Combinator/Parsers/Bases/ResumeParser.cs new file mode 100644 index 0000000..9cf4d76 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Bases/ResumeParser.cs @@ -0,0 +1,24 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Bases; + +/// +/// 恢复解析器 +/// 在上游解析器失败的情况下调用指定恢复函数返回的解析器 +/// +/// 上游解析器 +/// 返回新解析器的恢复函数 +/// 输入令牌类型 +/// 解析结果类型 +internal sealed class ResumeParser( + Parser parser, + Func, Parser> failedHandler) : Parser +{ + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + { + return parser.Run(state, + result => result.CaseOf(continuation, + failedResult => failedHandler(failedResult).Run(state, continuation))); + } +} diff --git a/CanonSharp.Combinator/Parsers/ModifiedParser.cs b/CanonSharp.Combinator/Parsers/ModifiedParser.cs new file mode 100644 index 0000000..6a70c79 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/ModifiedParser.cs @@ -0,0 +1,27 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers; + +/// +/// 修改解析器返回结果的解析器基类 +/// +/// 需要修改结果的解析器 +/// 输入流类型 +/// 需要修改结果的解析器 +/// 最终返回的解析结果 +public abstract class ModifiedParser(Parser parser) : Parser +{ + protected abstract ParseResult Fail(TState state, + FailedResult failedResult) + where TState : IReadState; + + protected abstract ParseResult Succeed(TState state, + SuccessfulResult successfulResult) + where TState : IReadState; + + internal override ParseResult Run(TState state, + Func, ParseResult> continuation) + => parser.Run(state, result => result.CaseOf( + success => continuation(Succeed(state, success)), 
+ failure => continuation(Fail(state, failure)))); +} diff --git a/CanonSharp.Combinator/Parsers/Modifiers/DoParser.cs b/CanonSharp.Combinator/Parsers/Modifiers/DoParser.cs new file mode 100644 index 0000000..332bb87 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Modifiers/DoParser.cs @@ -0,0 +1,30 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Modifiers; + +/// +/// 对结果运行指定操作,但是不做修改操作的解析器 +/// +/// 上游解析器 +/// 对成功结果的操作 +/// 对失败结果的操作 +/// 输入流类型 +/// 解析结果类型 +internal sealed class DoParser( + Parser parser, + Action succeed, + Action> fail) : ModifiedParser(parser) +{ + protected override ParseResult Succeed(TState state, + SuccessfulResult successfulResult) + { + succeed(successfulResult.Value); + return successfulResult; + } + + protected override ParseResult Fail(TState state, FailedResult failedResult) + { + fail(failedResult); + return failedResult; + } +} diff --git a/CanonSharp.Combinator/Parsers/Modifiers/LookAheadParser.cs b/CanonSharp.Combinator/Parsers/Modifiers/LookAheadParser.cs new file mode 100644 index 0000000..7210a6c --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Modifiers/LookAheadParser.cs @@ -0,0 +1,21 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Modifiers; + +/// +/// 向前看解析器 +/// 使用传入的解析器向前解析 +/// 但是返回的结果中输入流读取状态不前移 +/// +/// 需要向前看的解析器 +/// 输入流令牌 +/// 返回的解析结果类型 +internal sealed class LookAheadParser(Parser parser) : ModifiedParser(parser) +{ + protected override ParseResult Succeed(TState state, + SuccessfulResult successfulResult) + => ParseResultBuilder.Succeed(successfulResult.Value, state); + + protected override ParseResult Fail(TState state, FailedResult failedResult) + => ParseResultBuilder.Fail($"Failed when looking ahead: {failedResult}", state); +} diff --git a/CanonSharp.Combinator/Parsers/Modifiers/ReverseParser.cs b/CanonSharp.Combinator/Parsers/Modifiers/ReverseParser.cs new file mode 100644 index 0000000..14655f5 --- /dev/null +++ 
b/CanonSharp.Combinator/Parsers/Modifiers/ReverseParser.cs @@ -0,0 +1,26 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Modifiers; + +/// +/// 翻转结果的解析器 +/// 当成功时失败 +/// 当失败时返回指定的成功结果 +/// +/// 上游解析器 +/// 期望中的结果 +/// 输入流的类型 +/// 上游解析器结果类型 +/// 最终的返回结果 +internal sealed class ReverseParser(Parser parser, T result) + : ModifiedParser(parser) +{ + protected override ParseResult Succeed(TState state, + SuccessfulResult successfulResult) + => ParseResultBuilder.Fail($"Unexpected successful result: {successfulResult.Value}", + state); + + protected override ParseResult Fail(TState state, + FailedResult failedResult) + => ParseResultBuilder.Succeed(result, state); +} diff --git a/CanonSharp.Combinator/Parsers/Modifiers/SuccessfulMapParser.cs b/CanonSharp.Combinator/Parsers/Modifiers/SuccessfulMapParser.cs new file mode 100644 index 0000000..cfb4e0c --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Modifiers/SuccessfulMapParser.cs @@ -0,0 +1,26 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Modifiers; + +/// +/// 成功映射的解析器 +/// +/// 上游解析器 +/// 当上游成功时的处理函数 +/// 当上游失败时的处理函数 +/// 输入流类型 +/// 上游解析器解析结果类型 +/// 最终的解析结果类型 +internal sealed class SuccessfulMapParser( + Parser parser, + Func successfulHandler, + Func, T> failedHandler) : ModifiedParser(parser) +{ + protected override ParseResult Succeed(TState state, + SuccessfulResult successfulResult) + => successfulResult.Map(successfulHandler); + + protected override ParseResult Fail(TState state, + FailedResult failedResult) + => ParseResultBuilder.Succeed(failedHandler(failedResult), state); +} diff --git a/CanonSharp.Combinator/Parsers/Modifiers/TryParser.cs b/CanonSharp.Combinator/Parsers/Modifiers/TryParser.cs new file mode 100644 index 0000000..88c0ab6 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Modifiers/TryParser.cs @@ -0,0 +1,23 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Modifiers; + +/// 
+/// 尝试的解析器 +/// 当成功时直接返回原结果 +/// 当失败时调用resume函数处理失败结果并返回成功结果 +/// +/// 上游解析器 +/// 处理失败结果的恢复函数 +/// 输入流令牌 +/// 解析器返回结果类型 +internal sealed class TryParser(Parser parser, Func, T> resume) + : ModifiedParser(parser) +{ + protected override ParseResult Succeed(TState state, + SuccessfulResult successfulResult) + => successfulResult; + + protected override ParseResult Fail(TState state, FailedResult failedResult) + => ParseResultBuilder.Succeed(resume(failedResult), state); +} diff --git a/CanonSharp.Combinator/Parsers/PrimitiveParser.cs b/CanonSharp.Combinator/Parsers/PrimitiveParser.cs new file mode 100644 index 0000000..c53fb82 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/PrimitiveParser.cs @@ -0,0 +1,25 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers; + +/// +/// 解析器原型基类 +/// 实际上就是处理了一个后继调用 +/// +/// 输入流类型 +/// 解析结果的类型 +public abstract class PrimitiveParser : Parser +{ + /// + /// 运行解析器 返回解析结果 + /// + /// 当前输入流的状态 + /// 输入流状态的类型 + /// 解析结果 + protected abstract ParseResult Run(TState state) + where TState : IReadState; + + internal sealed override ParseResult Run(TState state, + Func, ParseResult> continuation) + => continuation(Run(state)); +} diff --git a/CanonSharp.Combinator/Parsers/Primitives/FailedParser.cs b/CanonSharp.Combinator/Parsers/Primitives/FailedParser.cs new file mode 100644 index 0000000..4477e9c --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Primitives/FailedParser.cs @@ -0,0 +1,51 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Primitives; + +/// +/// 直接失败的解析器 +/// +/// 输入流类型 +/// 解析结果的类型 +internal sealed class FailedParser : PrimitiveParser +{ + protected override ParseResult Run(TState state) + => ParseResultBuilder.Fail(state); +} + +/// +/// 含有失败信息的失败解析器 +/// +/// 失败信息 +/// 输入流类型 +/// 解析结果的类型 +internal sealed class FailedParserWithMessage(string message) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + => 
ParseResultBuilder.Fail(message, state); +} + +/// +/// 按照输入状态产生失败信息的失败解析器 +/// +/// 产生失败信息的函数 +/// 输入流类型 +/// 解析结果的类型 +internal sealed class FailedParserWithDelayedMessage(Func, string> messageFunc) + : PrimitiveParser +{ + protected override ParseResult Run(TState state) + => ParseResultBuilder.Fail(messageFunc(state), state); +} + +/// +/// 含有失败异常的失败解析器 +/// +/// 异常 +/// 输入流类型 +/// 解析结果的类型 +internal sealed class FailedParserWithException(Exception e) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + => ParseResultBuilder.Fail(e, state); +} diff --git a/CanonSharp.Combinator/Parsers/Primitives/PureParser.cs b/CanonSharp.Combinator/Parsers/Primitives/PureParser.cs new file mode 100644 index 0000000..6ded706 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Primitives/PureParser.cs @@ -0,0 +1,27 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Primitives; + +/// +/// 直接成功的解析器 +/// +/// 解析成功返回的值 +/// 输入流类型 +/// 解析成功返回值的类型 +internal sealed class PureParser(T value) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + => ParseResultBuilder.Succeed(value, state); +} + +/// +/// 按照输入状态返回结果的始终成功解析器 +/// +/// 按照输入状态返回解析结果的函数 +/// 输入流类型 +/// 解析成功返回值的类型 +internal sealed class DelayedPureParser(Func, T> valueFunc) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + => ParseResultBuilder.Succeed(valueFunc(state), state); +} diff --git a/CanonSharp.Combinator/Parsers/Primitives/SatisfyParser.cs b/CanonSharp.Combinator/Parsers/Primitives/SatisfyParser.cs new file mode 100644 index 0000000..25ec82c --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Primitives/SatisfyParser.cs @@ -0,0 +1,18 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Parsers.Primitives; + +/// +/// 满足指定条件即成功的解析器 +/// +/// 满足的条件谓词 +/// 输入流类型 +internal sealed class SatisfyParser(Func predicate) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + { + 
return state.HasValue && predicate(state.Current) + ? ParseResultBuilder.Succeed(state.Current, state.Next) + : ParseResultBuilder.Fail(state); + } +} diff --git a/CanonSharp.Combinator/Parsers/Primitives/SkipParser.cs b/CanonSharp.Combinator/Parsers/Primitives/SkipParser.cs new file mode 100644 index 0000000..ffa1061 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Primitives/SkipParser.cs @@ -0,0 +1,22 @@ +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; + +namespace CanonSharp.Combinator.Parsers.Primitives; + +/// +/// 跳过指定数量的输入令牌 +/// +/// 需要跳过的令牌数量 +/// 输入流类型 +internal sealed class SkipParser(int count) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + { + List result = state.AsEnumerable().Take(count).ToList(); + + return result.Count == count + ? ParseResultBuilder.Succeed(Unit.Instance, + result.Count == 0 ? state : result.Last().Next) + : ParseResultBuilder.Fail("An input does not have required length.", state); + } +} diff --git a/CanonSharp.Combinator/Parsers/Primitives/TakeParser.cs b/CanonSharp.Combinator/Parsers/Primitives/TakeParser.cs new file mode 100644 index 0000000..670c7c0 --- /dev/null +++ b/CanonSharp.Combinator/Parsers/Primitives/TakeParser.cs @@ -0,0 +1,23 @@ +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; + +namespace CanonSharp.Combinator.Parsers.Primitives; + +/// +/// 解析指定数量的解析器 +/// +/// 需要解析的数量 +/// 输入流类型 +internal sealed class TakeParser(int count) : PrimitiveParser> +{ + protected override ParseResult> Run(TState state) + { + List result = state.AsEnumerable().Take(count).ToList(); + + return result.Count == count + ? ParseResultBuilder.Succeed>(result.Select(s => s.Current), + result.Count == 0 ? 
state : result.Last().Next) + : ParseResultBuilder.Fail>("An input does not have required length.", + state); + } +} diff --git a/CanonSharp.Combinator/Results/FailedResultWithError.cs b/CanonSharp.Combinator/Results/FailedResultWithError.cs new file mode 100644 index 0000000..14efb87 --- /dev/null +++ b/CanonSharp.Combinator/Results/FailedResultWithError.cs @@ -0,0 +1,22 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Results; + +/// +/// 错误类型的失败解析结果 +/// +/// 输入流的类型 +/// 输入流的读取类型 +/// 实际的结果类型 +internal sealed class FailedResultWithError(TState state) : FailedResult + where TState : IReadState +{ + public override IReadState State => state; + + public override string Message => $"Unexpected state: {state}."; + + public override FailedResult Convert() + { + return new FailedResultWithError(state); + } +} diff --git a/CanonSharp.Combinator/Results/FailedResultWithException.cs b/CanonSharp.Combinator/Results/FailedResultWithException.cs new file mode 100644 index 0000000..85005c5 --- /dev/null +++ b/CanonSharp.Combinator/Results/FailedResultWithException.cs @@ -0,0 +1,24 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Results; + +/// +/// 异常类型的失败解析结果 +/// +/// 解析中发生的异常 +/// 当前输入流的状态 +/// 输入流类型 +/// 当前输入流状态的类型 +/// 解析结果的类型 +public class FailedResultWithException(Exception exception, TState state) : FailedResult + where TState : IReadState +{ + public override IReadState State => state; + + public override ParseException Exception => new(ToString(), exception); + + public override string Message => $"Exception occured: {exception}."; + + public override FailedResult Convert() + => new FailedResultWithException(exception, state); +} diff --git a/CanonSharp.Combinator/Results/FailedResultWithMessage.cs b/CanonSharp.Combinator/Results/FailedResultWithMessage.cs new file mode 100644 index 0000000..d28bc2f --- /dev/null +++ b/CanonSharp.Combinator/Results/FailedResultWithMessage.cs @@ -0,0 +1,24 @@ +using 
CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Results; + +/// +/// 消息类型的失败解析结果 +/// +/// 解析失败的消息 +/// 当前读取的状态 +/// 输入流的类型 +/// 读取状态类型 +/// 解析结果的类型 +internal sealed class FailedResultWithMessage(string message, TState state) : FailedResult + where TState : IReadState +{ + public override IReadState State => state; + + public override string Message => message; + + public override FailedResult Convert() + { + return new FailedResultWithMessage(message, state); + } +} diff --git a/CanonSharp.Combinator/Results/InternalSuccessfulResult.cs b/CanonSharp.Combinator/Results/InternalSuccessfulResult.cs new file mode 100644 index 0000000..3934453 --- /dev/null +++ b/CanonSharp.Combinator/Results/InternalSuccessfulResult.cs @@ -0,0 +1,23 @@ +using CanonSharp.Combinator.Abstractions; + +namespace CanonSharp.Combinator.Results; + +/// +/// 实际实现的解析成功结果 +/// +/// 解析结果 +/// 解析成功之后的下一个输入流状态 +/// 输入流类型 +/// 输入流的状态类型 +/// 解析结果的类型 +internal sealed class InternalSuccessfulResult(T result, TState state) + : SuccessfulResult(result) + where TState : IReadState +{ + protected override ParseResult RunNext(Parser parser, + Func, ParseResult> continuation) + => parser.Run(state, continuation); + + public override ParseResult Map(Func map) + => new InternalSuccessfulResult(map(Value), state); +} diff --git a/CanonSharp.Combinator/Text/StringParser.cs b/CanonSharp.Combinator/Text/StringParser.cs new file mode 100644 index 0000000..1d20dd8 --- /dev/null +++ b/CanonSharp.Combinator/Text/StringParser.cs @@ -0,0 +1,22 @@ +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; +using CanonSharp.Combinator.Parsers; + +namespace CanonSharp.Combinator.Text; + +/// +/// 字符串解析器 +/// +/// 期望的字符串 +/// 字符串比较模式 +public class StringParser(string except, StringComparison comparison) : PrimitiveParser +{ + protected override ParseResult Run(TState state) + { + TState[] states = state.AsEnumerable().Take(except.Length).ToArray(); + string actual = 
new(states.Select(x => x.Current).ToArray()); + return string.Equals(except, actual, comparison) + ? ParseResultBuilder.Succeed(actual, states.Length == 0 ? state : states.Last().Next) + : ParseResultBuilder.Fail($"Except '{except}' but found '{actual}.", state); + } +} diff --git a/CanonSharp.Combinator/Text/TextParserBuilder.cs b/CanonSharp.Combinator/Text/TextParserBuilder.cs new file mode 100644 index 0000000..2ca83db --- /dev/null +++ b/CanonSharp.Combinator/Text/TextParserBuilder.cs @@ -0,0 +1,123 @@ +using System.Runtime.CompilerServices; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; +using static CanonSharp.Combinator.ParserBuilder; + +namespace CanonSharp.Combinator.Text; + +public static class TextParserBuilder +{ + /// + /// 识别单个字符 + /// + /// 识别的单个字符 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Char(char token) => Satisfy(x => x == token); + + /// + /// 忽略大小写识别单个字符 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser CharIgnoreCase(char token) => + Satisfy(x => char.ToUpperInvariant(x) == char.ToUpperInvariant(token)); + + /// + /// 识别提供字符串中的一个字符 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser OneOf(string candidate) => Satisfy(candidate.Contains); + + /// + /// 忽略大小写识别字符串中的一个字符 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser OneOfIgnoreCase(string candidate) => + Satisfy(x => candidate.Contains(x, StringComparison.OrdinalIgnoreCase)); + + /// + /// 识别一个字符串 + /// + /// 识别的字符串 + /// 字符串比较方法 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser String(string except, StringComparison comparison) => + new StringParser(except, comparison); + + /// + /// 识别一个字符串 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser String(string except) => String(except, StringComparison.Ordinal); + + /// 
+ /// 忽略大小写识别一个字符串 + /// + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser StringIgnoreCase(string except) => + String(except, StringComparison.OrdinalIgnoreCase); + + /// + /// 识别范围内的所有字符 + /// + /// 包括的起始字符 + /// 包括的终止字符 + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Range(char start, char end) => Satisfy(x => x >= start && x <= end); + + /// + /// 识别Unicode字符类别的解析器 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Letter() => Satisfy(char.IsLetter); + + /// + /// 识别Unicode数字类别的解析器 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Digit() => Satisfy(char.IsDigit); + + /// + /// 识别ASCII字符类别的解析器 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser AsciiLetter() => + Satisfy(x => x is >= 'a' and <= 'z' or >= 'A' and <= 'Z'); + + /// + /// 识别ASCII数字类别的解析器 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser AsciiDigit() => Satisfy(x => x is >= '0' and <= '9'); + + /// + /// 识别Unicode空白类型的字符 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser Space() => Satisfy(char.IsWhiteSpace); + + /// + /// 识别所有的换行符 + /// + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Parser LineBreak() => + OneOf("\u000D\u000A\u0085\u2028\u2029\n").Map(x => x.ToString()) | String("\r\n"); +} diff --git a/CanonSharp.Combinator/Text/TextParserExtensions.cs b/CanonSharp.Combinator/Text/TextParserExtensions.cs new file mode 100644 index 0000000..77df384 --- /dev/null +++ b/CanonSharp.Combinator/Text/TextParserExtensions.cs @@ -0,0 +1,14 @@ +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; +using static CanonSharp.Combinator.Text.TextParserBuilder; + +namespace CanonSharp.Combinator.Text; + +public static class TextParserExtensions +{ + public static Parser SkipSpaces(this Parser parser) + 
=> Space().SkipTill(parser); + + public static Parser SkipSpaceAndLineBreak(this Parser parser) + => (Space().Map(x => x.ToString()) | LineBreak()).SkipTill(parser); +} diff --git a/CanonSharp.Combinator/Unit.cs b/CanonSharp.Combinator/Unit.cs new file mode 100644 index 0000000..f49514c --- /dev/null +++ b/CanonSharp.Combinator/Unit.cs @@ -0,0 +1,23 @@ +namespace CanonSharp.Combinator; + +/// +/// 单元类型Unit +/// +public readonly struct Unit : IComparable, IEquatable +{ + public static Unit Instance => default; + + public bool Equals(Unit other) => true; + + public int CompareTo(Unit other) => 0; + + public override bool Equals(object? obj) => obj is Unit; + + public override int GetHashCode() => 0; + + public override string ToString() => $"<{nameof(Unit)}>"; + + public static bool operator ==(Unit _0, Unit _1) => true; + + public static bool operator !=(Unit _0, Unit _1) => false; +} diff --git a/CanonSharp.Common/Abstractions/ISourceReader.cs b/CanonSharp.Common/Abstractions/ISourceReader.cs deleted file mode 100644 index c53fa2f..0000000 --- a/CanonSharp.Common/Abstractions/ISourceReader.cs +++ /dev/null @@ -1,17 +0,0 @@ -namespace CanonSharp.Common.Abstractions; - -public interface ISourceReader -{ - /// - /// 偷看一下下一个字符 - /// - /// 看到的下一个字符 - /// - public bool TryPeek(out char c); - - /// - /// 读取下一个字符 - /// - /// - public char Read(); -} diff --git a/CanonSharp.Common/CanonSharp.Common.csproj b/CanonSharp.Common/CanonSharp.Common.csproj index 5771fa3..fbfc2bd 100644 --- a/CanonSharp.Common/CanonSharp.Common.csproj +++ b/CanonSharp.Common/CanonSharp.Common.csproj @@ -11,4 +11,8 @@ + + + + diff --git a/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs b/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs deleted file mode 100644 index bd789ff..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs +++ /dev/null @@ -1,111 +0,0 @@ -namespace CanonSharp.Common.LexicalAnalyzer; - -public class 
DeterministicState : IEquatable -{ - public Guid Id { get; } - - public Dictionary Transaction { get; } = []; - - public HashSet Closure { get; } - - public DeterministicState(HashSet closure) - { - Id = Guid.NewGuid(); - Closure = closure; - } - - private DeterministicState(DeterministicState state) - { - Id = state.Id; - Transaction = state.Transaction; - Closure = []; - } - - public DeterministicState StripClosure() => new(this); - - public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id); - - public override bool Equals(object? obj) => obj is DeterministicState other && Equals(other); - - public override int GetHashCode() => Id.GetHashCode(); -} - -public class DeterministicFiniteAutomation -{ - public DeterministicState Start { get; } - - public HashSet FinalStates { get; } - - private DeterministicFiniteAutomation(DeterministicState start, HashSet finalStates) - { - Start = start; - FinalStates = finalStates; - } - - private record Pair(HashSet States, DeterministicState State); - - public static DeterministicFiniteAutomation Create(NondeterministicFiniteAutomation nfa) - { - Dictionary map = []; - HashSet visited = []; - Queue queue = []; - HashSet finalStates = []; - - HashSet startClosure = nfa.Start.CalculateEmptyClosure(); - DeterministicState start = new(startClosure); - map.Add(new NondeterministicStateSet(startClosure), start); - queue.Enqueue(new Pair(startClosure, start)); - - while (queue.TryDequeue(out Pair? pair)) - { - if (pair.States.Any(s => nfa.FinalStates.Contains(s))) - { - finalStates.Add(pair.State); - } - - Dictionary> next = []; - - foreach (NondeterministicState state in pair.States) - { - foreach (KeyValuePair> transaction in - state.Transactions.Where(p => !p.Key.IsEmpty)) - { - HashSet closure = []; - - foreach (NondeterministicState s in transaction.Value) - { - closure.UnionWith(s.CalculateEmptyClosure()); - } - - if (next.TryGetValue(transaction.Key.Char, out HashSet? 
n)) - { - n.UnionWith(closure); - } - else - { - next.Add(transaction.Key.Char, closure); - } - } - } - - foreach (KeyValuePair> transaction in next) - { - NondeterministicStateSet set = new(transaction.Value); - if (!map.TryGetValue(set, out DeterministicState? nextState)) - { - nextState = new DeterministicState(transaction.Value); - map.Add(set, nextState); - } - - pair.State.Transaction.Add(transaction.Key, nextState); - - if (visited.Add(nextState)) - { - queue.Enqueue(new Pair(transaction.Value, nextState)); - } - } - } - - return new DeterministicFiniteAutomation(start, finalStates); - } -} diff --git a/CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs b/CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs deleted file mode 100644 index e31aa89..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs +++ /dev/null @@ -1,49 +0,0 @@ -namespace CanonSharp.Common.LexicalAnalyzer; - -public class EmptyChar : IEquatable -{ - public bool IsEmpty { get; } - - public char Char { get; } - - public static EmptyChar Empty => new(); - - private EmptyChar() - { - IsEmpty = true; - Char = char.MaxValue; - } - - public EmptyChar(char c) - { - IsEmpty = false; - Char = c; - } - - public bool Equals(EmptyChar? other) - { - if (other is null) - { - return false; - } - - if (IsEmpty) - { - return other.IsEmpty; - } - - return Char == other.Char; - } - - public override bool Equals(object? obj) => obj is EmptyChar other && Equals(other); - - public override int GetHashCode() - { - return IsEmpty.GetHashCode() ^ Char.GetHashCode(); - } - - public override string ToString() - { - return IsEmpty ? 
"ε" : Char.ToString(); - } -} diff --git a/CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs b/CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs deleted file mode 100644 index caa8c2b..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs +++ /dev/null @@ -1,92 +0,0 @@ -using System.Diagnostics.CodeAnalysis; -using CanonSharp.Common.Abstractions; - -namespace CanonSharp.Common.LexicalAnalyzer; - -public class LexicalScanner( - DeterministicState startState, - Dictionary finalStateMap, - HashSet skippedTokens, - ISourceReader reader) -{ - private readonly DeterministicState _startState = startState; - - private readonly List _readHistory = []; - - private DeterministicState _currentState = startState; - - public bool TryRead([NotNullWhen(true)] out LexicalToken? token) - { - while (TryReadInternal(out token)) - { - if (!skippedTokens.Contains(token)) - { - return true; - } - } - - return false; - } - - private bool TryReadInternal([NotNullWhen(true)] out LexicalToken? token) - { - while (reader.TryPeek(out char c)) - { - if (_currentState.Transaction.TryGetValue(c, out DeterministicState? nextState)) - { - // 可以迁移到下一个状态 - _currentState = nextState; - _readHistory.Add(reader.Read()); - } - else - { - // 无法迁移到下一个状态 - if (!finalStateMap.TryGetValue(_currentState, out LexicalToken? possibleToken)) - { - throw new InvalidOperationException(); - } - - // 当前状态是终止状态 - token = new LexicalToken(possibleToken, new string(_readHistory.ToArray())); - - // 重置状态 - _readHistory.Clear(); - _currentState = _startState; - return true; - } - } - - // 当前状态是终止状态 - if (finalStateMap.TryGetValue(_currentState, out LexicalToken? 
possibleToken2)) - { - token = new LexicalToken(possibleToken2, new string(_readHistory.ToArray())); - - _readHistory.Clear(); - _currentState = _startState; - return true; - } - - if (!_currentState.Equals(_startState)) - { - throw new InvalidOperationException(); - } - - token = null; - return false; - } - - public static LexicalScannerBuilder CreateDefaultBuilder() - { - LexicalScannerBuilder builder = new(); - - builder.DefineToken(LexicalToken.LineBreaker); - builder.DefineToken(LexicalToken.WhiteSpace); - - builder.AddSkippedToken(LexicalToken.LineBreaker); - builder.AddSkippedToken(LexicalToken.WhiteSpace); - - return builder; - } - - public static LexicalScannerBuilder CreateEmptyBuilder() => new(); -} diff --git a/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs b/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs deleted file mode 100644 index c04c872..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs +++ /dev/null @@ -1,113 +0,0 @@ -using CanonSharp.Common.Abstractions; - -namespace CanonSharp.Common.LexicalAnalyzer; - -public class LexicalScannerBuilder -{ - private readonly Dictionary _finalStateMap = []; - private readonly List _nondeterministicFiniteAutomations = []; - private readonly HashSet _skippedTokens = []; - - internal LexicalScannerBuilder() - { - - } - - public void DefineToken(LexicalToken token) - { - NondeterministicFiniteAutomation automation = token.Expression.Convert2Nfa(); - _nondeterministicFiniteAutomations.Add(automation); - - foreach (NondeterministicState state in automation.FinalStates) - { - _finalStateMap.Add(state, token); - } - } - - /// - /// 定义词法令牌 - /// - /// 该令牌的正则表达式 - /// 识别该令牌的优先级 - /// 定义好的词法令牌 - public LexicalToken DefineToken(RegularExpression expression, int priority) - { - LexicalToken token = new(expression, priority); - DefineToken(token); - return token; - } - - /// - /// 定义输出时需要跳过的词法令牌 - /// - /// 该令牌的正则表达式 - /// 该令牌的优先级 - public void 
DefineSkippedToken(RegularExpression expression, int priority) - { - LexicalToken token = DefineToken(expression, priority); - AddSkippedToken(token); - } - - public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token); - - public LexicalScanner Build(ISourceReader reader) - { - NondeterministicFiniteAutomation finaAutomation = Combine(); - DeterministicFiniteAutomation deterministicFiniteAutomation = - DeterministicFiniteAutomation.Create(finaAutomation); - - Dictionary finalTokenMap = []; - - foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates) - { - finalTokenMap.Add(state.StripClosure(), state.Closure - .Where(s => _finalStateMap.ContainsKey(s)) - .Select(s => _finalStateMap[s]) - .OrderByDescending(t => t.Priority) - .First()); - } - - // 清除在分析中不需要的Closure引用 - // 释放内存占用 - Queue queue = []; - HashSet visited = [deterministicFiniteAutomation.Start]; - DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure(); - queue.Enqueue(strippedStartState); - - while (queue.TryDequeue(out DeterministicState? 
state)) - { - Dictionary transactions = []; - - foreach (KeyValuePair pair in state.Transaction) - { - transactions.Add(pair.Key, pair.Value.StripClosure()); - } - - state.Transaction.Clear(); - foreach (KeyValuePair pair in transactions) - { - state.Transaction.Add(pair.Key, pair.Value); - if (visited.Add(pair.Value)) - { - queue.Enqueue(pair.Value); - } - } - } - - return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader); - } - - private NondeterministicFiniteAutomation Combine() - { - NondeterministicState head = new(); - NondeterministicFiniteAutomation result = new(head, []); - - foreach (NondeterministicFiniteAutomation automation in _nondeterministicFiniteAutomations) - { - head.AddTransaction(EmptyChar.Empty, automation.Start); - result.FinalStates.UnionWith(automation.FinalStates); - } - - return result; - } -} diff --git a/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs b/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs deleted file mode 100644 index 71d97d9..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs +++ /dev/null @@ -1,54 +0,0 @@ -using System.Globalization; - -namespace CanonSharp.Common.LexicalAnalyzer; - -public class LexicalToken : IEquatable -{ - private readonly Guid _tokenId; - - public RegularExpression Expression { get; } - - public int Priority { get; } - - public LexicalToken(RegularExpression expression, int priority) - { - _tokenId = Guid.NewGuid(); - Expression = expression; - Priority = priority; - LiteralValue = string.Empty; - } - - internal LexicalToken(LexicalToken definition, string literalValue) - { - _tokenId = definition._tokenId; - Expression = definition.Expression; - Priority = definition.Priority; - LiteralValue = literalValue; - } - - public string LiteralValue { get; } - - public bool Equals(LexicalToken? other) => other is not null && _tokenId == other._tokenId; - - public override bool Equals(object? 
obj) => obj is LexicalToken other && Equals(other); - - public override int GetHashCode() => _tokenId.GetHashCode(); - - public static bool operator ==(LexicalToken a, LexicalToken b) => a.Equals(b); - - public static bool operator !=(LexicalToken a, LexicalToken b) => !(a == b); - - /// - /// 匹配所有的空白字符 - /// - public static readonly LexicalToken WhiteSpace = new( - RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator) | - RegularExpression.CharSetOf("\u0009\u000B\u000C"), int.MinValue); - - /// - /// 匹配所有的换行符 - /// - public static readonly LexicalToken LineBreaker = new( - RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029") | - RegularExpression.String("\r\n"), int.MinValue); -} diff --git a/CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs b/CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs deleted file mode 100644 index 330d579..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs +++ /dev/null @@ -1,76 +0,0 @@ -namespace CanonSharp.Common.LexicalAnalyzer; - -public class NondeterministicState : IEquatable -{ - public Guid Id { get; } = Guid.NewGuid(); - - public Dictionary> Transactions { get; } = []; - - public bool Equals(NondeterministicState? other) => other is not null && Id.Equals(other.Id); - - public void AddTransaction(EmptyChar c, NondeterministicState state) - { - if (Transactions.TryGetValue(c, out HashSet? states)) - { - states.Add(state); - } - else - { - Transactions.Add(c, [state]); - } - } - - public override bool Equals(object? obj) => obj is NondeterministicState other && Equals(other); - - public override int GetHashCode() => Id.GetHashCode(); - - public HashSet CalculateEmptyClosure() - { - HashSet result = []; - Queue queue = []; - queue.Enqueue(this); - - while (queue.TryDequeue(out NondeterministicState? state)) - { - result.Add(state); - - if (!state.Transactions.TryGetValue(EmptyChar.Empty, out HashSet? 
next)) - { - continue; - } - - foreach (NondeterministicState s in next.Where(s => !result.Contains(s))) - { - queue.Enqueue(s); - } - } - - return result; - } -} - -public class NondeterministicStateSet(HashSet states) : IEquatable -{ - private readonly HashSet _states = states; - - public bool Equals(NondeterministicStateSet? other) - { - if (other is null) - { - return false; - } - - return _states.Count == other._states.Count && _states.All(s => other._states.Contains(s)); - } - - public override bool Equals(object? obj) => obj is NondeterministicStateSet other && Equals(other); - - public override int GetHashCode() => _states.Aggregate(0, (current, state) => current ^ state.GetHashCode()); -} - -public class NondeterministicFiniteAutomation(NondeterministicState start, HashSet finalStates) -{ - public NondeterministicState Start { get; } = start; - - public HashSet FinalStates { get; } = finalStates; -} diff --git a/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs b/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs deleted file mode 100644 index 80e2b3c..0000000 --- a/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs +++ /dev/null @@ -1,230 +0,0 @@ -namespace CanonSharp.Common.LexicalAnalyzer; - -public abstract class RegularExpression -{ - public abstract NondeterministicFiniteAutomation Convert2Nfa(); - - /// - /// 匹配空字符串 - /// - public static RegularExpression Empty => new EmptyExpression(); - - /// - /// 匹配单个字符 - /// c - /// - /// - /// - public static RegularExpression Single(char c) => new SymbolExpression(c); - - /// - /// left|right - /// - /// - /// - /// - public static RegularExpression Alternate(RegularExpression left, RegularExpression right) => - new AlternationExpression(left, right); - - public static RegularExpression operator |(RegularExpression left, RegularExpression right) => - new AlternationExpression(left, right); - - /// - /// left-right - /// - /// - /// - /// - public static RegularExpression 
Concatenate(RegularExpression first, RegularExpression second) => - new ConcatenationExpression(first, second); - - public static RegularExpression operator +(RegularExpression left, RegularExpression right) => - new ConcatenationExpression(left, right); - - /// - /// inner* - /// - /// - /// - public static RegularExpression Kleene(RegularExpression inner) => new KleeneExpression(inner); - - /// - /// value - /// - /// - /// - public static RegularExpression String(string value) => new StringExpression(value); - - public static RegularExpression CharSetOf(string value) => new CharSetExpression(value.ToCharArray()); - - public static RegularExpression CharSetOf(Func predicate) - => new CharSetExpression(Iterate(char.MinValue, char.MaxValue).Where(predicate).ToArray()); - - /// - /// [a-b] - /// - /// - /// - /// - public static RegularExpression Range(char a, char b) => new CharSetExpression(Iterate(a, b).ToArray()); - - private static IEnumerable Iterate(char a, char b) - { - for (char c = a; c <= b; c++) - { - if (c == char.MaxValue) - { - yield break; - } - - yield return c; - } - } -} - -public class EmptyExpression : RegularExpression -{ - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicState final = new(); - NondeterministicState start = new(); - start.AddTransaction(EmptyChar.Empty, final); - - return new NondeterministicFiniteAutomation(start, [final]); - } -} - -public class SymbolExpression(char symbol) : RegularExpression -{ - public char Symbol { get; } = symbol; - - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicState final = new(); - NondeterministicState start = new(); - start.AddTransaction(new EmptyChar(Symbol), final); - - return new NondeterministicFiniteAutomation(start, [final]); - } -} - -public class AlternationExpression(RegularExpression left, RegularExpression right) : RegularExpression -{ - public RegularExpression Left { get; } = left; - - public RegularExpression 
Right { get; } = right; - - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicFiniteAutomation left = Left.Convert2Nfa(); - NondeterministicFiniteAutomation right = Right.Convert2Nfa(); - - NondeterministicState final = new(); - foreach (NondeterministicState state in left.FinalStates.Concat(right.FinalStates)) - { - state.AddTransaction(EmptyChar.Empty, final); - } - - NondeterministicState start = new(); - start.AddTransaction(EmptyChar.Empty, left.Start); - start.AddTransaction(EmptyChar.Empty, right.Start); - - return new NondeterministicFiniteAutomation(start, [final]); - } -} - -public class ConcatenationExpression(RegularExpression first, RegularExpression second) : RegularExpression -{ - public RegularExpression First { get; } = first; - - public RegularExpression Second { get; } = second; - - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicFiniteAutomation first = First.Convert2Nfa(); - NondeterministicFiniteAutomation second = Second.Convert2Nfa(); - - foreach (NondeterministicState state in first.FinalStates) - { - state.AddTransaction(EmptyChar.Empty, second.Start); - } - - return new NondeterministicFiniteAutomation(first.Start, second.FinalStates); - } -} - -public class KleeneExpression(RegularExpression inner) : RegularExpression -{ - public RegularExpression Inner { get; } = inner; - - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicFiniteAutomation inner = Inner.Convert2Nfa(); - - NondeterministicState final = new(); - final.AddTransaction(EmptyChar.Empty, inner.Start); - - foreach (NondeterministicState state in inner.FinalStates) - { - state.AddTransaction(EmptyChar.Empty, final); - } - - return new NondeterministicFiniteAutomation(final, [final]); - } -} - -public class CharSetExpression : RegularExpression -{ - public char[] Set { get; } - - public CharSetExpression(Span set) - { - if (set.Length == 0) - { - throw new 
InvalidOperationException(); - } - - Set = set.ToArray(); - } - - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicState start = new(); - NondeterministicState final = new(); - - foreach (char c in Set) - { - start.AddTransaction(new EmptyChar(c), final); - } - - return new NondeterministicFiniteAutomation(start, [final]); - } -} - -public class StringExpression : RegularExpression -{ - public string Word { get; } - - public StringExpression(string word) - { - if (string.IsNullOrEmpty(word)) - { - throw new InvalidOperationException(); - } - - Word = word; - } - - public override NondeterministicFiniteAutomation Convert2Nfa() - { - NondeterministicState start = new(); - NondeterministicState final = Word.Aggregate(start, (state, c) => - { - NondeterministicState next = new(); - state.AddTransaction(new EmptyChar(c), next); - return next; - }); - - return new NondeterministicFiniteAutomation(start, [final]); - } -} diff --git a/CanonSharp.Common/Reader/SourceReader.cs b/CanonSharp.Common/Reader/SourceReader.cs deleted file mode 100644 index 0c2b818..0000000 --- a/CanonSharp.Common/Reader/SourceReader.cs +++ /dev/null @@ -1,78 +0,0 @@ -using CanonSharp.Common.Abstractions; - -namespace CanonSharp.Common.Reader; - -public class SourceReader : ISourceReader -{ - private readonly StreamReader _reader; - private char? 
_lookAhead; - - public SourceReader(string filename) - { - FileInfo source = new(filename); - - if (!source.Exists) - { - throw new InvalidOperationException(); - } - - _reader = new StreamReader(filename); - } - - public char Read() - { - if (_lookAhead.HasValue) - { - char result = _lookAhead.Value; - _lookAhead = null; - return result; - } - - if (!TryFetchChar(out char c)) - { - throw new InvalidOperationException(); - } - - return c; - } - - public bool TryPeek(out char c) - { - if (_lookAhead.HasValue) - { - c = _lookAhead.Value; - return true; - } - - if (!TryFetchChar(out c)) - { - return false; - } - - _lookAhead = c; - return true; - } - - private readonly char[] _buffer = new char[1024]; - private int _length; - private int _count; - - private bool TryFetchChar(out char c) - { - if (_length == _count) - { - _length = _reader.Read(_buffer); - _count = 0; - } - - if (_length == 0) - { - c = char.MinValue; - return false; - } - - c = _buffer[_count]; - _count += 1; - return true; - } -} diff --git a/CanonSharp.Common/Reader/StringReader.cs b/CanonSharp.Common/Reader/StringReader.cs deleted file mode 100644 index e7493ef..0000000 --- a/CanonSharp.Common/Reader/StringReader.cs +++ /dev/null @@ -1,33 +0,0 @@ -using CanonSharp.Common.Abstractions; - -namespace CanonSharp.Common.Reader; - -public class StringReader(string source) : ISourceReader -{ - private int _pos; - - public char Read() - { - if (_pos >= source.Length) - { - throw new InvalidOperationException(); - } - - char result = source[_pos]; - _pos += 1; - return result; - - } - - public bool TryPeek(out char c) - { - if (_pos < source.Length) - { - c = source[_pos]; - return true; - } - - c = char.MinValue; - return false; - } -} diff --git a/CanonSharp.Common/Scanner/LexicalScanner.cs b/CanonSharp.Common/Scanner/LexicalScanner.cs new file mode 100644 index 0000000..4ad4c1e --- /dev/null +++ b/CanonSharp.Common/Scanner/LexicalScanner.cs @@ -0,0 +1,150 @@ +using CanonSharp.Combinator; +using 
/// <summary>
/// Lexical scanner for the Pascal dialect, assembled from parser combinators over a <c>char</c> stream.
/// </summary>
public sealed class LexicalScanner
{
    // Reserved words, matched case-insensitively. Alternatives are tried in this order, so the
    // sequence is kept exactly as the scanner originally listed it.
    // NOTE(review): "divide" is kept as-is; standard Pascal spells this keyword "div" — confirm.
    private static readonly string[] Keywords =
    [
        "program", "const", "var", "procedure", "function", "begin", "end", "array", "of",
        "if", "then", "else", "for", "to", "do", "integer", "real", "boolean", "char",
        "divide", "not", "mod", "and", "or", "true", "false", "while"
    ];

    private readonly Parser<char, IEnumerable<LexicalToken>> _parser = PascalParser();

    /// <summary>
    /// Runs the complete token parser against <paramref name="state"/>.
    /// </summary>
    /// <param name="state">Read state positioned at the first character to scan.</param>
    /// <typeparam name="TState">Concrete read-state type.</typeparam>
    /// <returns>The tokens produced by <see cref="PascalParser"/>.</returns>
    /// <exception cref="ParseException">Raised when reading <c>Value</c> of a failed parse result.</exception>
    public IEnumerable<LexicalToken> Tokenize<TState>(TState state)
        where TState : IReadState<char, TState>
    {
        return _parser.Parse(state).Value;
    }

    /// <summary>
    /// Matches one reserved word that is not immediately followed by an identifier character,
    /// so that e.g. <c>format</c> is not split into the keyword <c>for</c> plus <c>mat</c>.
    /// </summary>
    public static Parser<char, LexicalToken> KeywordParser()
    {
        Parser<char, string>[] keywordParsers = Keywords.Select(keyword => StringIgnoreCase(keyword)).ToArray();

        return from value in Choice(keywordParsers)
            from _ in (AsciiLetter() | AsciiDigit() | Char('_')).LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Keyword, value);
    }

    /// <summary>
    /// Matches a delimiter: <c>, ; ( ) [ ] ..</c>, a <c>:</c> that does not start <c>:=</c>,
    /// or a single <c>.</c> that does not start <c>..</c>.
    /// </summary>
    public static Parser<char, LexicalToken> DelimiterParser()
    {
        // ':' is a delimiter only when it is not the first half of the assignment operator.
        Parser<char, LexicalToken> colonParser = from token in Char(':')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, token.ToString());

        // A lone '.'; the two-character ".." is handled by the fixed-text alternatives below.
        Parser<char, LexicalToken> periodParser = from token in Char('.')
            from _ in Char('.').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Delimiter, ".");

        Parser<char, LexicalToken> fixedTextParser = from token in Choice(
                String(","),
                String(";"),
                String("("),
                String(")"),
                String("["),
                String("]"),
                String(".."))
            select new LexicalToken(LexicalTokenType.Delimiter, token);

        return fixedTextParser | colonParser | periodParser;
    }

    /// <summary>
    /// Matches an operator: <c>= != &lt;= &gt;= + - * / :=</c>, or a bare <c>&lt;</c> / <c>&gt;</c>
    /// that is not followed by <c>=</c>.
    /// </summary>
    public static Parser<char, LexicalToken> OperatorParser()
    {
        Parser<char, LexicalToken> lessParser = from token in Char('<')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, "<");

        Parser<char, LexicalToken> greaterParser = from token in Char('>')
            from _ in Char('=').LookAhead().Not()
            select new LexicalToken(LexicalTokenType.Operator, ">");

        // NOTE(review): "!=" is accepted here; standard Pascal writes inequality as "<>" — confirm intent.
        Parser<char, LexicalToken> otherParsers = from token in Choice(
                String("="),
                String("!="),
                String("<="),
                String(">="),
                String("+"),
                String("-"),
                String("*"),
                String("/"),
                String(":="))
            select new LexicalToken(LexicalTokenType.Operator, token);

        return otherParsers | lessParser | greaterParser;
    }

    /// <summary>
    /// Matches an unsigned integer literal. The match is rejected only when the digits are followed
    /// by <c>.</c> and another digit (that is a float literal), so a range such as <c>1..10</c> still
    /// yields the integer <c>1</c>.
    /// </summary>
    public static Parser<char, LexicalToken> ConstIntegerParser()
    {
        // Rejecting on any '.' made "1..10" unparseable: the integer parser refused it and the
        // float parser needs a digit directly after the dot.
        Parser<char, char> fractionStart = from dot in Char('.')
            from digit in AsciiDigit()
            select digit;

        return from nums in AsciiDigit().Many1()
            from _ in fractionStart.LookAhead().Not()
            select new LexicalToken(LexicalTokenType.ConstInteger, new string(nums.ToArray()));
    }

    /// <summary>
    /// Matches a float literal of the form <c>digits.digits</c>.
    /// </summary>
    public static Parser<char, LexicalToken> ConstFloatParser()
    {
        return from integer in AsciiDigit().Many1()
            from _ in Char('.')
            from fraction in AsciiDigit().Many1()
            select new LexicalToken(LexicalTokenType.ConstFloat,
                new string(integer.ToArray()) + '.' + new string(fraction.ToArray()));
    }

    /// <summary>
    /// Matches an identifier: a letter or underscore followed by any number of letters, digits or underscores.
    /// </summary>
    public static Parser<char, LexicalToken> IdentifierParser()
    {
        return from first in AsciiLetter() | Char('_')
            from second in (AsciiLetter() | AsciiDigit() | Char('_')).Many()
            select new LexicalToken(LexicalTokenType.Identifier, first + new string(second.ToArray()));
    }

    /// <summary>
    /// Matches a <c>{ ... }</c> comment and discards its content.
    /// </summary>
    public static Parser<char, Unit> CommentParser()
    {
        return Any<char>().Quote(Char('{'), Char('}')).Map(_ => Unit.Instance);
    }

    /// <summary>
    /// Matches one piece of input that produces no token: a space, a line break or a comment.
    /// </summary>
    public static Parser<char, Unit> JunkParser()
    {
        return Space().Map(_ => Unit.Instance) | LineBreak().Map(_ => Unit.Instance) | CommentParser();
    }

    /// <summary>
    /// Matches a single-quoted literal. Content of length 0 or 1 becomes a
    /// <see cref="LexicalTokenType.Character"/> token, longer content a <see cref="LexicalTokenType.String"/> token.
    /// </summary>
    public static Parser<char, LexicalToken> CharParser()
    {
        // NOTE(review): the empty literal '' is classified as Character here; confirm that is intended.
        return from str in Any<char>().Quote(Char('\'')).Map(x => new string(x.ToArray()))
            select str.Length <= 1
                ? new LexicalToken(LexicalTokenType.Character, str)
                : new LexicalToken(LexicalTokenType.String, str);
    }

    /// <summary>
    /// Builds the full scanner: skip junk until a token matches, repeated for as long as that succeeds.
    /// </summary>
    public static Parser<char, IEnumerable<LexicalToken>> PascalParser()
    {
        return JunkParser().SkipTill(Choice(KeywordParser(),
            DelimiterParser(),
            OperatorParser(),
            ConstIntegerParser(),
            ConstFloatParser(),
            CharParser(),
            IdentifierParser())).Many();
    }
}
/// <summary>
/// Categories of token produced by the lexical scanner.
/// </summary>
public enum LexicalTokenType
{
    Keyword,
    ConstInteger,
    ConstFloat,
    Operator,
    Delimiter,
    Identifier,
    Character,
    String
}

/// <summary>
/// A token with value semantics: two tokens are equal when both their type and literal text are equal.
/// </summary>
/// <param name="type">Category of the token.</param>
/// <param name="literalValue">Text associated with the token.</param>
public sealed class LexicalToken(LexicalTokenType type, string literalValue) : IEquatable<LexicalToken>
{
    /// <summary>Category of the token.</summary>
    public LexicalTokenType TokenType { get; } = type;

    /// <summary>Text associated with the token.</summary>
    public string LiteralValue { get; } = literalValue;

    /// <inheritdoc />
    public bool Equals(LexicalToken? other) =>
        other is not null && TokenType == other.TokenType && LiteralValue == other.LiteralValue;

    /// <inheritdoc />
    public override bool Equals(object? obj) => obj is LexicalToken other && Equals(other);

    // HashCode.Combine mixes both components; it stays consistent with Equals because it
    // uses exactly the two members Equals compares.
    /// <inheritdoc />
    public override int GetHashCode() => HashCode.Combine(TokenType, LiteralValue);

    /// <summary>Formats the token as <c>&lt;Type&gt;'literal'</c>.</summary>
    public override string ToString() => $"<{TokenType}>'{LiteralValue}'";
}
/// <summary>
/// Immutable read state over a string: a source string plus the index of the current character.
/// </summary>
public sealed class StringReadState : IReadState<char, StringReadState>
{
    private readonly string _source;

    private readonly int _index;

    /// <summary>
    /// Character at the current index. Only meaningful while <see cref="HasValue"/> is <c>true</c>;
    /// indexing past the end of the source throws.
    /// </summary>
    public char Current => _source[_index];

    /// <summary><c>true</c> while the current index is inside the source string.</summary>
    public bool HasValue => _index < _source.Length;

    /// <summary>A new state over the same source, advanced by one character.</summary>
    public StringReadState Next => new(_source, _index + 1);

    private StringReadState(string source, int index)
    {
        // Fail at construction time rather than with a NullReferenceException on first read.
        ArgumentNullException.ThrowIfNull(source);

        _source = source;
        _index = index;
    }

    /// <summary>
    /// Creates a state positioned at the first character of <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Text to read.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public StringReadState(string source) : this(source, 0)
    {
    }

    /// <summary>Two states are equal when they have equal source text and the same index.</summary>
    public bool Equals(StringReadState? other)
    {
        if (other is null)
        {
            return false;
        }

        return _source == other._source && _index == other._index;
    }

    /// <inheritdoc />
    public override bool Equals(object? obj) => obj is StringReadState other && Equals(other);

    /// <inheritdoc />
    public override int GetHashCode() => HashCode.Combine(_source, _index);

    /// <summary>
    /// Describes the current character as readable text followed by its hexadecimal code,
    /// or reports that the end of the string was reached.
    /// </summary>
    public override string ToString()
    {
        return HasValue ? $"{ToReadableString(Current)}<0x{(int)Current:X2}>" : "End of string.";
    }

    // Control characters are shown as their escape sequence so they stay visible in messages.
    private static string ToReadableString(char token)
        => token switch
        {
            '\0' => "\\0",
            '\a' => "\\a",
            '\b' => "\\b",
            '\f' => "\\f",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            '\v' => "\\v",
            _ => token.ToString(),
        };
}
/// <summary>
/// Tests for the basic combinators: alternative, bind, map, next and fix.
/// </summary>
// NOTE(review): generic type arguments were missing from the source text and are reconstructed
// from usage (char input stream); explicit arguments on Pure/Fix should be confirmed.
public class BasicParsersTests : ParserTestsBase
{
    [Fact]
    public void AlternativeTest()
    {
        Parser<char, char> parser = Token('a') | Token('b');

        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateSuccessfulResult(parser, 'b', "bcd");
        ValidateFailedResult(parser, "cde");

        // Same behaviour through the extension method taking a factory for the fallback parser.
        parser = Token('a').Alternative(_ => Token('b'));
        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateSuccessfulResult(parser, 'b', "bcd");
        ValidateFailedResult(parser, "cde");
    }

    [Fact]
    public void BindTest()
    {
        Parser<char, char> parser = Token('a').Bind(_ => Token('b')).Bind(_ => Token('c'));
        ValidateSuccessfulResult(parser, 'c', "abc");
        ValidateFailedResult(parser, "acd");

        // Input that ends before the chain is complete must fail as well.
        ValidateFailedResult(parser, "ab");
    }

    [Fact]
    public void MapTest()
    {
        Parser<char, string> parser = Token('a').Map(c => $"{c}");
        ValidateSuccessfulResult(parser, "a", "abc");

        parser = Token('a').Map("test");
        ValidateSuccessfulResult(parser, "test", "abc");
    }

    [Fact]
    public void NextTest()
    {
        Parser<char, char> parser = Token('a').Next(_ => Token('a'), _ => Token('b'));
        ValidateSuccessfulResult(parser, 'a', "aaa");
        ValidateSuccessfulResult(parser, 'b', "bbb");

        parser = Token('a').Next(_ => Token('a'), _ => 'b');
        ValidateSuccessfulResult(parser, 'b', "bbb");

        parser = Token('a').Next(_ => Pure<char, char>('1'), '2');
        ValidateSuccessfulResult(parser, '1', "aaa");
        ValidateSuccessfulResult(parser, '2', "bbb");
    }

    [Fact]
    public void NextTest2()
    {
        Parser<char, string> parser = Token('a').Next(_ => "123", _ => Pure<char, string>("456"));
        ValidateSuccessfulResult(parser, "123", "aaa");
        ValidateSuccessfulResult(parser, "456", "bbb");

        parser = Token('a').Next(_ => "123", _ => "456");
        ValidateSuccessfulResult(parser, "123", "aaa");
        ValidateSuccessfulResult(parser, "456", "bbb");

        parser = Token('a').Next(_ => "123", "456");
        ValidateSuccessfulResult(parser, "123", "aaa");
        ValidateSuccessfulResult(parser, "456", "bbb");
    }

    [Fact]
    public void FixTest()
    {
        // A self-referencing parser that consumes every leading 'a' before the final 'b'.
        Parser<char, char> parser = Fix<char, Unit>(self => Token('a').Next(_ => self, Unit.Instance))
            .Bind(_ => Token('b'));
        ValidateSuccessfulResult(parser, 'b', "aaaaab");
    }
}
/// <summary>
/// Tests for the repetition, sequencing and separator combinators.
/// </summary>
// NOTE(review): generic type arguments were missing from the source text and are reconstructed
// from usage (char input stream); explicit arguments on Any/Satisfy should be confirmed.
public class CombinatorParserTests : ParserTestsBase
{
    [Fact]
    public void ChoiceTest()
    {
        Parser<char, char> parser = Choice(Token('a'), Token('b'), Token('c'));
        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateSuccessfulResult(parser, 'b', "bcd");
        ValidateSuccessfulResult(parser, 'c', "cde");

        parser = Choice([Token('a'), Token('b'), Token('c')]);
        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateSuccessfulResult(parser, 'b', "bcd");
        ValidateSuccessfulResult(parser, 'c', "cde");
    }

    [Fact]
    public void SequenceTest()
    {
        Parser<char, IEnumerable<char>> parser = Sequence(Token('a'), Token('b'), Token('c'));
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "abc");

        parser = Sequence([Token('a'), Token('b'), Token('c')]);
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "abc");
    }

    [Fact]
    public void LeftRightTest()
    {
        Parser<char, char> parser = Token('a').Left(Token('b'));
        ValidateSuccessfulResult(parser, 'a', "ab");

        parser = Token('a').Right(Token('b'));
        ValidateSuccessfulResult(parser, 'b', "ab");
    }

    [Fact]
    public void ManyTest()
    {
        Parser<char, IEnumerable<char>> parser = Token('a').Many();
        ValidateSuccessfulResult(parser, [], "bbb");
        ValidateSuccessfulResult(parser, ['a', 'a', 'a'], "aaa");

        parser = Token('a').Many1();
        ValidateSuccessfulResult(parser, ['a', 'a'], "aa");
        ValidateFailedResult(parser, "bbb");
    }

    [Fact]
    public void SkipManyTest()
    {
        Parser<char, char> parser = Token('a').SkipMany().Right(Token('b'));
        ValidateSuccessfulResult(parser, 'b', "aaaab");
        ValidateSuccessfulResult(parser, 'b', "bbbb");

        parser = Token('a').SkipMany1().Right(Token('b'));
        ValidateSuccessfulResult(parser, 'b', "aaaaaab");
        ValidateFailedResult(parser, "bb");
    }

    [Fact]
    public void ChainTest()
    {
        // Equivalent to Many1, except that the intermediate results are not returned.
        Parser<char, char> parser = Token('a').Chain(Token);
        ValidateSuccessfulResult(parser, 'a', "aa");
        ValidateFailedResult(parser, "bb");

        parser = Token('_').Chain(x => x == '_' ? Satisfy<char>(char.IsLetter) : Satisfy<char>(char.IsDigit));
        ValidateSuccessfulResult(parser, '1', "_a1");
        ValidateSuccessfulResult(parser, '_', "_123");
    }

    [Fact]
    public void ManyTillTest()
    {
        Parser<char, IEnumerable<char>> parser = Token('a').ManyTill(Token('b').LookAhead());
        ValidateSuccessfulResult(parser, ['a', 'a', 'a'], "aaab");
        ValidateSuccessfulResult(parser, [], "b");

        parser = Token('a').Many1Till(Token('b').LookAhead());
        ValidateSuccessfulResult(parser, ['a', 'a'], "aab");
        ValidateFailedResult(parser, "bb");
    }

    [Fact]
    public void SkipTillTest()
    {
        Parser<char, char> parser = Token('a').SkipTill(Token('b'));
        ValidateSuccessfulResult(parser, 'b', "aaab");
        ValidateSuccessfulResult(parser, 'b', "b");

        parser = Token('a').Skip1Till(Token('b'));
        ValidateSuccessfulResult(parser, 'b', "aaab");
        ValidateFailedResult(parser, "b");
    }

    [Fact]
    public void TakeTillTest()
    {
        Parser<char, IEnumerable<char>> parser = TakeTill(Token('b').LookAhead());
        ValidateSuccessfulResult(parser, ['a', 'a'], "aab");
        ValidateSuccessfulResult(parser, [], "b");

        parser = Take1Till(Token('b').LookAhead());
        ValidateSuccessfulResult(parser, ['a', 'a'], "aab");
        ValidateFailedResult(parser, "b");
    }

    [Fact]
    public void MatchTest()
    {
        Parser<char, char> parser = Token('b').Match();
        ValidateSuccessfulResult(parser, 'b', "asdfasdfasdfasdfb");
        ValidateSuccessfulResult(parser, 'b', "b");
    }

    [Fact]
    public void QuoteTest()
    {
        Parser<char, IEnumerable<char>> parser = Any<char>().Quote(Token('['), Token(']'));
        ValidateSuccessfulResult(parser, ['1', '2', '3'], "[123]");

        parser = Any<char>().Quote(Token('\''));
        ValidateSuccessfulResult(parser, ['1', '2', '3'], "'123'");
    }

    [Fact]
    public void SeparatedByTest()
    {
        Parser<char, IEnumerable<char>> parser = Token('a').SeparatedBy(Token(','));
        ValidateSuccessfulResult(parser, ['a', 'a', 'a'], "a,a,a");
        ValidateSuccessfulResult(parser, ['a'], "a");
        ValidateSuccessfulResult(parser, [], "");

        parser = Token('a').SeparatedBy1(Token(','));
        ValidateSuccessfulResult(parser, ['a', 'a', 'a'], "a,a,a");
        ValidateSuccessfulResult(parser, ['a'], "a");
        ValidateFailedResult(parser, "");
    }

    [Fact]
    public void EndByTest()
    {
        Parser<char, IEnumerable<char>> parser = Satisfy<char>(char.IsLetter).EndBy(Token('.'));
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "abc.");
        ValidateSuccessfulResult(parser, [], ".");

        parser = Satisfy<char>(char.IsLetter).EndBy1(Token('.'));
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "abc.");
        ValidateFailedResult(parser, ".");
    }

    [Fact]
    public void SeparatedOrEndByTest()
    {
        Parser<char, IEnumerable<char>> parser = Satisfy<char>(char.IsLetter).SeparatedOrEndBy1(Token(','));
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "a,b,c,");
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "a,b,c");
        ValidateFailedResult(parser, "");

        parser = Satisfy<char>(char.IsLetter).SeparatedOrEndBy(Token(','));
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "a,b,c,");
        ValidateSuccessfulResult(parser, ['a', 'b', 'c'], "a,b,c");
        ValidateSuccessfulResult(parser, [], "");
    }
}
/// <summary>
/// Tests that parsers can be composed with C# query syntax (<c>select</c> and multiple <c>from</c> clauses).
/// </summary>
// NOTE(review): generic type arguments were missing from the source text and are reconstructed from usage.
public class LinqTests : ParserTestsBase
{
    [Fact]
    public void SelectTest1()
    {
        Parser<char, string> parser = from token in Char('a')
            select token.ToString();
        ValidateSuccessfulResult(parser, "a", "a");
        ValidateFailedResult(parser, "b");
    }

    [Fact]
    public void SelectManyTest1()
    {
        // Three chained 'from' clauses: all of them must match for the projection to run.
        Parser<char, int> parser = from _1 in Char('a')
            from _2 in Char('b')
            from _3 in Char('c')
            select 123;
        ValidateSuccessfulResult(parser, 123, "abc");
        ValidateFailedResult(parser, "asd");
    }
}
/// <summary>
/// Tests for the modifier combinators: Do, LookAhead, Not and Try.
/// </summary>
// NOTE(review): generic type arguments were missing from the source text and are reconstructed from usage.
public class ModifierParserTests : ParserTestsBase
{
    [Fact]
    public void DoTest()
    {
        // Reading Value on a failed result throws the result's ParseException.
        Parser<char, char> parser = Token('a').Do(x => Assert.Equal('a', x)).Do(x => Assert.Equal('a', x),
            failedResult => Assert.ThrowsAny<ParseException>(() => failedResult.Value));

        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateFailedResult(parser, "bcd");
    }

    [Fact]
    public void LookAheadTest()
    {
        // LookAhead must not consume input: the follow-up parser still sees the first character.
        Parser<char, char> parser = Token('a').LookAhead().Next(_ => Token('a'), _ => Token('b'));
        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateSuccessfulResult(parser, 'b', "bcd");
    }

    [Fact]
    public void NotTest()
    {
        Parser<char, char> parser = Token('a').Not('b');
        ValidateSuccessfulResult(parser, 'b', "bcd");

        parser = Token('a').Not().Bind(_ => Token('b'));
        ValidateSuccessfulResult(parser, 'b', "bcd");
    }

    [Fact]
    public void TryTest()
    {
        Parser<char, string> parser = String("abc").Try("cde");
        ValidateSuccessfulResult(parser, "abc", "abc");
        ValidateSuccessfulResult(parser, "cde", "cde");

        parser = String("abc").Try(_ => "cde");
        ValidateSuccessfulResult(parser, "abc", "abc");
        ValidateSuccessfulResult(parser, "cde", "cde");
    }
}
/// <summary>
/// Tests for the primitive parsers: Pure, Null, Fail, Satisfy, Any, Token, Take and Skip.
/// </summary>
// NOTE(review): generic type arguments were missing from the source text. The explicit arguments
// below (notably on the Fail overloads) are reconstructed from usage and must be confirmed
// against ParserBuilder.
public class PrimitiveParserTests : ParserTestsBase
{
    [Fact]
    public void PureTest()
    {
        Parser<char, char> parser = Pure<char, char>('a');
        ValidateSuccessfulResult(parser, 'a', "abc");

        parser = Pure<char, char>(_ => 'a');
        ValidateSuccessfulResult(parser, 'a', "abc");
    }

    [Fact]
    public void NullTest()
    {
        Parser<char, Unit> parser = Null<char>();
        ValidateSuccessfulResult(parser, Unit.Instance, "abc");
    }

    [Fact]
    public void FailTest()
    {
        Parser<char, char> parser = Fail<char, char>();
        ValidateFailedResult(parser, "abc");

        parser = Fail<char, char>("Failed message");
        FailedResult<char, char> result = ValidateFailedResult(parser, "abc");
        Assert.Equal("Failed message", result.Message);

        // The message factory receives the read state; its text form is "<char><0xNN>".
        parser = Fail<char, char>(x => $"{x}");
        result = ValidateFailedResult(parser, "abc");
        Assert.Equal("a<0x61>", result.Message);

        parser = Fail<char, char>(new InvalidOperationException());
        result = ValidateFailedResult(parser, "abc");
        Assert.IsType<InvalidOperationException>(result.Exception.InnerException);
    }

    [Fact]
    public void SatisfyTest()
    {
        Parser<char, char> parser = Satisfy<char>(char.IsLetter);
        ValidateSuccessfulResult(parser, 'a', "abc");
        ValidateFailedResult(parser, "123");
    }

    [Fact]
    public void AnyTest()
    {
        Parser<char, char> parser = Any<char>();
        ValidateSuccessfulResult(parser, '1', "123");
    }

    [Fact]
    public void TokenTest()
    {
        Parser<char, char> parser = Token('a');
        ValidateSuccessfulResult(parser, 'a', "abc");
    }

    [Fact]
    public void TakeTest()
    {
        Parser<char, IEnumerable<char>> parser = Take<char>(5);
        ValidateSuccessfulResult(parser, ['h', 'e', 'l', 'l', 'o'], "hello");

        // Fewer characters available than requested.
        ValidateFailedResult(parser, "abc");
    }

    [Fact]
    public void SkipTest()
    {
        Parser<char, char> parser = Skip<char>(5).Bind(_ => Token(','));
        ValidateSuccessfulResult(parser, ',', "hello,world.");
        ValidateFailedResult(parser, "abc");
    }
}
b/CanonSharp.Tests/LexicalAnalyzerTests/ReaderTests.cs deleted file mode 100644 index 01b6df7..0000000 --- a/CanonSharp.Tests/LexicalAnalyzerTests/ReaderTests.cs +++ /dev/null @@ -1,25 +0,0 @@ -using CanonSharp.Common.Abstractions; -using StringReader = CanonSharp.Common.Reader.StringReader; - -namespace CanonSharp.Tests.LexicalAnalyzerTests; - -public class ReaderTests -{ - [Fact] - public void StringReaderTest() - { - StringReader reader = new("ab"); - - Assert.True(reader.TryPeek(out char c)); - Assert.Equal('a', c); - Assert.True(reader.TryPeek(out c)); - Assert.Equal('a', c); - Assert.Equal('a', reader.Read()); - Assert.True(reader.TryPeek(out c)); - Assert.Equal('b', c); - Assert.True(reader.TryPeek(out c)); - Assert.Equal('b', c); - Assert.Equal('b', reader.Read()); - Assert.False(reader.TryPeek(out c)); - } -} diff --git a/CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs b/CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs deleted file mode 100644 index da1b49e..0000000 --- a/CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs +++ /dev/null @@ -1,104 +0,0 @@ -using CanonSharp.Common.LexicalAnalyzer; - -namespace CanonSharp.Tests.LexicalAnalyzerTests; - -public class RegularExpressionTests -{ - [Fact] - public void KleeneTest() - { - RegularExpression expression = RegularExpression.Concatenate( - RegularExpression.Kleene(RegularExpression.Single('a')), - RegularExpression.Single('b')); - - NondeterministicFiniteAutomation automation = expression.Convert2Nfa(); - - automation.Start.Transactions.TryGetValue(EmptyChar.Empty, out HashSet? 
next); - Assert.NotNull(next); - - Assert.Contains(next, s => s.Transactions.ContainsKey(new EmptyChar('a'))); - Assert.Contains(next, s => s.Transactions.ContainsKey(new EmptyChar('b'))); - } - - [Fact] - public void AlternateTest() - { - RegularExpression expression = RegularExpression.Alternate( - RegularExpression.Kleene(RegularExpression.Single('a')), - RegularExpression.Single('b')); - - NondeterministicFiniteAutomation automation = expression.Convert2Nfa(); - - automation.Start.Transactions.TryGetValue(EmptyChar.Empty, out HashSet? next); - Assert.NotNull(next); - - Assert.Contains(next, s => s.Transactions.ContainsKey(new EmptyChar('b'))); - - NondeterministicState? state = (from item in next - where item.Transactions[EmptyChar.Empty].Count == 2 - select item).FirstOrDefault(); - Assert.NotNull(state); - - Assert.Contains(state.Transactions[EmptyChar.Empty], - s => s.Transactions.ContainsKey(new EmptyChar('a'))); - } - - [Fact] - public void RangeTest() - { - RegularExpression expression = RegularExpression.Range('a', 'z'); - NondeterministicFiniteAutomation nfa = expression.Convert2Nfa(); - - Assert.Equal(26, nfa.Start.Transactions.Count); - } - - [Fact] - public void ConvertTest() - { - RegularExpression expression = RegularExpression.Alternate( - RegularExpression.Kleene(RegularExpression.Single('a')), - RegularExpression.Single('b')); - - NondeterministicFiniteAutomation automation = expression.Convert2Nfa(); - DeterministicFiniteAutomation dfa = DeterministicFiniteAutomation.Create(automation); - - DeterministicState state2 = dfa.Start.Transaction['a']; - Assert.Equal(state2, state2.Transaction['a']); - - DeterministicState state3 = dfa.Start.Transaction['b']; - Assert.Empty(state3.Transaction); - - Assert.Equal(3, dfa.FinalStates.Count); - } - - [Fact] - public void NondeterministicStateSetTest() - { - Dictionary map = []; - - NondeterministicState key1 = new(); - NondeterministicState key2 = new(); - - map.Add(new NondeterministicStateSet([key1, 
key2]), 'a'); - - Assert.Equal('a', map[new NondeterministicStateSet([key2, key1])]); - } - - [Fact] - public void PrefixConvertTest() - { - RegularExpression expression = RegularExpression.Alternate( - RegularExpression.String("string"), - RegularExpression.String("string1")); - - NondeterministicFiniteAutomation nfa = expression.Convert2Nfa(); - DeterministicFiniteAutomation.Create(nfa); - } - - [Fact] - public void WhiteSpaceConvertTest() - { - NondeterministicFiniteAutomation nfa = LexicalToken.WhiteSpace.Expression.Convert2Nfa(); - DeterministicFiniteAutomation.Create(nfa); - } -} diff --git a/CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs b/CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs deleted file mode 100644 index a929b90..0000000 --- a/CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs +++ /dev/null @@ -1,71 +0,0 @@ -using CanonSharp.Common.LexicalAnalyzer; -using StringReader = CanonSharp.Common.Reader.StringReader; - -namespace CanonSharp.Tests.LexicalAnalyzerTests; - -public class ScanTests -{ - [Fact] - public void ScanTest1() - { - LexicalScannerBuilder builder = LexicalScanner.CreateEmptyBuilder(); - - LexicalToken token1 = new(RegularExpression.String("ab"), 1); - builder.DefineToken(token1); - - StringReader reader = new("ab"); - LexicalScanner scanner = builder.Build(reader); - - Assert.True(scanner.TryRead(out LexicalToken? 
result)); - Assert.Equal(token1, result); - Assert.Equal("ab", result.LiteralValue); - } - - [Fact] - public void ScanTest2() - { - LexicalScannerBuilder builder = LexicalScanner.CreateDefaultBuilder(); - - LexicalToken stringKeyword = new(RegularExpression.String("string"), 100); - LexicalToken periodDelimiter = new(RegularExpression.Single('.'), 100); - LexicalToken semiColonDelimiter = new(RegularExpression.Single(';'), 100); - LexicalToken identifier = new(RegularExpression.Concatenate(RegularExpression.Range('a', 'z'), - RegularExpression.Kleene(RegularExpression.Range('a', 'z'))), 0); - LexicalToken assigner = new(RegularExpression.String(":="), 100); - builder.DefineToken(stringKeyword); - builder.DefineToken(periodDelimiter); - builder.DefineToken(semiColonDelimiter); - builder.DefineToken(identifier); - builder.DefineToken(assigner); - - StringReader reader = new(""" - string value := origin; - string abc := value. - """); - LexicalScanner scanner = builder.Build(reader); - Validate(scanner, [ - stringKeyword, - identifier, - assigner, - identifier, - semiColonDelimiter, - stringKeyword, - identifier, - assigner, - identifier, - periodDelimiter - ]); - - Assert.False(scanner.TryRead(out _)); - } - - private static void Validate(LexicalScanner scanner, IEnumerable expectedTokens) - { - foreach (LexicalToken token in expectedTokens) - { - Assert.True(scanner.TryRead(out LexicalToken? 
outToken)); - Assert.NotNull(outToken); - Assert.Equal(token, outToken); - } - } -} diff --git a/CanonSharp.Tests/ReaderTests/StringReadStateTests.cs b/CanonSharp.Tests/ReaderTests/StringReadStateTests.cs new file mode 100644 index 0000000..4aeeb8d --- /dev/null +++ b/CanonSharp.Tests/ReaderTests/StringReadStateTests.cs @@ -0,0 +1,21 @@ +using CanonSharp.Combinator.Extensions; +using CanonSharp.Common.Scanner; + +namespace CanonSharp.Tests.LexicalAnalyzerTests; + +public class StringReadStateTests +{ + [Fact] + public void AsEnumerableTest() + { + StringReadState state = new("abc"); + + IEnumerable states = state.AsEnumerable(); + + foreach ((char c, StringReadState s) in "abc".Zip(states)) + { + Assert.True(s.HasValue); + Assert.Equal(c, s.Current); + } + } +} diff --git a/CanonSharp.Tests/ScannerTest/LexicalParserTests.cs b/CanonSharp.Tests/ScannerTest/LexicalParserTests.cs new file mode 100644 index 0000000..4bd0d84 --- /dev/null +++ b/CanonSharp.Tests/ScannerTest/LexicalParserTests.cs @@ -0,0 +1,167 @@ +using CanonSharp.Common.Scanner; +using CanonSharp.Tests.Utils; + +namespace CanonSharp.Tests.ScannerTest; + +public class LexicalParserTests : LexicalTestBase +{ + [Fact] + public void LexicalParserTest1() + { + const string pascalProgram = """ + program HelloWorld; + var + message: char; + begin + message := 'h'; + writeln(message); + end. 
+ """; + + ValidateLexicalTokens(LexicalScanner.PascalParser(), pascalProgram, [ + (LexicalTokenType.Keyword, "program"), + (LexicalTokenType.Identifier, "HelloWorld"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "var"), + (LexicalTokenType.Identifier, "message"), + (LexicalTokenType.Delimiter, ":"), + (LexicalTokenType.Keyword, "char"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "begin"), + (LexicalTokenType.Identifier, "message"), + (LexicalTokenType.Operator, ":="), + (LexicalTokenType.Character, "h"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Identifier, "writeln"), + (LexicalTokenType.Delimiter, "("), + (LexicalTokenType.Identifier, "message"), + (LexicalTokenType.Delimiter, ")"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "end"), + (LexicalTokenType.Delimiter, ".") + ]); + } + + [Fact] + public void LexicalParserTest2() + { + const string program = """ + program main; + var + ab: integer; + begin + ab := 3; + write(ab); + end. 
+ """; + + ValidateLexicalTokens(LexicalScanner.PascalParser(), program, [ + (LexicalTokenType.Keyword, "program"), + (LexicalTokenType.Identifier, "main"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "var"), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Delimiter, ":"), + (LexicalTokenType.Keyword, "integer"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "begin"), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Operator, ":="), + (LexicalTokenType.ConstInteger, "3"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Identifier, "write"), + (LexicalTokenType.Delimiter, "("), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Delimiter, ")"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "end"), + (LexicalTokenType.Delimiter, ".") + ]); + } + + [Fact] + public void LexicalParserTest3() + { + const string pascalProgram = """ + {test} + program main; + var + ab, ba: integer; + begin + ab := 3; + ba := 5; + ab := 5; + write(ab + ba); + end. 
+ """; + + ValidateLexicalTokens(LexicalScanner.PascalParser(), pascalProgram, [ + (LexicalTokenType.Keyword, "program"), + (LexicalTokenType.Identifier, "main"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "var"), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Delimiter, ","), + (LexicalTokenType.Identifier, "ba"), + (LexicalTokenType.Delimiter, ":"), + (LexicalTokenType.Keyword, "integer"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "begin"), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Operator, ":="), + (LexicalTokenType.ConstInteger, "3"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Identifier, "ba"), + (LexicalTokenType.Operator, ":="), + (LexicalTokenType.ConstInteger, "5"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Operator, ":="), + (LexicalTokenType.ConstInteger, "5"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Identifier, "write"), + (LexicalTokenType.Delimiter, "("), + (LexicalTokenType.Identifier, "ab"), + (LexicalTokenType.Operator, "+"), + (LexicalTokenType.Identifier, "ba"), + (LexicalTokenType.Delimiter, ")"), + (LexicalTokenType.Delimiter, ";"), + (LexicalTokenType.Keyword, "end"), + (LexicalTokenType.Delimiter, ".") + ]); + } + + [Theory] + [InlineData(""" + program exFunction; + var + a, b, ret : integer; + + begin + a := 100; + b := 200; + { calling a function to get max valued } + ret := a - b; + + + + end. + """, 29)] + [InlineData(""" + { + This is a block comment that does closed. 
+ } + program CommentClosed; + """, 3)] + [InlineData(""" + {} + program CommentClosed; + """, 3)] + public void LexicalParserTest(string input, int count) + { + LexicalScanner scanner = new(); + + List tokens = scanner.Tokenize(new StringReadState(input)).ToList(); + Assert.Equal(count, tokens.Count); + } +} diff --git a/CanonSharp.Tests/ScannerTest/LexicalTokenParserTest.cs b/CanonSharp.Tests/ScannerTest/LexicalTokenParserTest.cs new file mode 100644 index 0000000..9f939d2 --- /dev/null +++ b/CanonSharp.Tests/ScannerTest/LexicalTokenParserTest.cs @@ -0,0 +1,177 @@ +using CanonSharp.Combinator; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; +using CanonSharp.Common.Scanner; +using CanonSharp.Tests.Utils; + +namespace CanonSharp.Tests.ScannerTest; + +public class LexicalTokenParserTest : LexicalTestBase +{ + [Theory] + [InlineData("program")] + [InlineData("const")] + [InlineData("var")] + [InlineData("procedure")] + [InlineData("function")] + [InlineData("begin")] + [InlineData("end")] + [InlineData("array")] + [InlineData("of")] + [InlineData("if")] + [InlineData("then")] + [InlineData("else")] + [InlineData("for")] + [InlineData("to")] + [InlineData("do")] + [InlineData("true")] + [InlineData("false")] + [InlineData("while")] + public void KeywordParserTest(string literalValue) + { + Parser keyword = LexicalScanner.KeywordParser(); + ValidateSuccessfulParser(keyword, LexicalTokenType.Keyword, literalValue, literalValue); + } + + [Theory] + [InlineData("andOne")] + [InlineData("program1")] + [InlineData("todo")] + public void FailedKeywordParserTest(string input) + { + Parser keyword = LexicalScanner.KeywordParser(); + ValidateFailedParser(keyword, input); + } + + [Theory] + [InlineData(",")] + [InlineData(".")] + [InlineData(";")] + [InlineData(":")] + [InlineData("(")] + [InlineData(")")] + [InlineData("[")] + [InlineData("]")] + [InlineData("..")] + public void DelimiterParserTest(string literalValue) + { + Parser delimiter 
= LexicalScanner.DelimiterParser(); + ValidateSuccessfulParser(delimiter, LexicalTokenType.Delimiter, literalValue, literalValue); + } + + [Theory] + [InlineData(":=")] + public void FailedDelimiterParserTest(string input) + { + Parser delimiter = LexicalScanner.DelimiterParser(); + ValidateFailedParser(delimiter, input); + } + + [Theory] + [InlineData("=")] + [InlineData("!=")] + [InlineData(">")] + [InlineData(">=")] + [InlineData("<")] + [InlineData("<=")] + [InlineData("+")] + [InlineData("-")] + [InlineData("*")] + [InlineData("/")] + [InlineData(":=")] + public void OperatorParserTest(string literalValue) + { + Parser operatorParser = LexicalScanner.OperatorParser(); + ValidateSuccessfulParser(operatorParser, LexicalTokenType.Operator, literalValue, literalValue); + } + + [Theory] + [InlineData("identifier")] + [InlineData("_identifier")] + [InlineData("identifier123")] + [InlineData("identifier_with_underscore")] + [InlineData("CamelCase")] + [InlineData("andand")] + public void IdentifierParserTest(string literalValue) + { + Parser identifier = LexicalScanner.IdentifierParser(); + + ValidateSuccessfulParser(identifier, LexicalTokenType.Identifier, literalValue, literalValue); + } + + [Theory] + [InlineData(123, "123")] + [InlineData(0, "0")] + public void ConstIntegerTest(int value, string input) + { + StringReadState state = new(input); + ParseResult result = LexicalScanner.ConstIntegerParser().Parse(state); + + Assert.Equal(LexicalTokenType.ConstInteger, result.Value.TokenType); + Assert.Equal(value, int.Parse(result.Value.LiteralValue)); + } + + [Theory] + [InlineData(123.456, "123.456")] + [InlineData(0, "0.0")] + public void ConstFloatTest(double value, string input) + { + StringReadState state = new(input); + ParseResult result = LexicalScanner.ConstFloatParser().Parse(state); + + Assert.Equal(LexicalTokenType.ConstFloat, result.Value.TokenType); + Assert.Equal(value, double.Parse(result.Value.LiteralValue)); + } + + [Theory] + [InlineData('a', 
"'a'")] + [InlineData('Z', "'Z'")] + public void CharTest(char value, string input) + { + StringReadState state = new(input); + ParseResult result = LexicalScanner.CharParser().Parse(state); + + Assert.Equal(LexicalTokenType.Character, result.Value.TokenType); + Assert.Equal(value, char.Parse(result.Value.LiteralValue)); + } + + [Theory] + [InlineData("hello, world!", "'hello, world!'")] + public void StringTest(string value, string input) + { + StringReadState state = new(input); + ParseResult result = LexicalScanner.CharParser().Parse(state); + + Assert.Equal(LexicalTokenType.String, result.Value.TokenType); + Assert.Equal(value, result.Value.LiteralValue); + } + + [Theory] + [InlineData("{comment}")] + [InlineData("{}")] + public void CommentTest(string input) + { + StringReadState state = new(input); + ParseResult result = LexicalScanner.CommentParser().Parse(state); + + Assert.Equal(Unit.Instance, result.Value); + } + + [Theory] + [InlineData(" {comment} program")] + [InlineData(""" + {comment} + {comment} + {}{}{}{} + program + """)] + public void JunkTest(string input) + { + StringReadState state = new(input); + Parser parser = LexicalScanner.JunkParser().SkipTill(LexicalScanner.KeywordParser()); + ParseResult result = parser.Parse(state); + + Assert.Equal(LexicalTokenType.Keyword, result.Value.TokenType); + Assert.Equal("program", result.Value.LiteralValue); + } +} diff --git a/CanonSharp.Tests/TextTests/TextParserTests.cs b/CanonSharp.Tests/TextTests/TextParserTests.cs new file mode 100644 index 0000000..d39f5d7 --- /dev/null +++ b/CanonSharp.Tests/TextTests/TextParserTests.cs @@ -0,0 +1,169 @@ +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Combinator.Extensions; +using CanonSharp.Combinator.Text; +using CanonSharp.Common.Scanner; +using CanonSharp.Tests.Utils; +using static CanonSharp.Combinator.Text.TextParserBuilder; + +namespace CanonSharp.Tests.TextTests; + +public class TextParserTests : ParserTestsBase +{ + [Fact] + public void 
CharTest() + { + ValidateSuccessfulResult(Char('a'), 'a', "abc"); + ValidateSuccessfulResult(CharIgnoreCase('a'), 'a', "abc"); + ValidateSuccessfulResult(CharIgnoreCase('a'), 'A', "ABC"); + } + + [Theory] + [InlineData('a', "a")] + [InlineData('b', "b")] + [InlineData('c', "c")] + [InlineData('d', "d")] + public void OneOfTest(char except, string input) + { + Parser parser = OneOf("abcd"); + ValidateSuccessfulResult(parser, except, input); + } + + [Theory] + [InlineData('a', "a")] + [InlineData('b', "b")] + [InlineData('c', "c")] + [InlineData('d', "d")] + [InlineData('A', "A")] + [InlineData('B', "B")] + [InlineData('C', "C")] + [InlineData('D', "D")] + public void OneOfIgnoreCaseTest(char except, string input) + { + Parser parser = OneOfIgnoreCase("abcd"); + ValidateSuccessfulResult(parser, except, input); + } + + [Theory] + [InlineData("hello,world.")] + [InlineData("HELLO,WORLD.")] + [InlineData("Hello,world.")] + [InlineData("Hello,World.")] + public void StringIgnoreCaseTest(string literalValue) + { + Parser parser = StringIgnoreCase("hello,world."); + ValidateSuccessfulResult(parser, literalValue, literalValue); + } + + [Theory] + [InlineData('0')] + [InlineData('5')] + [InlineData('9')] + public void RangeTest(char except) + { + Parser parser = Range('0', '9'); + ValidateSuccessfulResult(parser, except, except.ToString()); + + ValidateFailedResult(parser, "abc"); + } + + [Theory] + [InlineData('a')] + [InlineData('A')] + [InlineData('z')] + [InlineData('测')] + public void LetterTest(char except) + { + ValidateSuccessfulResult(Letter(), except, except.ToString()); + } + + [Theory] + [InlineData('0')] + [InlineData(',')] + [InlineData('%')] + public void FailedLetterTest(char except) + { + ValidateFailedResult(Letter(), except.ToString()); + } + + [Theory] + [InlineData('0')] + public void DigitTest(char except) + { + ValidateSuccessfulResult(Digit(), except, except.ToString()); + } + + [Theory] + [InlineData('a')] + [InlineData('A')] + [InlineData('z')] + 
[InlineData('测')] + [InlineData(',')] + [InlineData('%')] + public void FailedDigitTest(char except) + { + ValidateFailedResult(Digit(), except.ToString()); + } + + [Theory] + [InlineData('a')] + [InlineData('z')] + [InlineData('A')] + [InlineData('Z')] + public void AsciiLetterTest(char except) + { + ValidateSuccessfulResult(AsciiLetter(), except, except.ToString()); + } + + [Theory] + [InlineData('0')] + [InlineData(',')] + [InlineData('%')] + public void FailedAsciiLetterTest(char except) + { + ValidateFailedResult(AsciiLetter(), except.ToString()); + } + + [Theory] + [InlineData('0')] + public void AsciiDigitTest(char except) + { + ValidateSuccessfulResult(AsciiDigit(), except, except.ToString()); + } + + [Theory] + [InlineData('a')] + [InlineData('A')] + [InlineData('z')] + [InlineData('测')] + [InlineData(',')] + [InlineData('%')] + public void FailedAsciiDigitTest(char except) + { + ValidateFailedResult(AsciiDigit(), except.ToString()); + } + + [Fact] + public void SkipSpacesTest() + { + ValidateSuccessfulResult(String("test").SkipSpaces(), "test", " test"); + ValidateSuccessfulResult(String("test").SkipSpaces(), "test", "\t test"); + + ValidateSuccessfulResult(String("test").SkipSpaces().Many(), ["test", "test", "test"], + "test test test test"); + } + + [Fact] + public void SkipSpaceAndLineBreakTest() + { + StringReadState state = new(""" + test test test + test test + test + """); + + Parser> parser = StringIgnoreCase("test").SkipSpaceAndLineBreak().Many(); + ParseResult> result = parser.Parse(state); + + Assert.All(result.Value, x => Assert.Equal("test", x.ToLower())); + } +} diff --git a/CanonSharp.Tests/Utils/LexicalTestBase.cs b/CanonSharp.Tests/Utils/LexicalTestBase.cs new file mode 100644 index 0000000..fdb9657 --- /dev/null +++ b/CanonSharp.Tests/Utils/LexicalTestBase.cs @@ -0,0 +1,39 @@ +using CanonSharp.Combinator; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Common.Scanner; + +namespace CanonSharp.Tests.Utils; + +public abstract 
class LexicalTestBase +{ + protected static void ValidateSuccessfulParser(Parser parser, LexicalTokenType exceptedType, + string literalValue, string input) + { + StringReadState state = new(input); + ParseResult result = parser.Parse(state); + + Assert.Equal(exceptedType, result.Value.TokenType); + Assert.Equal(literalValue, result.Value.LiteralValue); + } + + protected static void ValidateFailedParser(Parser parser, string input) + { + StringReadState state = new(input); + ParseResult result = parser.Parse(state); + Assert.ThrowsAny(() => result.Value); + } + + protected static void ValidateLexicalTokens(Parser> parser, string input, + IEnumerable<(LexicalTokenType, string)> exceptedResult) + { + StringReadState state = new(input); + ParseResult> result = parser.Parse(state); + + foreach (((LexicalTokenType exceptedType, string exceptedValue), LexicalToken token) in exceptedResult.Zip( + result.Value)) + { + Assert.Equal(exceptedType, token.TokenType); + Assert.Equal(exceptedValue, token.LiteralValue); + } + } +} diff --git a/CanonSharp.Tests/Utils/ParserTestsBase.cs b/CanonSharp.Tests/Utils/ParserTestsBase.cs new file mode 100644 index 0000000..49949ff --- /dev/null +++ b/CanonSharp.Tests/Utils/ParserTestsBase.cs @@ -0,0 +1,43 @@ +using CanonSharp.Combinator; +using CanonSharp.Combinator.Abstractions; +using CanonSharp.Common.Scanner; + +namespace CanonSharp.Tests.Utils; + +public abstract class ParserTestsBase +{ + protected static void ValidateSuccessfulResult(Parser parser, T value, string source) + { + StringReadState state = new(source); + + ParseResult result = parser.Parse(state); + Assert.Equal(value, result.Value); + } + + protected static void ValidateSuccessfulResult( + Parser> parser, + IEnumerable values, string source) + { + StringReadState state = new(source); + + ParseResult> result = parser.Parse(state); + + foreach ((T actual, T except) in result.Value.Zip(values)) + { + Assert.Equal(except, actual); + } + } + + protected static FailedResult 
ValidateFailedResult(Parser parser, string source) + { + StringReadState state = new(source); + + ParseResult result = parser.Parse(state); + Assert.ThrowsAny(() => + { + _ = result.Value; + }); + + return (FailedResult)result; + } +} diff --git a/CanonSharp.sln b/CanonSharp.sln index f2910f5..fea8a09 100644 --- a/CanonSharp.sln +++ b/CanonSharp.sln @@ -3,8 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.0.31903.59 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "CanonSharp.Parser", "CanonSharp.Parser\CanonSharp.Parser.fsproj", "{A985BD0A-4AEF-44D6-BD36-5F8035A25ED7}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CanonSharp.Tests", "CanonSharp.Tests\CanonSharp.Tests.csproj", "{5A28EF92-805F-44E9-8E13-EA9A5BB623BB}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CanonSharp.Common", "CanonSharp.Common\CanonSharp.Common.csproj", "{288943FE-E3E6-49E2-8C6F-9B5B9242266C}" @@ -16,6 +14,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{ .gitea\workflows\unit-test.yaml = .gitea\workflows\unit-test.yaml EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CanonSharp.Combinator", "CanonSharp.Combinator\CanonSharp.Combinator.csproj", "{715CAABD-41C8-4CF4-95FC-204705D2B3E6}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -25,10 +25,6 @@ Global HideSolutionNode = FALSE EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {A985BD0A-4AEF-44D6-BD36-5F8035A25ED7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {A985BD0A-4AEF-44D6-BD36-5F8035A25ED7}.Debug|Any CPU.Build.0 = Debug|Any CPU - {A985BD0A-4AEF-44D6-BD36-5F8035A25ED7}.Release|Any CPU.ActiveCfg = Release|Any CPU - {A985BD0A-4AEF-44D6-BD36-5F8035A25ED7}.Release|Any CPU.Build.0 = Release|Any CPU 
{5A28EF92-805F-44E9-8E13-EA9A5BB623BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {5A28EF92-805F-44E9-8E13-EA9A5BB623BB}.Debug|Any CPU.Build.0 = Debug|Any CPU {5A28EF92-805F-44E9-8E13-EA9A5BB623BB}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -37,6 +33,10 @@ Global {288943FE-E3E6-49E2-8C6F-9B5B9242266C}.Debug|Any CPU.Build.0 = Debug|Any CPU {288943FE-E3E6-49E2-8C6F-9B5B9242266C}.Release|Any CPU.ActiveCfg = Release|Any CPU {288943FE-E3E6-49E2-8C6F-9B5B9242266C}.Release|Any CPU.Build.0 = Release|Any CPU + {715CAABD-41C8-4CF4-95FC-204705D2B3E6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {715CAABD-41C8-4CF4-95FC-204705D2B3E6}.Debug|Any CPU.Build.0 = Debug|Any CPU + {715CAABD-41C8-4CF4-95FC-204705D2B3E6}.Release|Any CPU.ActiveCfg = Release|Any CPU + {715CAABD-41C8-4CF4-95FC-204705D2B3E6}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {E29A838A-6D2A-4EC6-B2E5-7D53DB491A3F} = {B97A35A0-4616-4B3F-8F73-A66827BC98BA}