CanonSharp/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs
jackfiled 3c0d51cec5 feat: 正则词法识别器 (#1)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/1
Co-authored-by: jackfiled <xcrenchangjun@outlook.com>
Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
2024-07-29 16:59:29 +08:00

225 lines
6.4 KiB
C#

namespace CanonSharp.Common.LexicalAnalyzer;
/// <summary>
/// Base type of the regular-expression tree. Each node can translate itself into a
/// nondeterministic finite automaton, and the static members below act as factories
/// for the concrete node types.
/// </summary>
public abstract class RegularExpression
{
    /// <summary>
    /// Builds the nondeterministic finite automaton for this expression.
    /// </summary>
    /// <returns>The automaton produced by the concrete expression type.</returns>
    public abstract NondeterministicFiniteAutomation Convert2Nfa();

    /// <summary>
    /// Matches the empty string.
    /// </summary>
    public static RegularExpression Empty => new EmptyExpression();

    /// <summary>
    /// Matches a single character:
    /// c
    /// </summary>
    /// <param name="c">The character to match.</param>
    /// <returns>A <see cref="SymbolExpression"/> for <paramref name="c"/>.</returns>
    public static RegularExpression Single(char c) => new SymbolExpression(c);

    /// <summary>
    /// Alternation:
    /// left|right
    /// </summary>
    /// <param name="left">First alternative.</param>
    /// <param name="right">Second alternative.</param>
    /// <returns>An <see cref="AlternationExpression"/> over both operands.</returns>
    public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
        new AlternationExpression(left, right);

    /// <summary>
    /// Concatenation: <paramref name="first"/> followed by <paramref name="second"/>.
    /// </summary>
    /// <param name="first">Expression matched first.</param>
    /// <param name="second">Expression matched after <paramref name="first"/>.</param>
    /// <returns>A <see cref="ConcatenationExpression"/> over both operands.</returns>
    public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
        new ConcatenationExpression(first, second);

    /// <summary>
    /// Kleene closure:
    /// inner*
    /// </summary>
    /// <param name="inner">Expression to repeat.</param>
    /// <returns>A <see cref="KleeneExpression"/> wrapping <paramref name="inner"/>.</returns>
    public static RegularExpression Kleene(RegularExpression inner) => new KleeneExpression(inner);

    /// <summary>
    /// Matches the literal text <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The literal to match.</param>
    /// <returns>A <see cref="StringExpression"/> for <paramref name="value"/>.</returns>
    public static RegularExpression String(string value) => new StringExpression(value);

    /// <summary>
    /// Matches any one of the characters contained in <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The characters that make up the set.</param>
    /// <returns>A <see cref="CharSetExpression"/> built from the characters of <paramref name="value"/>.</returns>
    public static RegularExpression CharSetOf(string value) => new CharSetExpression(value.ToCharArray());

    /// <summary>
    /// Matches any one character accepted by <paramref name="predicate"/>. Every UTF-16 code
    /// unit from <see cref="char.MinValue"/> through <see cref="char.MaxValue"/> (inclusive)
    /// is tested.
    /// </summary>
    /// <param name="predicate">Returns <c>true</c> for characters that belong to the set.</param>
    /// <returns>A <see cref="CharSetExpression"/> holding the accepted characters.</returns>
    public static RegularExpression CharSetOf(Func<char, bool> predicate)
        => new CharSetExpression(Iterate(char.MinValue, char.MaxValue).Where(predicate).ToArray());

    /// <summary>
    /// Character range, both ends inclusive:
    /// [a-b]
    /// </summary>
    /// <param name="a">First character of the range.</param>
    /// <param name="b">Last character of the range.</param>
    /// <returns>A <see cref="CharSetExpression"/> holding every character from <paramref name="a"/> to <paramref name="b"/>.</returns>
    public static RegularExpression Range(char a, char b) => new CharSetExpression(Iterate(a, b).ToArray());

    /// <summary>
    /// Lazily enumerates every character from <paramref name="a"/> to <paramref name="b"/>,
    /// both inclusive. Yields nothing when <paramref name="a"/> is greater than <paramref name="b"/>.
    /// </summary>
    private static IEnumerable<char> Iterate(char a, char b)
    {
        // Count with an int rather than a char: a char counter wraps from char.MaxValue
        // back to 0, which previously forced an early exit that silently dropped
        // char.MaxValue from the result even when it was the requested upper bound.
        for (int code = a; code <= b; code++)
        {
            yield return (char)code;
        }
    }
}
/// <summary>
/// Expression node produced by <c>RegularExpression.Empty</c>.
/// </summary>
public class EmptyExpression : RegularExpression
{
    /// <summary>
    /// Creates two fresh states and links the start state to the final state with a
    /// single <c>EmptyChar.Empty</c> transition.
    /// </summary>
    /// <returns>An automaton with that start state and one final state.</returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        var accept = new NondeterministicState();
        var entry = new NondeterministicState();

        entry.AddTransaction(EmptyChar.Empty, accept);

        return new NondeterministicFiniteAutomation(entry, [accept]);
    }
}
/// <summary>
/// Expression node holding exactly one character.
/// </summary>
/// <param name="symbol">The character exposed through <see cref="Symbol"/>.</param>
public class SymbolExpression(char symbol) : RegularExpression
{
    /// <summary>The character this node was created with.</summary>
    public char Symbol { get; } = symbol;

    /// <summary>
    /// Creates two fresh states and links the start state to the final state with one
    /// transition keyed by an <c>EmptyChar</c> constructed from <see cref="Symbol"/>.
    /// </summary>
    /// <returns>An automaton with that start state and one final state.</returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        var accept = new NondeterministicState();
        var entry = new NondeterministicState();

        entry.AddTransaction(new EmptyChar(Symbol), accept);

        return new NondeterministicFiniteAutomation(entry, [accept]);
    }
}
/// <summary>
/// Expression node for the alternation <c>left|right</c>.
/// </summary>
/// <param name="left">First alternative.</param>
/// <param name="right">Second alternative.</param>
public class AlternationExpression(RegularExpression left, RegularExpression right) : RegularExpression
{
    /// <summary>First alternative.</summary>
    public RegularExpression Left { get; } = left;

    /// <summary>Second alternative.</summary>
    public RegularExpression Right { get; } = right;

    /// <summary>
    /// Converts both operands, funnels every final state of either operand into one new
    /// final state via <c>EmptyChar.Empty</c> transitions, and adds a new start state with
    /// <c>EmptyChar.Empty</c> transitions to the start of each operand automaton.
    /// </summary>
    /// <returns>An automaton with the new start state and the single new final state.</returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicFiniteAutomation leftNfa = Left.Convert2Nfa();
        NondeterministicFiniteAutomation rightNfa = Right.Convert2Nfa();

        // Shared exit: the left operand's final states are wired first, then the right's.
        var accept = new NondeterministicState();
        foreach (NondeterministicState leftFinal in leftNfa.FinalStates)
        {
            leftFinal.AddTransaction(EmptyChar.Empty, accept);
        }

        foreach (NondeterministicState rightFinal in rightNfa.FinalStates)
        {
            rightFinal.AddTransaction(EmptyChar.Empty, accept);
        }

        // Shared entry that branches into either operand.
        var entry = new NondeterministicState();
        entry.AddTransaction(EmptyChar.Empty, leftNfa.Start);
        entry.AddTransaction(EmptyChar.Empty, rightNfa.Start);

        return new NondeterministicFiniteAutomation(entry, [accept]);
    }
}
/// <summary>
/// Expression node for <c>first</c> followed by <c>second</c>.
/// </summary>
/// <param name="first">Expression matched first.</param>
/// <param name="second">Expression matched afterwards.</param>
public class ConcatenationExpression(RegularExpression first, RegularExpression second) : RegularExpression
{
    /// <summary>Expression matched first.</summary>
    public RegularExpression First { get; } = first;

    /// <summary>Expression matched afterwards.</summary>
    public RegularExpression Second { get; } = second;

    /// <summary>
    /// Converts both operands and adds an <c>EmptyChar.Empty</c> transition from every
    /// final state of the first automaton to the start state of the second.
    /// </summary>
    /// <returns>
    /// An automaton that starts at the first operand's start state and whose final states
    /// are the second operand's final states.
    /// </returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicFiniteAutomation head = First.Convert2Nfa();
        NondeterministicFiniteAutomation tail = Second.Convert2Nfa();

        NondeterministicState tailEntry = tail.Start;
        foreach (NondeterministicState headFinal in head.FinalStates)
        {
            headFinal.AddTransaction(EmptyChar.Empty, tailEntry);
        }

        return new NondeterministicFiniteAutomation(head.Start, tail.FinalStates);
    }
}
/// <summary>
/// Expression node for the Kleene closure <c>inner*</c>.
/// </summary>
/// <param name="inner">Expression being repeated.</param>
public class KleeneExpression(RegularExpression inner) : RegularExpression
{
    /// <summary>Expression being repeated.</summary>
    public RegularExpression Inner { get; } = inner;

    /// <summary>
    /// Converts the inner expression and adds one new state that serves as both the start
    /// and the only final state. That state has an <c>EmptyChar.Empty</c> transition into
    /// the inner automaton, and every inner final state has an <c>EmptyChar.Empty</c>
    /// transition back to it.
    /// </summary>
    /// <returns>An automaton whose start state and single final state are the same new state.</returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicFiniteAutomation body = Inner.Convert2Nfa();

        var hub = new NondeterministicState();

        // Enter the repeated body from the hub...
        hub.AddTransaction(EmptyChar.Empty, body.Start);

        // ...and return to the hub once the body has finished.
        foreach (NondeterministicState bodyFinal in body.FinalStates)
        {
            bodyFinal.AddTransaction(EmptyChar.Empty, hub);
        }

        return new NondeterministicFiniteAutomation(hub, [hub]);
    }
}
/// <summary>
/// Expression node holding a non-empty set of characters.
/// </summary>
public class CharSetExpression : RegularExpression
{
    /// <summary>Private copy of the characters supplied to the constructor.</summary>
    public char[] Set { get; }

    /// <summary>
    /// Copies <paramref name="set"/> into <see cref="Set"/>.
    /// </summary>
    /// <param name="set">Characters of the set; must contain at least one element.</param>
    /// <exception cref="InvalidOperationException"><paramref name="set"/> is empty.</exception>
    public CharSetExpression(Span<char> set)
    {
        if (set.IsEmpty)
        {
            throw new InvalidOperationException();
        }

        Set = set.ToArray();
    }

    /// <summary>
    /// Creates a start and a final state and adds one transition between them for each
    /// element of <see cref="Set"/>, in array order.
    /// </summary>
    /// <returns>An automaton with that start state and one final state.</returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        var entry = new NondeterministicState();
        var accept = new NondeterministicState();

        char[] symbols = Set;
        for (int index = 0; index < symbols.Length; index++)
        {
            entry.AddTransaction(new EmptyChar(symbols[index]), accept);
        }

        return new NondeterministicFiniteAutomation(entry, [accept]);
    }
}
/// <summary>
/// Expression node holding a non-empty literal string.
/// </summary>
public class StringExpression : RegularExpression
{
    /// <summary>The literal text supplied to the constructor.</summary>
    public string Word { get; }

    /// <summary>
    /// Stores <paramref name="word"/> in <see cref="Word"/>.
    /// </summary>
    /// <param name="word">The literal text; must be neither <c>null</c> nor empty.</param>
    /// <exception cref="InvalidOperationException"><paramref name="word"/> is <c>null</c> or empty.</exception>
    public StringExpression(string word)
    {
        if (string.IsNullOrEmpty(word))
        {
            throw new InvalidOperationException();
        }

        Word = word;
    }

    /// <summary>
    /// Builds a chain of states: starting from a fresh start state, each character of
    /// <see cref="Word"/> adds one new state reached by a transition for that character.
    /// The last state created is the final state.
    /// </summary>
    /// <returns>An automaton with the chain's first state as start and its last state as the only final state.</returns>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        var entry = new NondeterministicState();

        // Walk the word, extending the chain by one state per character.
        NondeterministicState tail = entry;
        foreach (char letter in Word)
        {
            var successor = new NondeterministicState();
            tail.AddTransaction(new EmptyChar(letter), successor);
            tail = successor;
        }

        return new NondeterministicFiniteAutomation(entry, [tail]);
    }
}