feat: 正则词法识别器 (#1)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/1 Co-authored-by: jackfiled <xcrenchangjun@outlook.com> Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
This commit is contained in:
parent
6ff8622906
commit
3c0d51cec5
17
CanonSharp.Common/Abstractions/ISourceReader.cs
Normal file
17
CanonSharp.Common/Abstractions/ISourceReader.cs
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
namespace CanonSharp.Common.Abstractions;
|
||||||
|
|
||||||
|
/// <summary>
/// Character source with single-character lookahead, used as the input of the lexical scanner.
/// </summary>
public interface ISourceReader
{
    /// <summary>
    /// Peeks at the next character without consuming it.
    /// </summary>
    /// <param name="c">The next character when one is available.</param>
    /// <returns><c>true</c> when a character could be peeked; <c>false</c> when the source is exhausted.</returns>
    bool TryPeek(out char c);

    /// <summary>
    /// Reads the next character and advances past it.
    /// </summary>
    /// <returns>The character that was read.</returns>
    char Read();
}
|
|
@ -1,11 +1,13 @@
|
||||||
namespace CanonSharp.Common.LexicalAnalyzer;
|
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||||
|
|
||||||
public class DeterministicState : IEquatable<DeterministicState>
|
public class DeterministicState(HashSet<NondeterministicState> closure) : IEquatable<DeterministicState>
|
||||||
{
|
{
|
||||||
public Guid Id { get; } = Guid.NewGuid();
|
public Guid Id { get; } = Guid.NewGuid();
|
||||||
|
|
||||||
public Dictionary<char, DeterministicState> Transaction { get; } = [];
|
public Dictionary<char, DeterministicState> Transaction { get; } = [];
|
||||||
|
|
||||||
|
public HashSet<NondeterministicState> Closure { get; } = closure;
|
||||||
|
|
||||||
public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);
|
public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);
|
||||||
|
|
||||||
public override bool Equals(object? obj) => obj is DeterministicState other && Equals(other);
|
public override bool Equals(object? obj) => obj is DeterministicState other && Equals(other);
|
||||||
|
@ -35,7 +37,7 @@ public class DeterministicFiniteAutomation
|
||||||
HashSet<DeterministicState> finalStates = [];
|
HashSet<DeterministicState> finalStates = [];
|
||||||
|
|
||||||
HashSet<NondeterministicState> startClosure = nfa.Start.CalculateEmptyClosure();
|
HashSet<NondeterministicState> startClosure = nfa.Start.CalculateEmptyClosure();
|
||||||
DeterministicState start = new();
|
DeterministicState start = new(startClosure);
|
||||||
map.Add(new NondeterministicStateSet(startClosure), start);
|
map.Add(new NondeterministicStateSet(startClosure), start);
|
||||||
queue.Enqueue(new Pair(startClosure, start));
|
queue.Enqueue(new Pair(startClosure, start));
|
||||||
|
|
||||||
|
@ -50,7 +52,7 @@ public class DeterministicFiniteAutomation
|
||||||
|
|
||||||
foreach (NondeterministicState state in pair.States)
|
foreach (NondeterministicState state in pair.States)
|
||||||
{
|
{
|
||||||
foreach (KeyValuePair<EmptyChar,HashSet<NondeterministicState>> transaction in
|
foreach (KeyValuePair<EmptyChar, HashSet<NondeterministicState>> transaction in
|
||||||
state.Transactions.Where(p => !p.Key.IsEmpty))
|
state.Transactions.Where(p => !p.Key.IsEmpty))
|
||||||
{
|
{
|
||||||
HashSet<NondeterministicState> closure = [];
|
HashSet<NondeterministicState> closure = [];
|
||||||
|
@ -64,16 +66,19 @@ public class DeterministicFiniteAutomation
|
||||||
{
|
{
|
||||||
n.UnionWith(closure);
|
n.UnionWith(closure);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
next.Add(transaction.Key.Char, closure);
|
next.Add(transaction.Key.Char, closure);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
foreach (KeyValuePair<char,HashSet<NondeterministicState>> transaction in next)
|
foreach (KeyValuePair<char, HashSet<NondeterministicState>> transaction in next)
|
||||||
{
|
{
|
||||||
NondeterministicStateSet set = new(transaction.Value);
|
NondeterministicStateSet set = new(transaction.Value);
|
||||||
if (!map.TryGetValue(set, out DeterministicState? nextState))
|
if (!map.TryGetValue(set, out DeterministicState? nextState))
|
||||||
{
|
{
|
||||||
nextState = new DeterministicState();
|
nextState = new DeterministicState(transaction.Value);
|
||||||
map.Add(set, nextState);
|
map.Add(set, nextState);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
92
CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs
Normal file
92
CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
using System.Diagnostics.CodeAnalysis;
|
||||||
|
using CanonSharp.Common.Abstractions;
|
||||||
|
|
||||||
|
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||||
|
|
||||||
|
/// <summary>
/// Splits the characters supplied by an <see cref="ISourceReader"/> into tokens by walking a
/// deterministic finite automaton.
/// </summary>
/// <param name="startState">State the automaton starts from, and returns to after every token.</param>
/// <param name="finalStateMap">Maps each accepting state to the token definition it recognizes.</param>
/// <param name="skippedTokens">Token definitions that are recognized but never returned by <see cref="TryRead"/>.</param>
/// <param name="reader">Character source to scan.</param>
public class LexicalScanner(
    DeterministicState startState,
    Dictionary<DeterministicState, LexicalToken> finalStateMap,
    HashSet<LexicalToken> skippedTokens,
    ISourceReader reader)
{
    private readonly DeterministicState _startState = startState;

    // Characters consumed since the last emitted token; they become the token's literal value.
    private readonly List<char> _readHistory = [];

    private DeterministicState _currentState = startState;

    /// <summary>
    /// Reads the next token that is not in the skipped set.
    /// </summary>
    /// <param name="token">The token that was read, or <c>null</c> when the source is exhausted.</param>
    /// <returns><c>true</c> when a token was read; <c>false</c> at the end of the source.</returns>
    /// <exception cref="InvalidOperationException">The input cannot be matched by any defined token.</exception>
    public bool TryRead([NotNullWhen(true)] out LexicalToken? token)
    {
        while (TryReadInternal(out token))
        {
            if (!skippedTokens.Contains(token))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Reads the next token, skipped or not.
    /// </summary>
    private bool TryReadInternal([NotNullWhen(true)] out LexicalToken? token)
    {
        while (reader.TryPeek(out char c))
        {
            if (_currentState.Transaction.TryGetValue(c, out DeterministicState? nextState))
            {
                // A transition exists: consume the character and keep extending the current lexeme.
                _currentState = nextState;
                _readHistory.Add(reader.Read());
                continue;
            }

            // No transition: the lexeme ends here. Nothing was consumed for a zero-length lexeme,
            // so emitting it would repeat forever; treat that as unrecognized input instead.
            if (_readHistory.Count == 0 ||
                !finalStateMap.TryGetValue(_currentState, out LexicalToken? definition))
            {
                throw new InvalidOperationException(
                    $"Unexpected character '{c}' (U+{(int)c:X4}) after \"{new string(_readHistory.ToArray())}\".");
            }

            token = Emit(definition);
            return true;
        }

        // End of source: flush a pending lexeme, if any.
        if (_readHistory.Count > 0)
        {
            if (!finalStateMap.TryGetValue(_currentState, out LexicalToken? definition))
            {
                throw new InvalidOperationException(
                    $"Unexpected end of input after \"{new string(_readHistory.ToArray())}\".");
            }

            token = Emit(definition);
            return true;
        }

        token = null;
        return false;
    }

    /// <summary>
    /// Builds a token from the characters read so far and resets the scanner for the next token.
    /// </summary>
    private LexicalToken Emit(LexicalToken definition)
    {
        LexicalToken result = new(definition, new string(_readHistory.ToArray()));

        _readHistory.Clear();
        _currentState = _startState;
        return result;
    }

    /// <summary>
    /// Creates a builder in which <see cref="LexicalToken.LineBreaker"/> and
    /// <see cref="LexicalToken.WhiteSpace"/> are already defined and marked as skipped.
    /// </summary>
    public static LexicalScannerBuilder CreateDefaultBuilder()
    {
        LexicalScannerBuilder builder = new();

        builder.DefineToken(LexicalToken.LineBreaker);
        builder.DefineToken(LexicalToken.WhiteSpace);

        builder.AddSkippedToken(LexicalToken.LineBreaker);
        builder.AddSkippedToken(LexicalToken.WhiteSpace);

        return builder;
    }

    /// <summary>
    /// Creates a builder with no tokens defined.
    /// </summary>
    public static LexicalScannerBuilder CreateEmptyBuilder() => new();
}
|
62
CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs
Normal file
62
CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
using CanonSharp.Common.Abstractions;
|
||||||
|
|
||||||
|
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||||
|
|
||||||
|
/// <summary>
/// Collects token definitions and builds a <see cref="LexicalScanner"/> that recognizes them.
/// </summary>
public class LexicalScannerBuilder
{
    // Accepting NFA state -> token definition whose expression produced that state.
    private readonly Dictionary<NondeterministicState, LexicalToken> _finalStateMap = [];
    private readonly List<NondeterministicFiniteAutomation> _nondeterministicFiniteAutomations = [];
    private readonly HashSet<LexicalToken> _skippedTokens = [];

    internal LexicalScannerBuilder()
    {
    }

    /// <summary>
    /// Registers a token definition by converting its expression to an NFA.
    /// </summary>
    /// <param name="token">Token definition to recognize.</param>
    public void DefineToken(LexicalToken token)
    {
        NondeterministicFiniteAutomation automation = token.Expression.Convert2Nfa();
        _nondeterministicFiniteAutomations.Add(automation);

        foreach (NondeterministicState state in automation.FinalStates)
        {
            _finalStateMap.Add(state, token);
        }
    }

    /// <summary>
    /// Marks a token definition as skipped, i.e. recognized but not returned by the scanner.
    /// </summary>
    /// <param name="token">Token definition to skip.</param>
    public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);

    /// <summary>
    /// Builds a scanner over <paramref name="reader"/> for all tokens defined so far.
    /// </summary>
    /// <param name="reader">Character source the scanner reads from.</param>
    /// <returns>A scanner whose accepting states map to the highest-priority matching token.</returns>
    public LexicalScanner Build(ISourceReader reader)
    {
        NondeterministicFiniteAutomation finalAutomation = Combine();
        DeterministicFiniteAutomation deterministicFiniteAutomation =
            DeterministicFiniteAutomation.Create(finalAutomation);

        Dictionary<DeterministicState, LexicalToken> finalTokenMap = [];

        foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
        {
            finalTokenMap.Add(state, SelectToken(state));
        }

        // Hand the scanner its own copy so later AddSkippedToken calls on this builder
        // cannot change the behavior of a scanner that has already been built.
        return new LexicalScanner(deterministicFiniteAutomation.Start, finalTokenMap,
            new HashSet<LexicalToken>(_skippedTokens), reader);
    }

    /// <summary>
    /// Picks the highest-priority token among the NFA accepting states contained in a DFA state.
    /// On equal priority the first one encountered wins.
    /// </summary>
    private LexicalToken SelectToken(DeterministicState state)
    {
        LexicalToken? best = null;

        foreach (NondeterministicState candidate in state.Closure)
        {
            if (_finalStateMap.TryGetValue(candidate, out LexicalToken? token) &&
                (best is null || token.Priority > best.Priority))
            {
                best = token;
            }
        }

        return best ?? throw new InvalidOperationException(
            "An accepting DFA state does not contain any accepting NFA state of a defined token.");
    }

    /// <summary>
    /// Joins all registered automata under one new start state via empty transitions.
    /// </summary>
    private NondeterministicFiniteAutomation Combine()
    {
        NondeterministicState head = new();
        NondeterministicFiniteAutomation result = new(head, []);

        foreach (NondeterministicFiniteAutomation automation in _nondeterministicFiniteAutomations)
        {
            head.AddTransaction(EmptyChar.Empty, automation.Start);
            result.FinalStates.UnionWith(automation.FinalStates);
        }

        return result;
    }
}
|
56
CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs
Normal file
56
CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
using System.Globalization;
|
||||||
|
|
||||||
|
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||||
|
|
||||||
|
/// <summary>
/// A token definition (expression plus priority) or a scanned token instance (a definition
/// together with the literal text that was matched). Equality is based on the definition's
/// identity, so a scanned token compares equal to the definition it was created from.
/// </summary>
public class LexicalToken : IEquatable<LexicalToken>
{
    private readonly Guid _tokenId;

    /// <summary>
    /// Regular expression that recognizes this token.
    /// </summary>
    public RegularExpression Expression { get; }

    /// <summary>
    /// Priority of the definition; the scanner builder prefers the token with the higher value
    /// when several definitions accept the same input.
    /// </summary>
    public int Priority { get; }

    /// <summary>
    /// Text matched by the scanner; empty for a token definition.
    /// </summary>
    public string LiteralValue { get; }

    /// <summary>
    /// Creates a new token definition with a fresh identity.
    /// </summary>
    /// <param name="expression">Expression that recognizes the token.</param>
    /// <param name="priority">Priority of the definition.</param>
    public LexicalToken(RegularExpression expression, int priority)
    {
        _tokenId = Guid.NewGuid();
        Expression = expression;
        Priority = priority;
        LiteralValue = string.Empty;
    }

    /// <summary>
    /// Creates a scanned token that shares the identity of <paramref name="definition"/>.
    /// </summary>
    internal LexicalToken(LexicalToken definition, string literalValue)
    {
        _tokenId = definition._tokenId;
        Expression = definition.Expression;
        Priority = definition.Priority;
        LiteralValue = literalValue;
    }

    public bool Equals(LexicalToken? other) => other is not null && _tokenId == other._tokenId;

    public override bool Equals(object? obj) => obj is LexicalToken other && Equals(other);

    public override int GetHashCode() => _tokenId.GetHashCode();

    // Null-safe on both sides: two nulls are equal, and a null left operand no longer throws.
    public static bool operator ==(LexicalToken? a, LexicalToken? b) => a is null ? b is null : a.Equals(b);

    public static bool operator !=(LexicalToken? a, LexicalToken? b) => !(a == b);

    /// <summary>
    /// Matches a single whitespace character: any character in the Unicode space-separator
    /// category, or a horizontal tab, vertical tab or form feed.
    /// </summary>
    public static readonly LexicalToken WhiteSpace = new(
        RegularExpression.Alternate(
            RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator),
            RegularExpression.CharSetOf("\u0009\u000B\u000C")), int.MinValue);

    /// <summary>
    /// Matches a line break: one of the single line-break characters, or the sequence CR LF.
    /// </summary>
    public static readonly LexicalToken LineBreaker = new(
        RegularExpression.Alternate(
            RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029"),
            RegularExpression.String("\r\n")), int.MinValue);
}
|
|
@ -4,17 +4,76 @@ public abstract class RegularExpression
|
||||||
{
|
{
|
||||||
public abstract NondeterministicFiniteAutomation Convert2Nfa();
|
public abstract NondeterministicFiniteAutomation Convert2Nfa();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// 匹配空字符串
|
||||||
|
/// </summary>
|
||||||
public static RegularExpression Empty => new EmptyExpression();
|
public static RegularExpression Empty => new EmptyExpression();
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// 匹配单个字符
|
||||||
|
/// c
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="c"></param>
|
||||||
|
/// <returns></returns>
|
||||||
public static RegularExpression Single(char c) => new SymbolExpression(c);
|
public static RegularExpression Single(char c) => new SymbolExpression(c);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// left|right
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="left"></param>
|
||||||
|
/// <param name="right"></param>
|
||||||
|
/// <returns></returns>
|
||||||
public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
|
public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
|
||||||
new AlternationExpression(left, right);
|
new AlternationExpression(left, right);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// left-right
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="first"></param>
|
||||||
|
/// <param name="second"></param>
|
||||||
|
/// <returns></returns>
|
||||||
public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
|
public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
|
||||||
new ConcatenationExpression(first, second);
|
new ConcatenationExpression(first, second);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// inner*
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="inner"></param>
|
||||||
|
/// <returns></returns>
|
||||||
public static RegularExpression Kleene(RegularExpression inner) => new KleeneExpression(inner);
|
public static RegularExpression Kleene(RegularExpression inner) => new KleeneExpression(inner);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// value
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="value"></param>
|
||||||
|
/// <returns></returns>
|
||||||
|
public static RegularExpression String(string value) => new StringExpression(value);
|
||||||
|
|
||||||
|
public static RegularExpression CharSetOf(string value) => new CharSetExpression(value.ToCharArray());
|
||||||
|
|
||||||
|
public static RegularExpression CharSetOf(Func<char, bool> predicate)
|
||||||
|
=> new CharSetExpression(Iterate(char.MinValue, char.MaxValue).Where(predicate).ToArray());
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// [a-b]
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="a"></param>
|
||||||
|
/// <param name="b"></param>
|
||||||
|
/// <returns></returns>
|
||||||
|
public static RegularExpression Range(char a, char b) => new CharSetExpression(Iterate(a, b).ToArray());
|
||||||
|
|
||||||
|
/// <summary>
/// Enumerates every character from <paramref name="a"/> to <paramref name="b"/>, both inclusive.
/// </summary>
/// <param name="a">First character of the range.</param>
/// <param name="b">Last character of the range; nothing is produced when it is less than <paramref name="a"/>.</param>
/// <returns>The characters of the range in ascending order.</returns>
private static IEnumerable<char> Iterate(char a, char b)
{
    // Count with an int: a char counter would wrap around after char.MaxValue and never
    // terminate, while an int lets the upper bound itself be produced even when it is char.MaxValue.
    for (int code = a; code <= b; code++)
    {
        yield return (char)code;
    }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public class EmptyExpression : RegularExpression
|
public class EmptyExpression : RegularExpression
|
||||||
|
@ -107,3 +166,59 @@ public class KleeneExpression(RegularExpression inner) : RegularExpression
|
||||||
return new NondeterministicFiniteAutomation(final, [final]);
|
return new NondeterministicFiniteAutomation(final, [final]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Expression that matches exactly one character out of a non-empty set.
/// </summary>
public class CharSetExpression : RegularExpression
{
    /// <summary>
    /// Characters accepted by this expression.
    /// </summary>
    public char[] Set { get; }

    /// <summary>
    /// Creates the expression from a copy of <paramref name="set"/>.
    /// </summary>
    /// <param name="set">Characters to accept.</param>
    /// <exception cref="InvalidOperationException"><paramref name="set"/> is empty.</exception>
    public CharSetExpression(Span<char> set)
    {
        if (set.IsEmpty)
        {
            throw new InvalidOperationException();
        }

        Set = set.ToArray();
    }

    /// <summary>
    /// Builds a two-state NFA with one transition from the start state to the accepting
    /// state per character in <see cref="Set"/>.
    /// </summary>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicState entry = new();
        NondeterministicState accept = new();

        for (int i = 0; i < Set.Length; i++)
        {
            entry.AddTransaction(new EmptyChar(Set[i]), accept);
        }

        return new NondeterministicFiniteAutomation(entry, [accept]);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Expression that matches one fixed, non-empty string.
/// </summary>
public class StringExpression : RegularExpression
{
    /// <summary>
    /// The string to match.
    /// </summary>
    public string Word { get; }

    /// <summary>
    /// Creates the expression for <paramref name="word"/>.
    /// </summary>
    /// <param name="word">String to match.</param>
    /// <exception cref="InvalidOperationException"><paramref name="word"/> is null or empty.</exception>
    public StringExpression(string word)
    {
        if (word is not { Length: > 0 })
        {
            throw new InvalidOperationException();
        }

        Word = word;
    }

    /// <summary>
    /// Builds a chain of states with one transition per character of <see cref="Word"/>;
    /// the last state of the chain is the accepting state.
    /// </summary>
    public override NondeterministicFiniteAutomation Convert2Nfa()
    {
        NondeterministicState start = new();
        NondeterministicState tail = start;

        foreach (char c in Word)
        {
            NondeterministicState next = new();
            tail.AddTransaction(new EmptyChar(c), next);
            tail = next;
        }

        return new NondeterministicFiniteAutomation(start, [tail]);
    }
}
|
||||||
|
|
78
CanonSharp.Common/Reader/SourceReader.cs
Normal file
78
CanonSharp.Common/Reader/SourceReader.cs
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
using CanonSharp.Common.Abstractions;
|
||||||
|
|
||||||
|
namespace CanonSharp.Common.Reader;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="ISourceReader"/> over a file, read in chunks through an internal buffer.
/// Dispose the reader to release the underlying file.
/// </summary>
public class SourceReader : ISourceReader, IDisposable
{
    private readonly StreamReader _reader;

    // Character fetched by TryPeek that has not been consumed by Read yet.
    private char? _lookAhead;

    private readonly char[] _buffer = new char[1024];

    // Number of valid characters currently in _buffer.
    private int _length;

    // Index of the next unread character in _buffer.
    private int _count;

    /// <summary>
    /// Opens <paramref name="filename"/> for reading.
    /// </summary>
    /// <param name="filename">Path of the source file.</param>
    /// <exception cref="InvalidOperationException">The file does not exist.</exception>
    public SourceReader(string filename)
    {
        FileInfo source = new(filename);

        if (!source.Exists)
        {
            throw new InvalidOperationException($"Source file '{filename}' does not exist.");
        }

        _reader = new StreamReader(filename);
    }

    /// <summary>
    /// Reads and consumes the next character.
    /// </summary>
    /// <returns>The character that was consumed.</returns>
    /// <exception cref="InvalidOperationException">The end of the file has been reached.</exception>
    public char Read()
    {
        if (_lookAhead.HasValue)
        {
            // A previously peeked character is handed out first.
            char result = _lookAhead.Value;
            _lookAhead = null;
            return result;
        }

        if (!TryFetchChar(out char c))
        {
            throw new InvalidOperationException("Cannot read past the end of the source file.");
        }

        return c;
    }

    /// <summary>
    /// Peeks at the next character without consuming it.
    /// </summary>
    /// <param name="c">The next character when one is available.</param>
    /// <returns><c>true</c> when a character is available; <c>false</c> at the end of the file.</returns>
    public bool TryPeek(out char c)
    {
        if (_lookAhead.HasValue)
        {
            c = _lookAhead.Value;
            return true;
        }

        if (!TryFetchChar(out c))
        {
            return false;
        }

        // Remember the fetched character so the next Read or TryPeek sees it again.
        _lookAhead = c;
        return true;
    }

    /// <summary>
    /// Closes the underlying file.
    /// </summary>
    public void Dispose()
    {
        _reader.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Takes the next character from the buffer, refilling it from the file when exhausted.
    /// </summary>
    private bool TryFetchChar(out char c)
    {
        if (_length == _count)
        {
            // Buffer fully consumed (or never filled): load the next chunk.
            _length = _reader.Read(_buffer);
            _count = 0;
        }

        if (_length == 0)
        {
            c = char.MinValue;
            return false;
        }

        c = _buffer[_count];
        _count += 1;
        return true;
    }
}
|
33
CanonSharp.Common/Reader/StringReader.cs
Normal file
33
CanonSharp.Common/Reader/StringReader.cs
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
using CanonSharp.Common.Abstractions;
|
||||||
|
|
||||||
|
namespace CanonSharp.Common.Reader;
|
||||||
|
|
||||||
|
/// <summary>
/// <see cref="ISourceReader"/> over an in-memory string.
/// </summary>
/// <param name="source">Text to read.</param>
public class StringReader(string source) : ISourceReader
{
    // Index of the next unread character in the source string.
    private int _pos;

    /// <summary>
    /// Reads and consumes the next character.
    /// </summary>
    /// <returns>The character that was consumed.</returns>
    /// <exception cref="InvalidOperationException">The end of the string has been reached.</exception>
    public char Read()
    {
        if (_pos < source.Length)
        {
            return source[_pos++];
        }

        throw new InvalidOperationException();
    }

    /// <summary>
    /// Peeks at the next character without consuming it.
    /// </summary>
    /// <param name="c">The next character, or <see cref="char.MinValue"/> when none is left.</param>
    /// <returns><c>true</c> when a character is available; otherwise <c>false</c>.</returns>
    public bool TryPeek(out char c)
    {
        bool available = _pos < source.Length;
        c = available ? source[_pos] : char.MinValue;
        return available;
    }
}
|
25
CanonSharp.Tests/LexicalAnalyzerTests/ReaderTests.cs
Normal file
25
CanonSharp.Tests/LexicalAnalyzerTests/ReaderTests.cs
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
using CanonSharp.Common.Abstractions;
|
||||||
|
using StringReader = CanonSharp.Common.Reader.StringReader;
|
||||||
|
|
||||||
|
namespace CanonSharp.Tests.LexicalAnalyzerTests;
|
||||||
|
|
||||||
|
public class ReaderTests
{
    /// <summary>
    /// Peeking is repeatable and does not consume; reading consumes exactly one character.
    /// </summary>
    [Fact]
    public void StringReaderTest()
    {
        StringReader reader = new("ab");

        foreach (char expected in "ab")
        {
            // Two peeks in a row must see the same character.
            for (int attempt = 0; attempt < 2; attempt++)
            {
                Assert.True(reader.TryPeek(out char peeked));
                Assert.Equal(expected, peeked);
            }

            Assert.Equal(expected, reader.Read());
        }

        Assert.False(reader.TryPeek(out _));
    }
}
|
|
@ -43,6 +43,15 @@ public class RegularExpressionTests
|
||||||
s => s.Transactions.ContainsKey(new EmptyChar('a')));
|
s => s.Transactions.ContainsKey(new EmptyChar('a')));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void RangeTest()
|
||||||
|
{
|
||||||
|
RegularExpression expression = RegularExpression.Range('a', 'z');
|
||||||
|
NondeterministicFiniteAutomation nfa = expression.Convert2Nfa();
|
||||||
|
|
||||||
|
Assert.Equal(26, nfa.Start.Transactions.Count);
|
||||||
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void ConvertTest()
|
public void ConvertTest()
|
||||||
{
|
{
|
||||||
|
@ -74,4 +83,22 @@ public class RegularExpressionTests
|
||||||
|
|
||||||
Assert.Equal('a', map[new NondeterministicStateSet([key2, key1])]);
|
Assert.Equal('a', map[new NondeterministicStateSet([key2, key1])]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void PrefixConvertTest()
|
||||||
|
{
|
||||||
|
RegularExpression expression = RegularExpression.Alternate(
|
||||||
|
RegularExpression.String("string"),
|
||||||
|
RegularExpression.String("string1"));
|
||||||
|
|
||||||
|
NondeterministicFiniteAutomation nfa = expression.Convert2Nfa();
|
||||||
|
DeterministicFiniteAutomation.Create(nfa);
|
||||||
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public void WhiteSpaceConvertTest()
|
||||||
|
{
|
||||||
|
NondeterministicFiniteAutomation nfa = LexicalToken.WhiteSpace.Expression.Convert2Nfa();
|
||||||
|
DeterministicFiniteAutomation.Create(nfa);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
71
CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs
Normal file
71
CanonSharp.Tests/LexicalAnalyzerTests/ScanTests.cs
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
using CanonSharp.Common.LexicalAnalyzer;
|
||||||
|
using StringReader = CanonSharp.Common.Reader.StringReader;
|
||||||
|
|
||||||
|
namespace CanonSharp.Tests.LexicalAnalyzerTests;
|
||||||
|
|
||||||
|
public class ScanTests
{
    /// <summary>
    /// A single defined token is recognized together with its literal text.
    /// </summary>
    [Fact]
    public void ScanTest1()
    {
        LexicalToken token1 = new(RegularExpression.String("ab"), 1);

        LexicalScannerBuilder builder = LexicalScanner.CreateEmptyBuilder();
        builder.DefineToken(token1);

        LexicalScanner scanner = builder.Build(new StringReader("ab"));

        Assert.True(scanner.TryRead(out LexicalToken? result));
        Assert.Equal(token1, result);
        Assert.Equal("ab", result.LiteralValue);
    }

    /// <summary>
    /// Keywords, delimiters and identifiers are recognized in order while whitespace and
    /// line breaks are skipped.
    /// </summary>
    [Fact]
    public void ScanTest2()
    {
        RegularExpression letter = RegularExpression.Range('a', 'z');
        RegularExpression letters = RegularExpression.Kleene(RegularExpression.Range('a', 'z'));

        LexicalToken stringKeyword = new(RegularExpression.String("string"), 100);
        LexicalToken periodDelimiter = new(RegularExpression.Single('.'), 100);
        LexicalToken semiColonDelimiter = new(RegularExpression.Single(';'), 100);
        LexicalToken identifier = new(RegularExpression.Concatenate(letter, letters), 0);
        LexicalToken assigner = new(RegularExpression.String(":="), 100);

        LexicalScannerBuilder builder = LexicalScanner.CreateDefaultBuilder();
        LexicalToken[] definitions =
            [stringKeyword, periodDelimiter, semiColonDelimiter, identifier, assigner];

        foreach (LexicalToken definition in definitions)
        {
            builder.DefineToken(definition);
        }

        StringReader reader = new("""
            string value := origin;
            string abc := value.
            """);
        LexicalScanner scanner = builder.Build(reader);

        LexicalToken[] expected =
        [
            stringKeyword, identifier, assigner, identifier, semiColonDelimiter,
            stringKeyword, identifier, assigner, identifier, periodDelimiter
        ];
        Validate(scanner, expected);

        Assert.False(scanner.TryRead(out _));
    }

    /// <summary>
    /// Asserts that the scanner yields exactly the expected tokens, in order.
    /// </summary>
    private static void Validate(LexicalScanner scanner, IEnumerable<LexicalToken> expectedTokens)
    {
        foreach (LexicalToken expected in expectedTokens)
        {
            bool hasToken = scanner.TryRead(out LexicalToken? actual);

            Assert.True(hasToken);
            Assert.NotNull(actual);
            Assert.Equal(expected, actual);
        }
    }
}
|
Loading…
Reference in New Issue
Block a user