CanonSharp/CanonSharp.Common/LexicalAnalyzer/LexicalScanner.cs
jackfiled 3c0d51cec5 feat: 正则词法识别器 (#1)
Reviewed-on: https://git.bupt-hpc.cn/jackfiled/CanonSharp/pulls/1
Co-authored-by: jackfiled <xcrenchangjun@outlook.com>
Co-committed-by: jackfiled <xcrenchangjun@outlook.com>
2024-07-29 16:59:29 +08:00

93 lines
2.7 KiB
C#

using System.Diagnostics.CodeAnalysis;
using CanonSharp.Common.Abstractions;
namespace CanonSharp.Common.LexicalAnalyzer;
/// <summary>
/// Table-driven lexical scanner. It walks a deterministic automaton over the characters
/// supplied by an <see cref="ISourceReader"/> and produces the tokens it recognizes.
/// </summary>
/// <remarks>
/// The scanner keeps consuming characters for as long as the current state has a transition
/// for the next character. It does not backtrack: when no transition exists, the state reached
/// so far must be an accepting state, otherwise scanning fails.
/// </remarks>
/// <param name="startState">State the automaton starts in and returns to after every token.</param>
/// <param name="finalStateMap">Maps each accepting state to the token definition it recognizes.</param>
/// <param name="skippedTokens">Tokens that are recognized but never returned by <see cref="TryRead"/>.</param>
/// <param name="reader">Character source that is scanned.</param>
public class LexicalScanner(
    DeterministicState startState,
    Dictionary<DeterministicState, LexicalToken> finalStateMap,
    HashSet<LexicalToken> skippedTokens,
    ISourceReader reader)
{
    private readonly DeterministicState _startState = startState;

    // Characters consumed since the last completed token, i.e. the text of the token in progress.
    private readonly List<char> _readHistory = [];

    private DeterministicState _currentState = startState;

    /// <summary>
    /// Reads the next token that is not contained in the skipped-token set.
    /// </summary>
    /// <param name="token">The recognized token, or <c>null</c> when the input is exhausted.</param>
    /// <returns><c>true</c> if a token was read; <c>false</c> at end of input.</returns>
    /// <exception cref="InvalidOperationException">
    /// The input cannot be split into tokens (unexpected character, unexpected end of input,
    /// or a zero-length match that would prevent the scanner from making progress).
    /// </exception>
    public bool TryRead([NotNullWhen(true)] out LexicalToken? token)
    {
        while (TryReadInternal(out token))
        {
            if (!skippedTokens.Contains(token))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Reads the next token from the input, including tokens that the caller wants skipped.
    /// </summary>
    private bool TryReadInternal([NotNullWhen(true)] out LexicalToken? token)
    {
        while (reader.TryPeek(out char c))
        {
            if (_currentState.Transaction.TryGetValue(c, out DeterministicState? nextState))
            {
                // A transition exists: move to the next state and consume the character.
                _currentState = nextState;
                _readHistory.Add(reader.Read());
                continue;
            }

            // No transition for this character: the text read so far must form a token.
            if (!finalStateMap.TryGetValue(_currentState, out LexicalToken? matched))
            {
                throw new InvalidOperationException(
                    $"Unexpected character '{c}' after \"{PendingText()}\": no token can be recognized.");
            }

            // An accepting state reached without consuming anything would yield an empty token
            // and leave the reader where it is, so every following call would do the same.
            if (_readHistory.Count == 0)
            {
                throw new InvalidOperationException(
                    $"Zero-length match before character '{c}': the scanner cannot make progress.");
            }

            token = CompleteToken(matched);
            return true;
        }

        // End of input: emit the pending text if it forms a complete token. Requiring at least
        // one consumed character keeps an accepting start state from producing empty tokens forever.
        if (_readHistory.Count > 0
            && finalStateMap.TryGetValue(_currentState, out LexicalToken? lastMatched))
        {
            token = CompleteToken(lastMatched);
            return true;
        }

        if (!_currentState.Equals(_startState))
        {
            throw new InvalidOperationException(
                $"Unexpected end of input after \"{PendingText()}\": the token is incomplete.");
        }

        token = null;
        return false;
    }

    /// <summary>
    /// Builds the token for the characters read so far and resets the scanner to the start state.
    /// </summary>
    private LexicalToken CompleteToken(LexicalToken definition)
    {
        LexicalToken result = new LexicalToken(definition, PendingText());

        _readHistory.Clear();
        _currentState = _startState;

        return result;
    }

    /// <summary>
    /// Returns the characters consumed since the last completed token as a string.
    /// </summary>
    private string PendingText() => new string(_readHistory.ToArray());

    /// <summary>
    /// Creates a builder that already defines the line-breaker and white-space tokens
    /// and registers both of them as skipped tokens.
    /// </summary>
    public static LexicalScannerBuilder CreateDefaultBuilder()
    {
        LexicalScannerBuilder builder = new();

        builder.DefineToken(LexicalToken.LineBreaker);
        builder.DefineToken(LexicalToken.WhiteSpace);

        builder.AddSkippedToken(LexicalToken.LineBreaker);
        builder.AddSkippedToken(LexicalToken.WhiteSpace);

        return builder;
    }

    /// <summary>
    /// Creates a builder without any predefined or skipped tokens.
    /// </summary>
    public static LexicalScannerBuilder CreateEmptyBuilder() => new();
}