using CanonSharp.Common.Abstractions;

namespace CanonSharp.Common.LexicalAnalyzer;

/// <summary>
/// Builder for configuring and creating <see cref="LexicalScanner"/> instances.
/// </summary>
public class LexicalScannerBuilder
{
    private readonly Dictionary<NondeterministicState, LexicalToken> _finalStateMap = [];

    private readonly List<NondeterministicFiniteAutomation> _nondeterministicFiniteAutomations = [];

    private readonly HashSet<LexicalToken> _skippedTokens = [];

    internal LexicalScannerBuilder()
    {
    }

    /// <summary>
    /// Define a lexical token: convert its regular expression to an NFA and
    /// record which token each NFA final state recognizes.
    /// </summary>
    /// <param name="token">The token to define.</param>
    public void DefineToken(LexicalToken token)
    {
        NondeterministicFiniteAutomation automation = token.Expression.Convert2Nfa();
        _nondeterministicFiniteAutomations.Add(automation);

        foreach (NondeterministicState state in automation.FinalStates)
        {
            _finalStateMap.Add(state, token);
        }
    }

    /// <summary>
    /// Define a lexical token.
    /// </summary>
    /// <param name="expression">The regular expression for the token.</param>
    /// <param name="priority">The priority used when recognizing the token.</param>
    /// <returns>The defined lexical token.</returns>
    public LexicalToken DefineToken(RegularExpression expression, int priority)
    {
        LexicalToken token = new(expression, priority);
        DefineToken(token);
        return token;
    }

    /// <summary>
    /// Define a lexical token that the scanner skips in its output.
    /// </summary>
    /// <param name="expression">The regular expression for the token.</param>
    /// <param name="priority">The priority of the token.</param>
    public void DefineSkippedToken(RegularExpression expression, int priority)
    {
        LexicalToken token = DefineToken(expression, priority);
        AddSkippedToken(token);
    }

    /// <summary>
    /// Mark an already defined token as skipped in the scanner's output.
    /// </summary>
    /// <param name="token">The token to skip.</param>
    public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);
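
    // Usage sketch for the definition API above. This is illustrative only:
    // how RegularExpression values are constructed is not shown in this file,
    // so the expression variables below are assumed to exist.
    //
    //   LexicalScannerBuilder builder = ...;
    //   LexicalToken identifier = builder.DefineToken(identifierExpression, priority: 1);
    //   LexicalToken keyword = builder.DefineToken(keywordExpression, priority: 2);
    //   builder.DefineSkippedToken(whitespaceExpression, priority: 0);
    //
    // When an input such as "if" matches both identifierExpression and
    // keywordExpression, Build() resolves the conflict in favor of the
    // higher-priority keyword token.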

    /// <summary>
    /// Build a <see cref="LexicalScanner"/>: combine the registered NFAs into one,
    /// convert it to a DFA, and map every DFA final state to the highest-priority
    /// token among the NFA final states contained in its closure.
    /// </summary>
    /// <param name="reader">The source reader the scanner consumes.</param>
    public LexicalScanner Build(ISourceReader reader)
    {
        NondeterministicFiniteAutomation combinedAutomation = Combine();
        DeterministicFiniteAutomation deterministicFiniteAutomation =
            DeterministicFiniteAutomation.Create(combinedAutomation);

        Dictionary<DeterministicState, LexicalToken> finalTokenMap = [];

        foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
        {
            finalTokenMap.Add(state.StripClosure(), state.Closure
                .Where(s => _finalStateMap.ContainsKey(s))
                .Select(s => _finalStateMap[s])
                .OrderByDescending(t => t.Priority)
                .First());
        }

        // Strip the Closure references that are not needed during scanning
        // to release the memory they occupy: walk the DFA breadth-first and
        // rewrite every transition to point at the stripped copy of its target.
        Queue<DeterministicState> queue = [];
        HashSet<DeterministicState> visited = [deterministicFiniteAutomation.Start];
        DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure();
        queue.Enqueue(strippedStartState);

        while (queue.TryDequeue(out DeterministicState? state))
        {
            Dictionary<char, DeterministicState> transactions = [];

            foreach (KeyValuePair<char, DeterministicState> pair in state.Transaction)
            {
                transactions.Add(pair.Key, pair.Value.StripClosure());
            }

            state.Transaction.Clear();
            foreach (KeyValuePair<char, DeterministicState> pair in transactions)
            {
                state.Transaction.Add(pair.Key, pair.Value);
                if (visited.Add(pair.Value))
                {
                    queue.Enqueue(pair.Value);
                }
            }
        }

        return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader);
    }
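
    // End-to-end sketch (illustrative only): neither ISourceReader's concrete
    // implementations nor LexicalScanner's read API appear in this file, so
    // StringSourceReader, TryRead, and LexicalValue below are hypothetical names.
    //
    //   LexicalScanner scanner = builder.Build(new StringSourceReader("1 + 2"));
    //   while (scanner.TryRead(out LexicalValue? value)) { /* consume value */ }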

    /// <summary>
    /// Combine the registered NFAs into a single NFA by introducing a new start
    /// state with an epsilon transition into each automation's start state and
    /// taking the union of their final states.
    /// </summary>
    private NondeterministicFiniteAutomation Combine()
    {
        NondeterministicState head = new();
        NondeterministicFiniteAutomation result = new(head, []);

        foreach (NondeterministicFiniteAutomation automation in _nondeterministicFiniteAutomations)
        {
            head.AddTransaction(EmptyChar.Empty, automation.Start);
            result.FinalStates.UnionWith(automation.FinalStates);
        }

        return result;
    }
}