using CanonSharp.Common.Abstractions;

namespace CanonSharp.Common.LexicalAnalyzer;

public class LexicalScannerBuilder
{
    private readonly Dictionary<NondeterministicState, LexicalToken> _finalStateMap = [];
    private readonly List<NondeterministicFiniteAutomation> _nondeterministicFiniteAutomations = [];
    private readonly HashSet<LexicalToken> _skippedTokens = [];

    internal LexicalScannerBuilder()
    {
    }

    /// <summary>
    /// Define a lexical token and register the final states of its NFA.
    /// </summary>
    /// <param name="token">The lexical token to define.</param>
    public void DefineToken(LexicalToken token)
    {
        NondeterministicFiniteAutomation automation = token.Expression.Convert2Nfa();
        _nondeterministicFiniteAutomations.Add(automation);

        foreach (NondeterministicState state in automation.FinalStates)
        {
            _finalStateMap.Add(state, token);
        }
    }

    /// <summary>
    /// Define a lexical token.
    /// </summary>
    /// <param name="expression">The regular expression of the token.</param>
    /// <param name="priority">The priority used when recognizing the token.</param>
    /// <returns>The defined lexical token.</returns>
    public LexicalToken DefineToken(RegularExpression expression, int priority)
    {
        LexicalToken token = new(expression, priority);
        DefineToken(token);
        return token;
    }

    /// <summary>
    /// Define a lexical token that is skipped in the scanner's output.
    /// </summary>
    /// <param name="expression">The regular expression of the token.</param>
    /// <param name="priority">The priority of the token.</param>
    public void DefineSkippedToken(RegularExpression expression, int priority)
    {
        LexicalToken token = DefineToken(expression, priority);
        AddSkippedToken(token);
    }

    public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);

    public LexicalScanner Build(ISourceReader reader)
    {
        NondeterministicFiniteAutomation finalAutomation = Combine();
        DeterministicFiniteAutomation deterministicFiniteAutomation =
            DeterministicFiniteAutomation.Create(finalAutomation);

        // For every final DFA state, pick the highest-priority token among the
        // NFA final states contained in its closure.
        Dictionary<DeterministicState, LexicalToken> finalTokenMap = [];
        foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
        {
            finalTokenMap.Add(state.StripClosure(), state.Closure
                .Where(s => _finalStateMap.ContainsKey(s))
                .Select(s => _finalStateMap[s])
                .OrderByDescending(t => t.Priority)
                .First());
        }

        // Strip the Closure references that are no longer needed during scanning
        // to release the memory they hold.
        Queue<DeterministicState> queue = [];
        HashSet<DeterministicState> visited = [deterministicFiniteAutomation.Start];
        DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure();
        queue.Enqueue(strippedStartState);

        while (queue.TryDequeue(out DeterministicState? state))
        {
            Dictionary<char, DeterministicState> transactions = [];

            foreach (KeyValuePair<char, DeterministicState> pair in state.Transaction)
            {
                transactions.Add(pair.Key, pair.Value.StripClosure());
            }

            state.Transaction.Clear();
            foreach (KeyValuePair<char, DeterministicState> pair in transactions)
            {
                state.Transaction.Add(pair.Key, pair.Value);

                if (visited.Add(pair.Value))
                {
                    queue.Enqueue(pair.Value);
                }
            }
        }

        return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader);
    }

    /// <summary>
    /// Combine the NFAs of all defined tokens into a single NFA by adding
    /// ε-transitions from a fresh start state to every token's start state.
    /// </summary>
    private NondeterministicFiniteAutomation Combine()
    {
        NondeterministicState head = new();
        NondeterministicFiniteAutomation result = new(head, []);

        foreach (NondeterministicFiniteAutomation automation in _nondeterministicFiniteAutomations)
        {
            head.AddTransaction(EmptyChar.Empty, automation.Start);
            result.FinalStates.UnionWith(automation.FinalStates);
        }

        return result;
    }
}
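
// Usage sketch (added for illustration, not part of the original source): a
// minimal example of how the builder's API fits together. Because the
// constructor is internal, callers are expected to obtain a builder from a
// factory inside the library; that factory, `RegularExpression.String(...)`,
// and `StringSourceReader` are assumed names here, standing in for whatever
// RegularExpression helpers and ISourceReader implementations CanonSharp
// actually provides.
//
//     LexicalScannerBuilder builder = ...; // obtained from the library's factory (hypothetical)
//
//     // Higher priority wins when several tokens share a final DFA state,
//     // e.g. a keyword token beating a general identifier token.
//     builder.DefineToken(RegularExpression.String("begin"), 2);    // hypothetical helper
//     builder.DefineSkippedToken(RegularExpression.String(" "), 1); // recognized but dropped from output
//
//     LexicalScanner scanner = builder.Build(new StringSourceReader("begin ")); // hypothetical reader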