From 57c31ec43565cb39d796bbf3592afcd6ef08fb0f Mon Sep 17 00:00:00 2001 From: jackfiled Date: Tue, 30 Jul 2024 14:35:03 +0800 Subject: [PATCH] fix: strip unused information and some operator override --- .../DeterministicFiniteAutomation.cs | 21 ++++++- .../LexicalAnalyzer/LexicalScannerBuilder.cs | 55 ++++++++++++++++++- .../LexicalAnalyzer/LexicalToken.cs | 10 ++-- .../LexicalAnalyzer/RegularExpression.cs | 6 ++ 4 files changed, 81 insertions(+), 11 deletions(-) diff --git a/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs b/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs index eaf844f..bd789ff 100644 --- a/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs +++ b/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs @@ -1,12 +1,27 @@ namespace CanonSharp.Common.LexicalAnalyzer; -public class DeterministicState(HashSet closure) : IEquatable +public class DeterministicState : IEquatable { - public Guid Id { get; } = Guid.NewGuid(); + public Guid Id { get; } public Dictionary Transaction { get; } = []; - public HashSet Closure { get; } = closure; + public HashSet Closure { get; } + + public DeterministicState(HashSet closure) + { + Id = Guid.NewGuid(); + Closure = closure; + } + + private DeterministicState(DeterministicState state) + { + Id = state.Id; + Transaction = state.Transaction; + Closure = []; + } + + public DeterministicState StripClosure() => new(this); public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id); diff --git a/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs b/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs index 5cdf402..c04c872 100644 --- a/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs +++ b/CanonSharp.Common/LexicalAnalyzer/LexicalScannerBuilder.cs @@ -24,6 +24,30 @@ public class LexicalScannerBuilder } } + /// + /// 定义词法令牌 + /// + /// 该令牌的正则表达式 + /// 识别该令牌的优先级 + /// 定义好的词法令牌 + public LexicalToken DefineToken(RegularExpression expression, int priority) + { + LexicalToken token = new(expression, priority); + DefineToken(token); + return token; + } + + /// + /// 定义输出时需要跳过的词法令牌 + /// + /// 该令牌的正则表达式 + /// 该令牌的优先级 + public void DefineSkippedToken(RegularExpression expression, int priority) + { + LexicalToken token = DefineToken(expression, priority); + AddSkippedToken(token); + } + public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token); public LexicalScanner Build(ISourceReader reader) @@ -36,14 +60,41 @@ public class LexicalScannerBuilder foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates) { - finalTokenMap.Add(state, state.Closure + finalTokenMap.Add(state.StripClosure(), state.Closure .Where(s => _finalStateMap.ContainsKey(s)) .Select(s => _finalStateMap[s]) .OrderByDescending(t => t.Priority) .First()); } - return new LexicalScanner(deterministicFiniteAutomation.Start, finalTokenMap, _skippedTokens, reader); + // 清除在分析中不需要的Closure引用 + // 释放内存占用 + Queue queue = []; + HashSet visited = [deterministicFiniteAutomation.Start]; + DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure(); + queue.Enqueue(strippedStartState); + + while (queue.TryDequeue(out DeterministicState? state)) + { + Dictionary transactions = []; + + foreach (KeyValuePair pair in state.Transaction) + { + transactions.Add(pair.Key, pair.Value.StripClosure()); + } + + state.Transaction.Clear(); + foreach (KeyValuePair pair in transactions) + { + state.Transaction.Add(pair.Key, pair.Value); + if (visited.Add(pair.Value)) + { + queue.Enqueue(pair.Value); + } + } + } + + return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader); } private NondeterministicFiniteAutomation Combine() diff --git a/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs b/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs index e6be9eb..71d97d9 100644 --- a/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs +++ b/CanonSharp.Common/LexicalAnalyzer/LexicalToken.cs @@ -42,15 +42,13 @@ public class LexicalToken : IEquatable /// 匹配所有的空白字符 /// public static readonly LexicalToken WhiteSpace = new( - RegularExpression.Alternate( - RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator), - RegularExpression.CharSetOf("\u0009\u000B\u000C")), int.MinValue); + RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator) | + RegularExpression.CharSetOf("\u0009\u000B\u000C"), int.MinValue); /// /// 匹配所有的换行符 /// public static readonly LexicalToken LineBreaker = new( - RegularExpression.Alternate( - RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029"), - RegularExpression.String("\r\n")), int.MinValue); + RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029") | + RegularExpression.String("\r\n"), int.MinValue); } diff --git a/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs b/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs index 79a98d8..80e2b3c 100644 --- a/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs +++ b/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs @@ -26,6 +26,9 @@ public abstract class RegularExpression public static RegularExpression Alternate(RegularExpression left, RegularExpression right) => new AlternationExpression(left, right); + public static RegularExpression operator |(RegularExpression left, RegularExpression right) => + new AlternationExpression(left, right); + /// /// left-right /// @@ -35,6 +38,9 @@ public abstract class RegularExpression public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) => new ConcatenationExpression(first, second); + public static RegularExpression operator +(RegularExpression left, RegularExpression right) => + new ConcatenationExpression(left, right); + /// /// inner* ///