fix: strip unused closure information and add some operator overloads

This commit is contained in:
jackfiled 2024-07-30 14:35:03 +08:00
parent 3c0d51cec5
commit 57c31ec435
4 changed files with 81 additions and 11 deletions

View File

@ -1,12 +1,27 @@
namespace CanonSharp.Common.LexicalAnalyzer; namespace CanonSharp.Common.LexicalAnalyzer;
public class DeterministicState(HashSet<NondeterministicState> closure) : IEquatable<DeterministicState> public class DeterministicState : IEquatable<DeterministicState>
{ {
public Guid Id { get; } = Guid.NewGuid(); public Guid Id { get; }
public Dictionary<char, DeterministicState> Transaction { get; } = []; public Dictionary<char, DeterministicState> Transaction { get; } = [];
public HashSet<NondeterministicState> Closure { get; } = closure; public HashSet<NondeterministicState> Closure { get; }
/// <summary>
/// Creates a deterministic state that keeps the given set of nondeterministic states
/// as its <see cref="Closure"/> and receives a freshly generated <see cref="Id"/>.
/// </summary>
/// <param name="closure">The set of nondeterministic states stored in <see cref="Closure"/>.</param>
public DeterministicState(HashSet<NondeterministicState> closure)
{
    Closure = closure;
    Id = Guid.NewGuid();
}
/// <summary>
/// Copy constructor: builds a state with the same identity and transition table
/// as <paramref name="state"/>, but with an empty <see cref="Closure"/>.
/// </summary>
/// <param name="state">The state to copy from.</param>
private DeterministicState(DeterministicState state)
{
    // Nothing of the source closure is carried over.
    Closure = [];

    // Keep the source Id so that Equals treats the copy and the source as the same state.
    Id = state.Id;

    // The transition dictionary is shared by reference, not cloned.
    Transaction = state.Transaction;
}
/// <summary>
/// Returns a new state object with the same <see cref="Id"/> and the same
/// <see cref="Transaction"/> dictionary instance as this one, but with an empty
/// <see cref="Closure"/>.
/// </summary>
/// <returns>The closure-free copy of this state.</returns>
public DeterministicState StripClosure()
{
    return new DeterministicState(this);
}
public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id); public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);

View File

@ -24,6 +24,30 @@ public class LexicalScannerBuilder
} }
} }
/// <summary>
/// Defines a lexical token.
/// </summary>
/// <param name="expression">The regular expression of the token.</param>
/// <param name="priority">The priority used when recognizing the token.</param>
/// <returns>The newly defined lexical token.</returns>
public LexicalToken DefineToken(RegularExpression expression, int priority)
{
    var definedToken = new LexicalToken(expression, priority);

    // Register the new token through the token-based overload before handing it back.
    DefineToken(definedToken);

    return definedToken;
}
/// <summary>
/// Defines a lexical token that should be skipped in the output.
/// </summary>
/// <param name="expression">The regular expression of the token.</param>
/// <param name="priority">The priority of the token.</param>
public void DefineSkippedToken(RegularExpression expression, int priority) =>
    // Define the token first, then add that same token to the skipped set.
    AddSkippedToken(DefineToken(expression, priority));
public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token); public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);
public LexicalScanner Build(ISourceReader reader) public LexicalScanner Build(ISourceReader reader)
@ -36,14 +60,41 @@ public class LexicalScannerBuilder
foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates) foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
{ {
finalTokenMap.Add(state, state.Closure finalTokenMap.Add(state.StripClosure(), state.Closure
.Where(s => _finalStateMap.ContainsKey(s)) .Where(s => _finalStateMap.ContainsKey(s))
.Select(s => _finalStateMap[s]) .Select(s => _finalStateMap[s])
.OrderByDescending(t => t.Priority) .OrderByDescending(t => t.Priority)
.First()); .First());
} }
return new LexicalScanner(deterministicFiniteAutomation.Start, finalTokenMap, _skippedTokens, reader); // 清除在分析中不需要的Closure引用
// 释放内存占用
Queue<DeterministicState> queue = [];
HashSet<DeterministicState> visited = [deterministicFiniteAutomation.Start];
DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure();
queue.Enqueue(strippedStartState);
while (queue.TryDequeue(out DeterministicState? state))
{
Dictionary<char, DeterministicState> transactions = [];
foreach (KeyValuePair<char,DeterministicState> pair in state.Transaction)
{
transactions.Add(pair.Key, pair.Value.StripClosure());
}
state.Transaction.Clear();
foreach (KeyValuePair<char,DeterministicState> pair in transactions)
{
state.Transaction.Add(pair.Key, pair.Value);
if (visited.Add(pair.Value))
{
queue.Enqueue(pair.Value);
}
}
}
return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader);
} }
private NondeterministicFiniteAutomation Combine() private NondeterministicFiniteAutomation Combine()

View File

@ -42,15 +42,13 @@ public class LexicalToken : IEquatable<LexicalToken>
/// 匹配所有的空白字符 /// 匹配所有的空白字符
/// </summary> /// </summary>
public static readonly LexicalToken WhiteSpace = new( public static readonly LexicalToken WhiteSpace = new(
RegularExpression.Alternate( RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator) |
RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator), RegularExpression.CharSetOf("\u0009\u000B\u000C"), int.MinValue);
RegularExpression.CharSetOf("\u0009\u000B\u000C")), int.MinValue);
/// <summary> /// <summary>
/// 匹配所有的换行符 /// 匹配所有的换行符
/// </summary> /// </summary>
public static readonly LexicalToken LineBreaker = new( public static readonly LexicalToken LineBreaker = new(
RegularExpression.Alternate( RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029") |
RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029"), RegularExpression.String("\r\n"), int.MinValue);
RegularExpression.String("\r\n")), int.MinValue);
} }

View File

@ -26,6 +26,9 @@ public abstract class RegularExpression
public static RegularExpression Alternate(RegularExpression left, RegularExpression right) => public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
new AlternationExpression(left, right); new AlternationExpression(left, right);
/// <summary>
/// Operator form of <see cref="Alternate(RegularExpression, RegularExpression)"/>:
/// builds the alternation of the two operands.
/// </summary>
/// <param name="left">The left alternative.</param>
/// <param name="right">The right alternative.</param>
/// <returns>An expression representing the alternation of both operands.</returns>
public static RegularExpression operator |(RegularExpression left, RegularExpression right)
{
    return Alternate(left, right);
}
/// <summary> /// <summary>
/// left-right /// left-right
/// </summary> /// </summary>
@ -35,6 +38,9 @@ public abstract class RegularExpression
public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) => public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
new ConcatenationExpression(first, second); new ConcatenationExpression(first, second);
/// <summary>
/// Operator form of <see cref="Concatenate(RegularExpression, RegularExpression)"/>:
/// builds the concatenation of the two operands, left first.
/// </summary>
/// <param name="left">The expression that comes first.</param>
/// <param name="right">The expression that comes second.</param>
/// <returns>An expression representing the concatenation of both operands.</returns>
public static RegularExpression operator +(RegularExpression left, RegularExpression right)
{
    return Concatenate(left, right);
}
/// <summary> /// <summary>
/// inner* /// inner*
/// </summary> /// </summary>