fix: strip unused information and add some operator overloads
This commit is contained in:
parent
3c0d51cec5
commit
57c31ec435
|
@ -1,12 +1,27 @@
|
||||||
namespace CanonSharp.Common.LexicalAnalyzer;
|
namespace CanonSharp.Common.LexicalAnalyzer;
|
||||||
|
|
||||||
public class DeterministicState(HashSet<NondeterministicState> closure) : IEquatable<DeterministicState>
|
public class DeterministicState : IEquatable<DeterministicState>
|
||||||
{
|
{
|
||||||
public Guid Id { get; } = Guid.NewGuid();
|
public Guid Id { get; }
|
||||||
|
|
||||||
public Dictionary<char, DeterministicState> Transaction { get; } = [];
|
public Dictionary<char, DeterministicState> Transaction { get; } = [];
|
||||||
|
|
||||||
public HashSet<NondeterministicState> Closure { get; } = closure;
|
public HashSet<NondeterministicState> Closure { get; }
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a state with a newly generated <see cref="Id"/> that stores the given closure.
/// </summary>
/// <param name="closure">Set of nondeterministic states assigned to <see cref="Closure"/>.</param>
public DeterministicState(HashSet<NondeterministicState> closure)
|
||||||
|
{
|
||||||
|
Id = Guid.NewGuid();
|
||||||
|
Closure = closure;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a copy of <paramref name="state"/> that keeps its <see cref="Id"/> but has an empty <see cref="Closure"/>.
/// </summary>
/// <param name="state">State whose Id and Transaction dictionary are reused by the copy.</param>
private DeterministicState(DeterministicState state)
|
||||||
|
{
|
||||||
|
// Same Id as the source, so Equals (which compares Id) treats the copy and the source as equal.
Id = state.Id;
|
||||||
|
// The dictionary instance is shared with the source state, not cloned.
Transaction = state.Transaction;
|
||||||
|
Closure = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns a new state built from this one through the private copy constructor.
/// </summary>
/// <returns>A new <see cref="DeterministicState"/> instance created from this state.</returns>
public DeterministicState StripClosure() => new(this);
|
||||||
|
|
||||||
public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);
|
public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,30 @@ public class LexicalScannerBuilder
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Defines a lexical token.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="expression">The regular expression of the token.</param>
|
||||||
|
/// <param name="priority">The priority used when recognizing the token.</param>
|
||||||
|
/// <returns>The lexical token that was defined.</returns>
|
||||||
|
public LexicalToken DefineToken(RegularExpression expression, int priority)
|
||||||
|
{
|
||||||
|
LexicalToken token = new(expression, priority);
|
||||||
|
DefineToken(token);
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Defines a lexical token that should be skipped in the output.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="expression">The regular expression of the token.</param>
|
||||||
|
/// <param name="priority">The priority of the token.</param>
|
||||||
|
public void DefineSkippedToken(RegularExpression expression, int priority)
|
||||||
|
{
|
||||||
|
LexicalToken token = DefineToken(expression, priority);
|
||||||
|
AddSkippedToken(token);
|
||||||
|
}
|
||||||
|
|
||||||
public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);
|
public void AddSkippedToken(LexicalToken token) => _skippedTokens.Add(token);
|
||||||
|
|
||||||
public LexicalScanner Build(ISourceReader reader)
|
public LexicalScanner Build(ISourceReader reader)
|
||||||
|
@ -36,14 +60,41 @@ public class LexicalScannerBuilder
|
||||||
|
|
||||||
foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
|
foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
|
||||||
{
|
{
|
||||||
finalTokenMap.Add(state, state.Closure
|
finalTokenMap.Add(state.StripClosure(), state.Closure
|
||||||
.Where(s => _finalStateMap.ContainsKey(s))
|
.Where(s => _finalStateMap.ContainsKey(s))
|
||||||
.Select(s => _finalStateMap[s])
|
.Select(s => _finalStateMap[s])
|
||||||
.OrderByDescending(t => t.Priority)
|
.OrderByDescending(t => t.Priority)
|
||||||
.First());
|
.First());
|
||||||
}
|
}
|
||||||
|
|
||||||
return new LexicalScanner(deterministicFiniteAutomation.Start, finalTokenMap, _skippedTokens, reader);
|
// 清除在分析中不需要的Closure引用
|
||||||
|
// 释放内存占用
|
||||||
|
Queue<DeterministicState> queue = [];
|
||||||
|
HashSet<DeterministicState> visited = [deterministicFiniteAutomation.Start];
|
||||||
|
DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure();
|
||||||
|
queue.Enqueue(strippedStartState);
|
||||||
|
|
||||||
|
while (queue.TryDequeue(out DeterministicState? state))
|
||||||
|
{
|
||||||
|
Dictionary<char, DeterministicState> transactions = [];
|
||||||
|
|
||||||
|
foreach (KeyValuePair<char,DeterministicState> pair in state.Transaction)
|
||||||
|
{
|
||||||
|
transactions.Add(pair.Key, pair.Value.StripClosure());
|
||||||
|
}
|
||||||
|
|
||||||
|
state.Transaction.Clear();
|
||||||
|
foreach (KeyValuePair<char,DeterministicState> pair in transactions)
|
||||||
|
{
|
||||||
|
state.Transaction.Add(pair.Key, pair.Value);
|
||||||
|
if (visited.Add(pair.Value))
|
||||||
|
{
|
||||||
|
queue.Enqueue(pair.Value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
private NondeterministicFiniteAutomation Combine()
|
private NondeterministicFiniteAutomation Combine()
|
||||||
|
|
|
@ -42,15 +42,13 @@ public class LexicalToken : IEquatable<LexicalToken>
|
||||||
/// 匹配所有的空白字符
|
/// 匹配所有的空白字符
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static readonly LexicalToken WhiteSpace = new(
|
public static readonly LexicalToken WhiteSpace = new(
|
||||||
RegularExpression.Alternate(
|
RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator) |
|
||||||
RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator),
|
RegularExpression.CharSetOf("\u0009\u000B\u000C"), int.MinValue);
|
||||||
RegularExpression.CharSetOf("\u0009\u000B\u000C")), int.MinValue);
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 匹配所有的换行符
|
/// 匹配所有的换行符
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static readonly LexicalToken LineBreaker = new(
|
public static readonly LexicalToken LineBreaker = new(
|
||||||
RegularExpression.Alternate(
|
RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029") |
|
||||||
RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029"),
|
RegularExpression.String("\r\n"), int.MinValue);
|
||||||
RegularExpression.String("\r\n")), int.MinValue);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,9 @@ public abstract class RegularExpression
|
||||||
public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
|
public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
|
||||||
new AlternationExpression(left, right);
|
new AlternationExpression(left, right);
|
||||||
|
|
||||||
|
/// <summary>
/// left|right, written as an operator: builds an AlternationExpression from the two operands.
/// </summary>
public static RegularExpression operator |(RegularExpression left, RegularExpression right) =>
|
||||||
|
new AlternationExpression(left, right);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// left-right
|
/// left-right
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -35,6 +38,9 @@ public abstract class RegularExpression
|
||||||
public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
|
public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
|
||||||
new ConcatenationExpression(first, second);
|
new ConcatenationExpression(first, second);
|
||||||
|
|
||||||
|
/// <summary>
/// left-right, written as an operator: builds a ConcatenationExpression from the two operands.
/// </summary>
public static RegularExpression operator +(RegularExpression left, RegularExpression right) =>
|
||||||
|
new ConcatenationExpression(left, right);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// inner*
|
/// inner*
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user