fix: strip unused information and add some operator overloads
This commit is contained in:
		@@ -1,12 +1,27 @@
 | 
			
		||||
namespace CanonSharp.Common.LexicalAnalyzer;
 | 
			
		||||
 | 
			
		||||
public class DeterministicState(HashSet<NondeterministicState> closure) : IEquatable<DeterministicState>
 | 
			
		||||
public class DeterministicState : IEquatable<DeterministicState>
 | 
			
		||||
{
 | 
			
		||||
    public Guid Id { get; } = Guid.NewGuid();
 | 
			
		||||
    public Guid Id { get; }
 | 
			
		||||
 | 
			
		||||
    public Dictionary<char, DeterministicState> Transaction { get; } = [];
 | 
			
		||||
 | 
			
		||||
    public HashSet<NondeterministicState> Closure { get; } = closure;
 | 
			
		||||
    public HashSet<NondeterministicState> Closure { get; }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Creates a deterministic state with a freshly generated <see cref="Id"/>
    /// and the supplied set of nondeterministic states as its <see cref="Closure"/>.
    /// </summary>
    /// <param name="closure">The set of nondeterministic states stored in <see cref="Closure"/>.</param>
    public DeterministicState(HashSet<NondeterministicState> closure)
    {
        // Both get-only properties are assigned in one deconstructing assignment.
        (Id, Closure) = (Guid.NewGuid(), closure);
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Copy constructor used by <see cref="StripClosure"/>: the new instance keeps the
    /// identifier of <paramref name="state"/> and starts with an empty <see cref="Closure"/>.
    /// </summary>
    /// <param name="state">The state whose identifier and transition table are carried over.</param>
    private DeterministicState(DeterministicState state)
    {
        // The closure set of the source state is intentionally not carried over.
        Closure = [];

        // The very same dictionary instance is reused (not a copy), so edits made
        // through either state are visible through the other one.
        Transaction = state.Transaction;

        // Same identifier as the source state, so the two compare equal by Id.
        Id = state.Id;
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Builds a state that has this state's <see cref="Id"/> and the same
    /// <see cref="Transaction"/> dictionary instance, but an empty <see cref="Closure"/>.
    /// </summary>
    /// <returns>A new <see cref="DeterministicState"/> created through the private copy constructor.</returns>
    public DeterministicState StripClosure()
    {
        return new DeterministicState(this);
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Compares two deterministic states by their <see cref="Id"/>.
    /// </summary>
    /// <param name="other">The state to compare with; may be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="other"/> is not <c>null</c> and has the same
    /// <see cref="Id"/>; otherwise <c>false</c>.
    /// </returns>
    public bool Equals(DeterministicState? other)
    {
        if (other is null)
        {
            return false;
        }

        return Id.Equals(other.Id);
    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -24,6 +24,30 @@ public class LexicalScannerBuilder
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Defines a lexical token.
    /// </summary>
    /// <param name="expression">The regular expression of the token.</param>
    /// <param name="priority">The priority used when recognizing the token.</param>
    /// <returns>The lexical token that has been defined.</returns>
    public LexicalToken DefineToken(RegularExpression expression, int priority)
    {
        var definedToken = new LexicalToken(expression, priority);

        // Hand the new token to the overload that accepts an existing token.
        DefineToken(definedToken);

        return definedToken;
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Defines a lexical token that should be skipped in the output.
    /// </summary>
    /// <param name="expression">The regular expression of the token.</param>
    /// <param name="priority">The priority of the token.</param>
    public void DefineSkippedToken(RegularExpression expression, int priority)
    {
        // Define the token first, then add the resulting token to the skipped tokens.
        AddSkippedToken(DefineToken(expression, priority));
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Adds an existing token to the builder's skipped-token collection, which
    /// <c>Build</c> later passes on to the scanner it creates.
    /// </summary>
    /// <param name="token">The token to add to the skipped-token collection.</param>
    public void AddSkippedToken(LexicalToken token)
    {
        _skippedTokens.Add(token);
    }
 | 
			
		||||
 | 
			
		||||
    public LexicalScanner Build(ISourceReader reader)
 | 
			
		||||
@@ -36,14 +60,41 @@ public class LexicalScannerBuilder
 | 
			
		||||
 | 
			
		||||
        foreach (DeterministicState state in deterministicFiniteAutomation.FinalStates)
 | 
			
		||||
        {
 | 
			
		||||
            finalTokenMap.Add(state, state.Closure
 | 
			
		||||
            finalTokenMap.Add(state.StripClosure(), state.Closure
 | 
			
		||||
                .Where(s => _finalStateMap.ContainsKey(s))
 | 
			
		||||
                .Select(s => _finalStateMap[s])
 | 
			
		||||
                .OrderByDescending(t => t.Priority)
 | 
			
		||||
                .First());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return new LexicalScanner(deterministicFiniteAutomation.Start, finalTokenMap, _skippedTokens, reader);
 | 
			
		||||
        // 清除在分析中不需要的Closure引用
 | 
			
		||||
        // 释放内存占用
 | 
			
		||||
        Queue<DeterministicState> queue = [];
 | 
			
		||||
        HashSet<DeterministicState> visited = [deterministicFiniteAutomation.Start];
 | 
			
		||||
        DeterministicState strippedStartState = deterministicFiniteAutomation.Start.StripClosure();
 | 
			
		||||
        queue.Enqueue(strippedStartState);
 | 
			
		||||
 | 
			
		||||
        while (queue.TryDequeue(out DeterministicState? state))
 | 
			
		||||
        {
 | 
			
		||||
            Dictionary<char, DeterministicState> transactions = [];
 | 
			
		||||
 | 
			
		||||
            foreach (KeyValuePair<char,DeterministicState> pair in state.Transaction)
 | 
			
		||||
            {
 | 
			
		||||
                transactions.Add(pair.Key, pair.Value.StripClosure());
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            state.Transaction.Clear();
 | 
			
		||||
            foreach (KeyValuePair<char,DeterministicState> pair in transactions)
 | 
			
		||||
            {
 | 
			
		||||
                state.Transaction.Add(pair.Key, pair.Value);
 | 
			
		||||
                if (visited.Add(pair.Value))
 | 
			
		||||
                {
 | 
			
		||||
                    queue.Enqueue(pair.Value);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return new LexicalScanner(strippedStartState, finalTokenMap, _skippedTokens, reader);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private NondeterministicFiniteAutomation Combine()
 | 
			
		||||
 
 | 
			
		||||
@@ -42,15 +42,13 @@ public class LexicalToken : IEquatable<LexicalToken>
 | 
			
		||||
    /// 匹配所有的空白字符
 | 
			
		||||
    /// </summary>
 | 
			
		||||
    public static readonly LexicalToken WhiteSpace = new(
 | 
			
		||||
        RegularExpression.Alternate(
 | 
			
		||||
            RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator),
 | 
			
		||||
            RegularExpression.CharSetOf("\u0009\u000B\u000C")), int.MinValue);
 | 
			
		||||
        RegularExpression.CharSetOf(c => char.GetUnicodeCategory(c) == UnicodeCategory.SpaceSeparator) |
 | 
			
		||||
        RegularExpression.CharSetOf("\u0009\u000B\u000C"), int.MinValue);
 | 
			
		||||
 | 
			
		||||
    /// <summary>
 | 
			
		||||
    /// 匹配所有的换行符
 | 
			
		||||
    /// </summary>
 | 
			
		||||
    public static readonly LexicalToken LineBreaker = new(
 | 
			
		||||
        RegularExpression.Alternate(
 | 
			
		||||
            RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029"),
 | 
			
		||||
            RegularExpression.String("\r\n")), int.MinValue);
 | 
			
		||||
            RegularExpression.CharSetOf("\u000D\u000A\u0085\u2028\u2029") |
 | 
			
		||||
            RegularExpression.String("\r\n"), int.MinValue);
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -26,6 +26,9 @@ public abstract class RegularExpression
 | 
			
		||||
    public static RegularExpression Alternate(RegularExpression left, RegularExpression right)
    {
        // Wrap both operands in a single alternation node.
        RegularExpression alternation = new AlternationExpression(left, right);
        return alternation;
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Operator form of <see cref="Alternate"/>: <c>left | right</c>.
    /// </summary>
    /// <param name="left">The left operand of the alternation.</param>
    /// <param name="right">The right operand of the alternation.</param>
    /// <returns>An alternation expression built from both operands.</returns>
    public static RegularExpression operator |(RegularExpression left, RegularExpression right)
    {
        // Delegate to the named factory so both spellings share one construction path.
        return Alternate(left, right);
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
 | 
			
		||||
    /// left-right
 | 
			
		||||
    /// </summary>
 | 
			
		||||
@@ -35,6 +38,9 @@ public abstract class RegularExpression
 | 
			
		||||
    public static RegularExpression Concatenate(RegularExpression first, RegularExpression second)
    {
        // Wrap both operands, in order, in a single concatenation node.
        RegularExpression concatenation = new ConcatenationExpression(first, second);
        return concatenation;
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
    /// Operator form of <see cref="Concatenate"/>: <c>left + right</c>.
    /// </summary>
    /// <param name="left">The expression that comes first in the concatenation.</param>
    /// <param name="right">The expression that comes second in the concatenation.</param>
    /// <returns>A concatenation expression built from both operands.</returns>
    public static RegularExpression operator +(RegularExpression left, RegularExpression right)
    {
        // Delegate to the named factory so both spellings share one construction path.
        return Concatenate(left, right);
    }
 | 
			
		||||
 | 
			
		||||
    /// <summary>
 | 
			
		||||
    /// inner*
 | 
			
		||||
    /// </summary>
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user