From 02c5690d9753fed74e97b8565746af3fca51ddcb Mon Sep 17 00:00:00 2001
From: jackfiled
Date: Sat, 27 Jul 2024 14:56:28 +0800
Subject: [PATCH] =?UTF-8?q?add:=20nfa2dfa=E7=AE=97=E6=B3=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CanonSharp.Common/CanonSharp.Common.csproj    |   1 -
 .../DeterministicFiniteAutomation.cs          |  94 +++++++++++++++
 .../LexicalAnalyzer/EmptyChar.cs              |  49 ++++++++
 .../NondeterministicFiniteAutomation.cs       |  82 +++++++++++++
 .../LexicalAnalyzer/RegularExpression.cs      | 113 ++++++++++++++++++
 CanonSharp.Parser/CanonSharp.Parser.fsproj    |   5 -
 .../NondeterministicFiniteAutomation.fs       |  38 ------
 .../LexicalAnalyzer/RegularExpression.fs      |  12 --
 .../RegularExpressionTests.cs                 | 104 +++++++++++++++++
 9 files changed, 442 insertions(+), 56 deletions(-)
 create mode 100644 CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs
 create mode 100644 CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs
 create mode 100644 CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs
 create mode 100644 CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs
 delete mode 100644 CanonSharp.Parser/LexicalAnalyzer/NondeterministicFiniteAutomation.fs
 delete mode 100644 CanonSharp.Parser/LexicalAnalyzer/RegularExpression.fs
 create mode 100644 CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs

diff --git a/CanonSharp.Common/CanonSharp.Common.csproj b/CanonSharp.Common/CanonSharp.Common.csproj
index 94a06d0..5771fa3 100644
--- a/CanonSharp.Common/CanonSharp.Common.csproj
+++ b/CanonSharp.Common/CanonSharp.Common.csproj
@@ -7,7 +7,6 @@
-
diff --git a/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs b/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs
new file mode 100644
index 0000000..6d52008
--- /dev/null
+++ b/CanonSharp.Common/LexicalAnalyzer/DeterministicFiniteAutomation.cs
@@ -0,0 +1,94 @@
+namespace CanonSharp.Common.LexicalAnalyzer;
+
+/// <summary>
+/// A state of a <see cref="DeterministicFiniteAutomation"/>; equality is by <see cref="Id"/>.
+/// </summary>
+public class DeterministicState : IEquatable<DeterministicState>
+{
+    public Guid Id { get; } = Guid.NewGuid();
+
+    public Dictionary<char, DeterministicState> Transaction { get; } = [];
+
+    public bool Equals(DeterministicState? other) => other is not null && Id.Equals(other.Id);
+
+    public override bool Equals(object? obj) => obj is DeterministicState other && Equals(other);
+
+    public override int GetHashCode() => Id.GetHashCode();
+}
+
+public class DeterministicFiniteAutomation
+{
+    public DeterministicState Start { get; }
+
+    public HashSet<DeterministicState> FinalStates { get; }
+
+    private DeterministicFiniteAutomation(DeterministicState start, HashSet<DeterministicState> finalStates)
+    {
+        Start = start;
+        FinalStates = finalStates;
+    }
+
+    private record Pair(HashSet<NondeterministicState> States, DeterministicState State);
+
+    /// <summary>
+    /// Converts <paramref name="nfa"/> into a DFA by subset construction.
+    /// </summary>
+    public static DeterministicFiniteAutomation Create(NondeterministicFiniteAutomation nfa)
+    {
+        Dictionary<NondeterministicStateSet, DeterministicState> map = [];
+        Queue<Pair> queue = [];
+        HashSet<DeterministicState> finalStates = [];
+
+        HashSet<NondeterministicState> startClosure = nfa.Start.CalculateEmptyClosure();
+        DeterministicState start = new();
+        map.Add(new NondeterministicStateSet(startClosure), start);
+        queue.Enqueue(new Pair(startClosure, start));
+
+        while (queue.TryDequeue(out Pair? pair))
+        {
+            if (pair.States.Any(s => nfa.FinalStates.Contains(s)))
+            {
+                finalStates.Add(pair.State);
+            }
+
+            Dictionary<char, HashSet<NondeterministicState>> next = [];
+
+            foreach (NondeterministicState state in pair.States)
+            {
+                foreach (KeyValuePair<EmptyChar, HashSet<NondeterministicState>> transaction in
+                         state.Transactions.Where(p => !p.Key.IsEmpty))
+                {
+                    // Several NFA states in the set may move on the same character, so merge
+                    // into the existing target set instead of adding the key a second time.
+                    if (!next.TryGetValue(transaction.Key.Char, out HashSet<NondeterministicState>? targets))
+                    {
+                        targets = [];
+                        next.Add(transaction.Key.Char, targets);
+                    }
+
+                    foreach (NondeterministicState s in transaction.Value)
+                    {
+                        targets.UnionWith(s.CalculateEmptyClosure());
+                    }
+                }
+            }
+
+            foreach (KeyValuePair<char, HashSet<NondeterministicState>> transaction in next)
+            {
+                NondeterministicStateSet set = new(transaction.Value);
+                if (!map.TryGetValue(set, out DeterministicState? nextState))
+                {
+                    // A state set seen for the first time is queued exactly once; this also
+                    // keeps the start state from being processed again if its set recurs.
+                    nextState = new DeterministicState();
+                    map.Add(set, nextState);
+                    queue.Enqueue(new Pair(transaction.Value, nextState));
+                }
+
+                pair.State.Transaction.Add(transaction.Key, nextState);
+            }
+        }
+
+        return new DeterministicFiniteAutomation(start, finalStates);
+    }
+}
diff --git a/CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs b/CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs
new file mode 100644
index 0000000..e31aa89
--- /dev/null
+++ b/CanonSharp.Common/LexicalAnalyzer/EmptyChar.cs
@@ -0,0 +1,49 @@
+namespace CanonSharp.Common.LexicalAnalyzer;
+
+/// <summary>
+/// A transition label: either a concrete character or the empty (ε) transition.
+/// </summary>
+public class EmptyChar : IEquatable<EmptyChar>
+{
+    public bool IsEmpty { get; }
+
+    public char Char { get; }
+
+    public static EmptyChar Empty => new();
+
+    private EmptyChar()
+    {
+        IsEmpty = true;
+        Char = char.MaxValue;
+    }
+
+    public EmptyChar(char c)
+    {
+        IsEmpty = false;
+        Char = c;
+    }
+
+    public bool Equals(EmptyChar? other)
+    {
+        if (other is null)
+        {
+            return false;
+        }
+
+        // Compare IsEmpty on both sides: ε stores char.MaxValue as a placeholder, so comparing
+        // Char alone would make new EmptyChar(char.MaxValue) equal ε in one direction only.
+        return IsEmpty == other.IsEmpty && Char == other.Char;
+    }
+
+    public override bool Equals(object? obj) => obj is EmptyChar other && Equals(other);
+
+    public override int GetHashCode()
+    {
+        return IsEmpty.GetHashCode() ^ Char.GetHashCode();
+    }
+
+    public override string ToString()
+    {
+        return IsEmpty ? "ε" : Char.ToString();
+    }
+}
diff --git a/CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs b/CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs
new file mode 100644
index 0000000..330d579
--- /dev/null
+++ b/CanonSharp.Common/LexicalAnalyzer/NondeterministicFiniteAutomation.cs
@@ -0,0 +1,82 @@
+namespace CanonSharp.Common.LexicalAnalyzer;
+
+public class NondeterministicState : IEquatable<NondeterministicState>
+{
+    public Guid Id { get; } = Guid.NewGuid();
+
+    public Dictionary<EmptyChar, HashSet<NondeterministicState>> Transactions { get; } = [];
+
+    public bool Equals(NondeterministicState? other) => other is not null && Id.Equals(other.Id);
+
+    public void AddTransaction(EmptyChar c, NondeterministicState state)
+    {
+        if (Transactions.TryGetValue(c, out HashSet<NondeterministicState>? states))
+        {
+            states.Add(state);
+        }
+        else
+        {
+            Transactions.Add(c, [state]);
+        }
+    }
+
+    public override bool Equals(object? obj) => obj is NondeterministicState other && Equals(other);
+
+    public override int GetHashCode() => Id.GetHashCode();
+
+    /// <summary>
+    /// Returns this state together with every state reachable through ε-transitions only.
+    /// </summary>
+    public HashSet<NondeterministicState> CalculateEmptyClosure()
+    {
+        HashSet<NondeterministicState> result = [];
+        Queue<NondeterministicState> queue = [];
+        queue.Enqueue(this);
+
+        while (queue.TryDequeue(out NondeterministicState? state))
+        {
+            result.Add(state);
+
+            if (!state.Transactions.TryGetValue(EmptyChar.Empty, out HashSet<NondeterministicState>? next))
+            {
+                continue;
+            }
+
+            foreach (NondeterministicState s in next.Where(s => !result.Contains(s)))
+            {
+                queue.Enqueue(s);
+            }
+        }
+
+        return result;
+    }
+}
+
+/// <summary>
+/// Wraps a set of NFA states so that it can be used as a dictionary key with set equality.
+/// </summary>
+public class NondeterministicStateSet(HashSet<NondeterministicState> states) : IEquatable<NondeterministicStateSet>
+{
+    private readonly HashSet<NondeterministicState> _states = states;
+
+    public bool Equals(NondeterministicStateSet? other)
+    {
+        if (other is null)
+        {
+            return false;
+        }
+
+        return _states.Count == other._states.Count && _states.All(s => other._states.Contains(s));
+    }
+
+    public override bool Equals(object? obj) => obj is NondeterministicStateSet other && Equals(other);
+
+    public override int GetHashCode() => _states.Aggregate(0, (current, state) => current ^ state.GetHashCode());
+}
+
+public class NondeterministicFiniteAutomation(NondeterministicState start, HashSet<NondeterministicState> finalStates)
+{
+    public NondeterministicState Start { get; } = start;
+
+    public HashSet<NondeterministicState> FinalStates { get; } = finalStates;
+}
diff --git a/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs b/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs
new file mode 100644
index 0000000..e051535
--- /dev/null
+++ b/CanonSharp.Common/LexicalAnalyzer/RegularExpression.cs
@@ -0,0 +1,113 @@
+namespace CanonSharp.Common.LexicalAnalyzer;
+
+public abstract class RegularExpression
+{
+    /// <summary>
+    /// Builds an NFA with ε-transitions that accepts the language of this expression.
+    /// </summary>
+    public abstract NondeterministicFiniteAutomation Convert2Nfa();
+
+    public static RegularExpression Empty => new EmptyExpression();
+
+    public static RegularExpression Single(char c) => new SymbolExpression(c);
+
+    public static RegularExpression Alternate(RegularExpression left, RegularExpression right) =>
+        new AlternationExpression(left, right);
+
+    public static RegularExpression Concatenate(RegularExpression first, RegularExpression second) =>
+        new ConcatenationExpression(first, second);
+
+    public static RegularExpression Kleene(RegularExpression inner) => new KleeneExpression(inner);
+}
+
+public class EmptyExpression : RegularExpression
+{
+    public override NondeterministicFiniteAutomation Convert2Nfa()
+    {
+        NondeterministicState final = new();
+        NondeterministicState start = new();
+        start.AddTransaction(EmptyChar.Empty, final);
+
+        return new NondeterministicFiniteAutomation(start, [final]);
+    }
+}
+
+public class SymbolExpression(char symbol) : RegularExpression
+{
+    public char Symbol { get; } = symbol;
+
+    public override NondeterministicFiniteAutomation Convert2Nfa()
+    {
+        NondeterministicState final = new();
+        NondeterministicState start = new();
+        start.AddTransaction(new EmptyChar(Symbol), final);
+
+        return new NondeterministicFiniteAutomation(start, [final]);
+    }
+}
+
+public class AlternationExpression(RegularExpression left, RegularExpression right) : RegularExpression
+{
+    public RegularExpression Left { get; } = left;
+
+    public RegularExpression Right { get; } = right;
+
+    public override NondeterministicFiniteAutomation Convert2Nfa()
+    {
+        NondeterministicFiniteAutomation left = Left.Convert2Nfa();
+        NondeterministicFiniteAutomation right = Right.Convert2Nfa();
+
+        NondeterministicState final = new();
+        foreach (NondeterministicState state in left.FinalStates.Concat(right.FinalStates))
+        {
+            state.AddTransaction(EmptyChar.Empty, final);
+        }
+
+        NondeterministicState start = new();
+        start.AddTransaction(EmptyChar.Empty, left.Start);
+        start.AddTransaction(EmptyChar.Empty, right.Start);
+
+        return new NondeterministicFiniteAutomation(start, [final]);
+    }
+}
+
+public class ConcatenationExpression(RegularExpression first, RegularExpression second) : RegularExpression
+{
+    public RegularExpression First { get; } = first;
+
+    public RegularExpression Second { get; } = second;
+
+    public override NondeterministicFiniteAutomation Convert2Nfa()
+    {
+        NondeterministicFiniteAutomation first = First.Convert2Nfa();
+        NondeterministicFiniteAutomation second = Second.Convert2Nfa();
+
+        foreach (NondeterministicState state in first.FinalStates)
+        {
+            state.AddTransaction(EmptyChar.Empty, second.Start);
+        }
+
+        return new NondeterministicFiniteAutomation(first.Start, second.FinalStates);
+    }
+}
+
+public class KleeneExpression(RegularExpression inner) : RegularExpression
+{
+    public RegularExpression Inner { get; } = inner;
+
+    public override NondeterministicFiniteAutomation Convert2Nfa()
+    {
+        NondeterministicFiniteAutomation inner = Inner.Convert2Nfa();
+
+        // The single new state is both start and final, so zero repetitions are accepted.
+        NondeterministicState final = new();
+        final.AddTransaction(EmptyChar.Empty, inner.Start);
+
+        foreach (NondeterministicState state in inner.FinalStates)
+        {
+            state.AddTransaction(EmptyChar.Empty, final);
+        }
+
+        return new NondeterministicFiniteAutomation(final, [final]);
+    }
+}
diff --git a/CanonSharp.Parser/CanonSharp.Parser.fsproj b/CanonSharp.Parser/CanonSharp.Parser.fsproj
index defded5..b53f1b2 100644
--- a/CanonSharp.Parser/CanonSharp.Parser.fsproj
+++ b/CanonSharp.Parser/CanonSharp.Parser.fsproj
@@ -5,9 +5,4 @@
 true
-
-
-
-
-
diff --git a/CanonSharp.Parser/LexicalAnalyzer/NondeterministicFiniteAutomation.fs b/CanonSharp.Parser/LexicalAnalyzer/NondeterministicFiniteAutomation.fs
deleted file mode 100644
index 4e8150f..0000000
--- a/CanonSharp.Parser/LexicalAnalyzer/NondeterministicFiniteAutomation.fs
+++ /dev/null
@@ -1,38 +0,0 @@
-module CanonSharp.Parser.LexicalAnalyzer.NondeterministicFiniteAutomation
-
-open System
-open System.Collections.Generic
-open CanonSharp.Parser.LexicalAnalyzer.RegularExpression
-
-type NondeterministicState(id: Guid, transaction: Option -> list) =
-    member val id = id
-    member val transaction = transaction
-
-    override this.GetHashCode() = this.id.GetHashCode()
-
-    new() = NondeterministicState(Guid.NewGuid(), fun a -> list.Empty)
-
-
-type NondeterministicFiniteAutomation(states: HashSet, entryTransaction: Option -> list) =
-    member val states = states
-    member val entryTransaction = entryTransaction
-
-let convertEmptyToNonDeterministicFiniteAutomation (expression: EmptyExpression) =
-    let final = NondeterministicState()
-    let transaction (a: Option) =
-        match a with
-        | Some _ -> list.Empty
-        | None -> [final]
-
-    let states = HashSet()
-    let _ = states.Add(final)
-
-    NondeterministicFiniteAutomation(states, transaction)
-
-
-
-
-
-let convertToNondeterministicFiniteAutomation expression: RegularExpression =
-    match expression with
-    | EmptyExpression ->
diff --git a/CanonSharp.Parser/LexicalAnalyzer/RegularExpression.fs b/CanonSharp.Parser/LexicalAnalyzer/RegularExpression.fs
deleted file mode 100644
index 2bc366f..0000000
--- a/CanonSharp.Parser/LexicalAnalyzer/RegularExpression.fs
+++ /dev/null
@@ -1,12 +0,0 @@
-module CanonSharp.Parser.LexicalAnalyzer.RegularExpression
-
-type RegularExpression =
-    | EmptyExpression
-    | SymbolExpression of symbol: char
-    | AlternationExpression of left: RegularExpression * right : RegularExpression
-    | ConcatenationExpression of first: RegularExpression * second : RegularExpression
-    | KleeneExpression of expression: RegularExpression
-
-let convertToNondeterministicFiniteAutomation expression =
-    match expression with
-    | EmptyExpression emptyExpression ->
diff --git a/CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs b/CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs
new file mode 100644
index 0000000..8fb8820
--- /dev/null
+++ b/CanonSharp.Tests/LexicalAnalyzerTests/RegularExpressionTests.cs
@@ -0,0 +1,104 @@
+using CanonSharp.Common.LexicalAnalyzer;
+
+namespace CanonSharp.Tests.LexicalAnalyzerTests;
+
+public class RegularExpressionTests
+{
+    [Fact]
+    public void KleeneTest()
+    {
+        RegularExpression expression = RegularExpression.Concatenate(
+            RegularExpression.Kleene(RegularExpression.Single('a')),
+            RegularExpression.Single('b'));
+
+        NondeterministicFiniteAutomation automation = expression.Convert2Nfa();
+
+        automation.Start.Transactions.TryGetValue(EmptyChar.Empty, out HashSet<NondeterministicState>? next);
+        Assert.NotNull(next);
+
+        Assert.Contains(next, s => s.Transactions.ContainsKey(new EmptyChar('a')));
+        Assert.Contains(next, s => s.Transactions.ContainsKey(new EmptyChar('b')));
+    }
+
+    [Fact]
+    public void AlternateTest()
+    {
+        RegularExpression expression = RegularExpression.Alternate(
+            RegularExpression.Kleene(RegularExpression.Single('a')),
+            RegularExpression.Single('b'));
+
+        NondeterministicFiniteAutomation automation = expression.Convert2Nfa();
+
+        automation.Start.Transactions.TryGetValue(EmptyChar.Empty, out HashSet<NondeterministicState>? next);
+        Assert.NotNull(next);
+
+        Assert.Contains(next, s => s.Transactions.ContainsKey(new EmptyChar('b')));
+
+        NondeterministicState? state = next.FirstOrDefault(item =>
+            item.Transactions.TryGetValue(EmptyChar.Empty, out HashSet<NondeterministicState>? targets) &&
+            targets.Count == 2);
+        Assert.NotNull(state);
+
+        Assert.Contains(state.Transactions[EmptyChar.Empty],
+            s => s.Transactions.ContainsKey(new EmptyChar('a')));
+    }
+
+    [Fact]
+    public void ConvertTest()
+    {
+        RegularExpression expression = RegularExpression.Alternate(
+            RegularExpression.Kleene(RegularExpression.Single('a')),
+            RegularExpression.Single('b'));
+
+        NondeterministicFiniteAutomation automation = expression.Convert2Nfa();
+        DeterministicFiniteAutomation dfa = DeterministicFiniteAutomation.Create(automation);
+
+        DeterministicState state2 = dfa.Start.Transaction['a'];
+        Assert.Equal(state2, state2.Transaction['a']);
+
+        DeterministicState state3 = dfa.Start.Transaction['b'];
+        Assert.Empty(state3.Transaction);
+
+        Assert.Equal(3, dfa.FinalStates.Count);
+    }
+
+    [Fact]
+    public void NondeterministicStateSetTest()
+    {
+        Dictionary<NondeterministicStateSet, char> map = [];
+
+        NondeterministicState key1 = new();
+        NondeterministicState key2 = new();
+
+        map.Add(new NondeterministicStateSet([key1, key2]), 'a');
+
+        Assert.Equal('a', map[new NondeterministicStateSet([key2, key1])]);
+    }
+
+    [Fact]
+    public void ConvertSharedSymbolTest()
+    {
+        // "a|ab": both branches leave the start set on 'a' and must merge into one DFA state.
+        RegularExpression expression = RegularExpression.Alternate(
+            RegularExpression.Single('a'),
+            RegularExpression.Concatenate(RegularExpression.Single('a'), RegularExpression.Single('b')));
+
+        DeterministicFiniteAutomation dfa = DeterministicFiniteAutomation.Create(expression.Convert2Nfa());
+
+        DeterministicState afterA = dfa.Start.Transaction['a'];
+        Assert.Contains(afterA, dfa.FinalStates);
+        Assert.Contains(afterA.Transaction['b'], dfa.FinalStates);
+        Assert.DoesNotContain(dfa.Start, dfa.FinalStates);
+    }
+
+    [Fact]
+    public void EmptyCharEqualityTest()
+    {
+        // ε stores char.MaxValue as a placeholder; it must not equal the real character.
+        EmptyChar maxChar = new(char.MaxValue);
+
+        Assert.False(maxChar.Equals(EmptyChar.Empty));
+        Assert.False(EmptyChar.Empty.Equals(maxChar));
+        Assert.True(EmptyChar.Empty.Equals(EmptyChar.Empty));
+    }
+}