Mercurial > pub > ImplabNet
changeset 169:54270c2f29f2 ref20160224
DFA refactoring
author | cin |
---|---|
date | Thu, 03 Mar 2016 08:41:02 +0300 (2016-03-03) |
parents | 8fb9c9507a26 |
children | 181119ef3b39 |
files | Implab/Automaton/DFAStateDescriptor.cs Implab/Automaton/DFATable.cs Implab/Automaton/DFATransitionTable.cs Implab/Automaton/RegularExpressions/IDFATable2.cs Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Implab/Implab.csproj |
diffstat | 6 files changed, 304 insertions(+), 296 deletions(-) [+] |
line wrap: on
line diff
--- a/Implab/Automaton/DFAStateDescriptor.cs Wed Mar 02 19:59:16 2016 +0300 +++ b/Implab/Automaton/DFAStateDescriptor.cs Thu Mar 03 08:41:02 2016 +0300 @@ -11,5 +11,16 @@ public DFAStateDescriptior(int[] transitions) : this(transitions, false) { } + + public DFAStateDescriptior(int size, bool final) { + Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); + + this.final = final; + + transitions = new int[size]; + + for (int i = 0; i < size; i++) + transitions[i] = DFAConst.UNREACHABLE_STATE; + } } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/DFATable.cs Thu Mar 03 08:41:02 2016 +0300 @@ -0,0 +1,280 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton { + public class DFATable : IDFATableBuilder { + DFAStateDescriptior[] m_dfaTable; + + int m_stateCount; + int m_symbolCount; + int m_initialState; + + readonly HashSet<int> m_finalStates = new HashSet<int>(); + readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); + + void AssertNotReadOnly() { + if (m_dfaTable != null) + throw new InvalidOperationException("The object is readonly"); + } + + + #region IDFADefinition implementation + + public DFAStateDescriptior[] GetTransitionTable() { + if (m_dfaTable == null) { + if (m_stateCount <= 0) + throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); + if (m_symbolCount <= 0) + throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount); + + m_dfaTable = ConstructTransitionTable(); + } + return m_dfaTable; + } + + public bool IsFinalState(int s) { + Safe.ArgumentInRange(s, 0, m_stateCount, "s"); + + return m_dfaTable != null ? m_dfaTable[s].final : m_finalStates.Contains(s); + } + + public IEnumerable<int> FinalStates { + get { + return m_finalStates; + } + } + + public int StateCount { + get { return m_stateCount; } + } + + public int AlphabetSize { + get { return m_symbolCount; } + } + + public int InitialState { + get { return m_initialState; } + } + + #endregion + + protected virtual DFAStateDescriptior[] ConstructTransitionTable() { + var dfaTable = new DFAStateDescriptior[m_stateCount]; + + + foreach (var t in m_transitions) { + if (dfaTable[t.s1].transitions == null) + dfaTable[t.s1] = new DFAStateDescriptior(m_symbolCount, m_finalStates.Contains(t.s1)); + + dfaTable[t.s1].transitions[t.edge] = t.s2; + } + + foreach (var s in m_finalStates) + if (!dfaTable[s].final) + m_dfaTable[s] = new DFAStateDescriptior(m_symbolCount, true); + + } + + public void SetInitialState(int s) { + Safe.ArgumentAssert(s >= 0, "s"); + m_initialState = s; + } + + public void MarkFinalState(int state) { + AssertNotReadOnly(); + m_finalStates.Add(state); + } + + public void Add(AutomatonTransition item) { + AssertNotReadOnly(); + Safe.ArgumentAssert(item.s1 >= 0, "item"); + Safe.ArgumentAssert(item.s2 >= 0, "item"); + Safe.ArgumentAssert(item.edge >= 0, "item"); + + m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1); + m_symbolCount = Math.Max(m_symbolCount, item.edge); + + m_transitions.Add(item); + } + + public void Clear() { + AssertNotReadOnly(); + + m_stateCount = 0; + m_symbolCount = 0; + m_finalStates.Clear(); + m_transitions.Clear(); + } + + public bool Contains(AutomatonTransition item) { + return m_transitions.Contains(item); + } + + public void CopyTo(AutomatonTransition[] array, int arrayIndex) { + m_transitions.CopyTo(array, arrayIndex); + } + + public bool Remove(AutomatonTransition item) { + AssertNotReadOnly(); + m_transitions.Remove(item); + } + + public int Count { + get { + return m_transitions.Count; + } + } + + public bool IsReadOnly { + get { + return m_dfaTable != null; + } + } + + public IEnumerator<AutomatonTransition> GetEnumerator() { + return m_transitions.GetEnumerator(); + } + + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { + return GetEnumerator(); + } + + /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> + /// <remarks> + /// В процессе построения минимального автомата требуется разделить множество состояний, + /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества + /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний, + /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний. + /// </remarks> + /// <returns>The final states.</returns> + protected virtual IEnumerable<HashSet<int>> GroupFinalStates() { + return new HashSet<int>[] { m_finalStates }; + } + + protected void Optimize<TInput, TState>( + IDFATableBuilder optimalDFA, + IAlphabet<TInput> inputAlphabet, + IAlphabetBuilder<TInput> optimalInputAlphabet, + IAlphabet<TState> stateAlphabet, + IAlphabetBuilder<TState> optimalStateAlphabet + ) { + Safe.ArgumentNotNull(optimalDFA, "dfa"); + Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); + Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); + Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); + Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); + + if (inputAlphabet.Count != m_symbolCount) + throw new InvalidOperationException("The input symbols aphabet mismatch"); + if (stateAlphabet.Count != m_stateCount) + throw new InvalidOperationException("The states alphabet mismatch"); + + var setComparer = new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + s => s.Sum(x => x.GetHashCode()) + ); + + var optimalStates = new HashSet<HashSet<int>>(setComparer); + var queue = new HashSet<HashSet<int>>(setComparer); + + // получаем конечные состояния, сгруппированные по маркерам + optimalStates.UnionWith( + GroupFinalStates() + ); + + var state = new HashSet<int>( + Enumerable + .Range(0, m_stateCount - 1) + .Where(i => !m_finalStates.Contains(i)) + ); + + optimalStates.Add(state); + queue.Add(state); + + var rmap = m_transitions + .GroupBy(t => t.s2) + .ToLookup( + g => g.Key, // s2 + g => g.ToLookup(t => t.edge, t => t.s1) + ); + + while (queue.Count > 0) { + var stateA = queue.First(); + queue.Remove(stateA); + + for (int c = 0; c < m_symbolCount; c++) { + var stateX = new HashSet<int>(); + foreach(var a in stateA) + stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' + + foreach (var stateY in optimalStates.ToArray()) { + if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { + var stateR1 = new HashSet<int>(stateY); + var stateR2 = new HashSet<int>(stateY); + + stateR1.IntersectWith(stateX); + stateR2.ExceptWith(stateX); + + optimalStates.Remove(stateY); + optimalStates.Add(stateR1); + optimalStates.Add(stateR2); + + if (queue.Contains(stateY)) { + queue.Remove(stateY); + queue.Add(stateR1); + queue.Add(stateR2); + } else { + queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); + } + } + } + } + } + + // карта получения оптимального состояния по соотвествующему ему простому состоянию + var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); + + // получаем минимальный алфавит + // входные символы не различимы, если Move(s,a1) == Move(s,a2) + var optimalAlphabet = m_transitions + .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); + + var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); + + // построение автомата + optimalDFA.SetInitialState(statesMap[m_initialState]); + + foreach (var sf in m_finalStates.GroupBy(s => statesMap[s])) + optimalDFA.MarkFinalState(sf.Key); + + foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) + optimalDFA.Add(t); + + } + + protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { + Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); + Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); + + var inputMap = inputAlphabet.CreateReverseMap(); + var stateMap = stateAlphabet.CreateReverseMap(); + + for (int i = 0; i < inputMap.Length; i++) + Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); + + + foreach(var t in m_transitions) + Console.WriteLine( + "[{0}] -{{{1}}}-> [{2}]{3}", + stateMap[t.s1], + String.Join(",", inputMap[t.edge]), + stateMap[t.s2], + m_finalStates.Contains(t.s2) ? "$" : "" + ); + + } + + } +}
--- a/Implab/Automaton/DFATransitionTable.cs Wed Mar 02 19:59:16 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,293 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton { - public class DFATransitionTable : IDFATableBuilder { - DFAStateDescriptior[] m_dfaTable; - - int m_stateCount; - int m_symbolCount; - int m_initialState; - - readonly HashSet<int> m_finalStates = new HashSet<int>(); - readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); - - - #region IDFADefinition implementation - - public DFAStateDescriptior[] GetTransitionTable() { - if (m_dfaTable == null) { - if (m_stateCount <= 0) - throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); - if (m_symbolCount <= 0) - throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount); - - m_dfaTable = ConstructTransitionTable(); - } - return m_dfaTable; - } - - public bool IsFinalState(int s) { - Safe.ArgumentInRange(s, 0, m_stateCount, "s"); - - return m_dfaTable != null ? m_dfaTable[s].final : m_finalStates.Contains(s); - } - - public IEnumerable<int> FinalStates { - get { - return m_finalStates; - } - } - - public int StateCount { - get { return m_stateCount; } - } - - public int AlphabetSize { - get { return m_symbolCount; } - } - - public int InitialState { - get { return m_initialState; } - } - - int[] NewTransitionArray() { - var t = new int[m_symbolCount]; - - for (var i = 0; i < m_symbolCount; i++) - t[i] = DFAConst.UNREACHABLE_STATE; - return t; - } - - #endregion - - protected virtual DFAStateDescriptior[] ConstructTransitionTable() { - var dfaTable = new DFAStateDescriptior[m_stateCount]; - - - foreach (var t in m_transitions) { - if (dfaTable[t.s1].transitions == null) - dfaTable[t.s1] = new DFAStateDescriptior(NewTransitionArray(), m_finalStates.Contains(t.s1)); - - dfaTable[t.s1].transitions[t.edge] = t.s2; - } - - foreach (var s in m_finalStates) - if (!dfaTable[s].final) - m_dfaTable[s] = new DFAStateDescriptior(NewTransitionArray, true); - - } - - #region IDFADefinitionBuilder - - public void DefineTransition(int s1, int s2, int symbol) { - if (m_dfaTable != null) - throw new InvalidOperationException("The transition table is already built"); - - Safe.ArgumentAssert(s1 > 0, "s1"); - Safe.ArgumentAssert(s2 > 0, "s2"); - Safe.ArgumentAssert(symbol >= 0, "symbol"); - - m_stateCount = Math.Max(Math.Max(m_stateCount, s1 + 1), s2 + 1); - m_symbolCount = Math.Max(m_symbolCount, symbol + 1); - - m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); - } - - public void MarkFinalState(int state, params TTag[] tags) { - if (m_dfaTable != null) - throw new InvalidOperationException("The transition table is already built"); - - m_finalStates[state] = tags; - } - - public void SetInitialState(int s) { - Safe.ArgumentAssert(s >= 0, "s"); - m_initialState = s; - } - - - #endregion - - protected virtual IEnumerable<HashSet<int>> GroupFinalStates() { - return new HashSet<int>[] { m_finalStates }; - } - - protected void Optimize<TInput, TState>( - IDFATableBuilder optimalDFA, - IAlphabet<TInput> inputAlphabet, - IAlphabetBuilder<TInput> optimalInputAlphabet, - IAlphabet<TState> stateAlphabet, - IAlphabetBuilder<TState> optimalStateAlphabet - ) { - Safe.ArgumentNotNull(optimalDFA, "dfa"); - Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); - Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); - Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); - Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); - - if (inputAlphabet.Count != m_symbolCount) - throw new InvalidOperationException("The input symbols aphabet mismatch"); - if (stateAlphabet.Count != m_stateCount) - throw new InvalidOperationException("The states alphabet mismatch"); - - var setComparer = new CustomEqualityComparer<HashSet<int>>( - (x, y) => x.SetEquals(y), - s => s.Sum(x => x.GetHashCode()) - ); - - var optimalStates = new HashSet<HashSet<int>>(setComparer); - var queue = new HashSet<HashSet<int>>(setComparer); - - // получаем конечные состояния, сгруппированные по маркерам - optimalStates.UnionWith( - GroupFinalStates() - ); - - var state = new HashSet<int>( - Enumerable - .Range(0, m_stateCount - 1) - .Where(i => !m_finalStates.Contains(i)) - ); - - optimalStates.Add(state); - queue.Add(state); - - var rmap = m_transitions - .GroupBy(t => t.s2) - .ToLookup( - g => g.Key, // s2 - g => g.ToLookup(t => t.edge, t => t.s1) - ); - - while (queue.Count > 0) { - var stateA = queue.First(); - queue.Remove(stateA); - - for (int c = 0; c < m_symbolCount; c++) { - var stateX = new HashSet<int>(); - foreach(var a in stateA) - stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' - - foreach (var stateY in optimalStates.ToArray()) { - if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { - var stateR1 = new HashSet<int>(stateY); - var stateR2 = new HashSet<int>(stateY); - - stateR1.IntersectWith(stateX); - stateR2.ExceptWith(stateX); - - optimalStates.Remove(stateY); - optimalStates.Add(stateR1); - optimalStates.Add(stateR2); - - if (queue.Contains(stateY)) { - queue.Remove(stateY); - queue.Add(stateR1); - queue.Add(stateR2); - } else { - queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); - } - } - } - } - } - - // карта получения оптимального состояния по соотвествующему ему простому состоянию - var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); - - // получаем минимальный алфавит - // входные символы не различимы, если Move(s,a1) == Move(s,a2) - var optimalAlphabet = m_transitions - .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); - - var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); - - var optimalTags = m_finalStates - .GroupBy(s => statesMap[s]) - .ToDictionary( - g => g.Key, - g => g.ToArray() - ); - - // построение автомата - optimalDFA.SetInitialState(statesMap[m_initialState]); - - foreach (var pair in optimalTags) - optimalDFA.MarkFinalState(pair.Key, pair.Value); - - foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) - optimalDFA.Add(new AutomatonTransition(t.s1, t.s2, t.edge)); - - } - - public void MarkFinalState(int state) { - throw new NotImplementedException(); - } - - public void Add(AutomatonTransition item) { - throw new NotImplementedException(); - } - - public void Clear() { - throw new NotImplementedException(); - } - - public bool Contains(AutomatonTransition item) { - throw new NotImplementedException(); - } - - public void CopyTo(AutomatonTransition[] array, int arrayIndex) { - throw new NotImplementedException(); - } - - public bool Remove(AutomatonTransition item) { - throw new NotImplementedException(); - } - - public int Count { - get { - throw new NotImplementedException(); - } - } - - public bool IsReadOnly { - get { - throw new NotImplementedException(); - } - } - - public IEnumerator<AutomatonTransition> GetEnumerator() { - throw new NotImplementedException(); - } - - System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { - throw new NotImplementedException(); - } - - protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { - Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); - Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); - - var inputMap = inputAlphabet.CreateReverseMap(); - var stateMap = stateAlphabet.CreateReverseMap(); - - for (int i = 0; i < inputMap.Length; i++) - Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); - - - foreach(var t in m_transitions) - Console.WriteLine( - "[{0}] -{{{1}}}-> [{2}]{3}", - stateMap[t.s1], - String.Join(",", inputMap[t.edge]), - stateMap[t.s2], - m_finalStates.ContainsKey(t.s2) ? "$" : "" - ); - - } - - } -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/IDFATable2.cs Thu Mar 03 08:41:02 2016 +0300 @@ -0,0 +1,9 @@ +using System; + +namespace Implab.Automaton.RegularExpressions { + public interface IDFATable2<TTag> : IDFATable { + void MarkFinalState(int state, TTag[] tags); + + } +} +
--- a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Wed Mar 02 19:59:16 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Thu Mar 03 08:41:02 2016 +0300 @@ -1,7 +1,7 @@ using System; namespace Implab.Automaton.RegularExpressions { - public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> { + public class RegularDFADefinition<TInput, TTag> : DFATable { readonly IAlphabet<TInput> m_alphabet; @@ -18,7 +18,7 @@ } } - protected override DFAStateDescriptior<TTag>[] ConstructTransitionTable() { + protected override DFAStateDescriptior[] ConstructTransitionTable() { if (InputAlphabet.Count != m_alphabet.Count) throw new InvalidOperationException("The alphabet doesn't match the transition table");
--- a/Implab/Implab.csproj Wed Mar 02 19:59:16 2016 +0300 +++ b/Implab/Implab.csproj Thu Mar 03 08:41:02 2016 +0300 @@ -183,13 +183,14 @@ <Compile Include="Formats\JSON\StringTranslator.cs" /> <Compile Include="Automaton\MapAlphabet.cs" /> <Compile Include="Automaton\DummyAlphabet.cs" /> - <Compile Include="Automaton\DFATransitionTable.cs" /> <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> <Compile Include="Formats\CharAlphabet.cs" /> <Compile Include="Formats\ByteAlphabet.cs" /> <Compile Include="Formats\RegularCharDFADefinition.cs" /> <Compile Include="Automaton\IDFATable.cs" /> <Compile Include="Automaton\IDFATableBuilder.cs" /> + <Compile Include="Automaton\DFATable.cs" /> + <Compile Include="Automaton\RegularExpressions\IDFATable2.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <ItemGroup />