Mercurial > pub > ImplabNet
diff Implab/Parsing/DFADefinition.cs @ 158:130781364799 v2
refactoring, code cleanup
author | cin |
---|---|
date | Thu, 18 Feb 2016 14:34:02 +0300 |
parents | |
children | 5558e43c79bb |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/DFADefinition.cs Thu Feb 18 14:34:02 2016 +0300 @@ -0,0 +1,262 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Implab.Parsing { + public class DFADefinition : IDFADefinition { + readonly List<DFAStateDescriptior> m_states; + + public const int INITIAL_STATE = 1; + public const int UNREACHEBLE_STATE = 0; + + DFAStateDescriptior[] m_statesArray; + readonly int m_alpabetSize; + + public DFADefinition(int alphabetSize) { + m_states = new List<DFAStateDescriptior>(); + m_alpabetSize = alphabetSize; + + m_states.Add(new DFAStateDescriptior()); + } + + public DFAStateDescriptior[] States { + get { + if (m_statesArray == null) + m_statesArray = m_states.ToArray(); + return m_statesArray; + } + } + + public bool InitialStateIsFinal { + get { + return m_states[INITIAL_STATE].final; + } + } + + public int AddState() { + var index = m_states.Count; + m_states.Add(new DFAStateDescriptior { + final = false, + transitions = new int[AlphabetSize] + }); + m_statesArray = null; + + return index; + } + + public int AddState(int[] tag) { + var index = m_states.Count; + bool final = tag != null && tag.Length != 0; + m_states.Add(new DFAStateDescriptior { + final = final, + transitions = new int[AlphabetSize], + tag = final ? tag : null + }); + m_statesArray = null; + return index; + } + + public void DefineTransition(int s1,int s2, int symbol) { + Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1"); + Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2"); + Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol"); + + m_states[s1].transitions[symbol] = s2; + } + + public void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) { + Safe.ArgumentNotNull(minimalDFA, "minimalDFA"); + Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet"); + + var setComparer = new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + (s) => s.Sum(x => x.GetHashCode()) + ); + + var arrayComparer = new CustomEqualityComparer<int[]>( + (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), + (a) => a.Sum(x => x.GetHashCode()) + ); + + var optimalStates = new HashSet<HashSet<int>>(setComparer); + var queue = new HashSet<HashSet<int>>(setComparer); + + foreach (var g in Enumerable + .Range(INITIAL_STATE, m_states.Count-1) + .Select(i => new { + index = i, + descriptor = m_states[i] + }) + .Where(x => x.descriptor.final) + .GroupBy(x => x.descriptor.tag, arrayComparer) + ) { + optimalStates.Add(new HashSet<int>(g.Select(x => x.index))); + } + + var state = new HashSet<int>( + Enumerable + .Range(INITIAL_STATE, m_states.Count - 1) + .Where(i => !m_states[i].final) + ); + optimalStates.Add(state); + queue.Add(state); + + while (queue.Count > 0) { + var stateA = queue.First(); + queue.Remove(stateA); + + for (int c = 0; c < AlphabetSize; c++) { + var stateX = new HashSet<int>(); + + for(int s = 1; s < m_states.Count; s++) { + if (stateA.Contains(m_states[s].transitions[c])) + stateX.Add(s); + } + + foreach (var stateY in optimalStates.ToArray()) { + if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { + var stateR1 = new HashSet<int>(stateY); + var stateR2 = new HashSet<int>(stateY); + + stateR1.IntersectWith(stateX); + stateR2.ExceptWith(stateX); + + optimalStates.Remove(stateY); + optimalStates.Add(stateR1); + optimalStates.Add(stateR2); + + if (queue.Contains(stateY)) { + queue.Remove(stateY); + queue.Add(stateR1); + queue.Add(stateR2); + } else { + queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); + } + } + } + } + } + + // строим карты соотвествия оптимальных состояний с оригинальными + + var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE)); + + // карта получения оптимального состояния по соотвествующему ему простому состоянию + int[] reveseOptimalMap = new int[m_states.Count]; + // карта с индексами оптимальных состояний + HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1]; + { + optimalMap[0] = new HashSet<int>(); // unreachable state + optimalMap[1] = initialState; // initial state + foreach (var ss in initialState) + reveseOptimalMap[ss] = 1; + + int i = 2; + foreach (var s in optimalStates) { + if (s.SetEquals(initialState)) + continue; + optimalMap[i] = s; + foreach (var ss in s) + reveseOptimalMap[ss] = i; + i++; + } + } + + // получаем минимальный алфавит + + var minClasses = new HashSet<HashSet<int>>(setComparer); + var alphaQueue = new Queue<HashSet<int>>(); + alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize))); + + for (int s = 1 ; s < optimalMap.Length; s++) { + var newQueue = new Queue<HashSet<int>>(); + + foreach (var A in alphaQueue) { + if (A.Count == 1) { + minClasses.Add(A); + continue; + } + + // различаем классы символов, которые переводят в различные оптимальные состояния + // optimalState -> alphaClass + var classes = new Dictionary<int, HashSet<int>>(); + + foreach (var term in A) { + // ищем все переходы класса по символу term + var s2 = reveseOptimalMap[ + optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != 0) // первое допустимое элементарное состояние, если есть + ]; + + HashSet<int> A2; + if (!classes.TryGetValue(s2, out A2)) { + A2 = new HashSet<int>(); + newQueue.Enqueue(A2); + classes[s2] = A2; + } + A2.Add(term); + } + } + + if (newQueue.Count == 0) + break; + alphaQueue = newQueue; + } + + foreach (var A in alphaQueue) + minClasses.Add(A); + + var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses); + + // построение автомата + + var states = new int[ optimalMap.Length ]; + states[0] = UNREACHEBLE_STATE; + + for(var s = INITIAL_STATE; s < states.Length; s++) { + var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray(); + if (tags.Length > 0) + states[s] = minimalDFA.AddState(tags); + else + states[s] = minimalDFA.AddState(); + } + + Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE); + + for (int s1 = 1; s1 < m_states.Count; s1++) { + for (int c = 0; c < AlphabetSize; c++) { + var s2 = m_states[s1].transitions[c]; + if (s2 != UNREACHEBLE_STATE) { + minimalDFA.DefineTransition( + reveseOptimalMap[s1], + reveseOptimalMap[s2], + alphabetMap[c] + ); + } + } + } + + } + + public void PrintDFA<TA>(IAlphabet<TA> alphabet) { + + var reverseMap = alphabet.CreateReverseMap(); + + for (int i = 1; i < reverseMap.Length; i++) { + Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i])); + } + + for (int i = 1; i < m_states.Count; i++) { + var s = m_states[i]; + for (int c = 0; c < AlphabetSize; c++) + if (s.transitions[c] != UNREACHEBLE_STATE) + Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : ""); + } + } + + public int AlphabetSize { + get; + } + } +}