Mercurial > pub > ImplabNet
view Implab/Parsing/DFADefinitionBase.cs @ 89:ce0171cacec4 v2
improved performance of a chained map operation
author | cin |
---|---|
date | Wed, 08 Oct 2014 02:19:45 +0400 |
parents | c0bf853aa04f |
children | 97fbbf816844 |
line wrap: on
line source
using Implab; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; namespace Implab.Parsing { public abstract class DFADefinitionBase : IDFADefinition { readonly List<DFAStateDescriptior> m_states; public const int INITIAL_STATE = 1; public const int UNREACHEBLE_STATE = 0; DFAStateDescriptior[] m_statesArray; public DFADefinitionBase() { m_states = new List<DFAStateDescriptior>(); m_states.Add(new DFAStateDescriptior()); } public DFAStateDescriptior[] States { get { if (m_statesArray == null) m_statesArray = m_states.ToArray(); return m_statesArray; } } public bool InitialStateIsFinal { get { return m_states[INITIAL_STATE].final; } } public int AddState() { var index = m_states.Count; m_states.Add(new DFAStateDescriptior { final = false, transitions = new int[AlphabetSize] }); return index; } public int AddState(int[] tag) { var index = m_states.Count; bool final = tag == null || tag.Length == 0 ? false : true; m_states.Add(new DFAStateDescriptior { final = final, transitions = new int[AlphabetSize], tag = final ? tag : null }); return index; } public void DefineTransition(int s1,int s2, int symbol) { Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1"); Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2"); Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol"); m_states[s1].transitions[symbol] = s2; } protected void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) { Safe.ArgumentNotNull(minimalDFA, "minimalDFA"); Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet"); var setComparer = new CustomEqualityComparer<HashSet<int>>( (x, y) => x.SetEquals(y), (s) => s.Sum(x => x.GetHashCode()) ); var arrayComparer = new CustomEqualityComparer<int[]>( (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), (a) => a.Sum(x => x.GetHashCode()) ); var optimalStates = new HashSet<HashSet<int>>(setComparer); var queue = new HashSet<HashSet<int>>(setComparer); foreach (var g in Enumerable .Range(INITIAL_STATE, m_states.Count-1) .Select(i => new { index = i, descriptor = m_states[i] }) .Where(x => x.descriptor.final) .GroupBy(x => x.descriptor.tag, arrayComparer) ) { optimalStates.Add(new HashSet<int>(g.Select(x => x.index))); } var state = new HashSet<int>( Enumerable .Range(INITIAL_STATE, m_states.Count - 1) .Where(i => !m_states[i].final) ); optimalStates.Add(state); queue.Add(state); while (queue.Count > 0) { var stateA = queue.First(); queue.Remove(stateA); for (int c = 0; c < AlphabetSize; c++) { var stateX = new HashSet<int>(); for(int s = 1; s < m_states.Count; s++) { if (stateA.Contains(m_states[s].transitions[c])) stateX.Add(s); } foreach (var stateY in optimalStates.ToArray()) { if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { var stateR1 = new HashSet<int>(stateY); var stateR2 = new HashSet<int>(stateY); stateR1.IntersectWith(stateX); stateR2.ExceptWith(stateX); optimalStates.Remove(stateY); optimalStates.Add(stateR1); optimalStates.Add(stateR2); if (queue.Contains(stateY)) { queue.Remove(stateY); queue.Add(stateR1); queue.Add(stateR2); } else { queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); } } } } } // строим карты соотвествия оптимальных состояний с оригинальными var initialState = optimalStates.Where(x => x.Contains(INITIAL_STATE)).Single(); // карта получения оптимального состояния по соотвествующему ему простому состоянию int[] reveseOptimalMap = new int[m_states.Count]; // карта с индексами оптимальных состояний HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1]; { optimalMap[0] = new HashSet<int>(); // unreachable state optimalMap[1] = initialState; // initial state foreach (var ss in initialState) reveseOptimalMap[ss] = 1; int i = 2; foreach (var s in optimalStates) { if (s.SetEquals(initialState)) continue; optimalMap[i] = s; foreach (var ss in s) reveseOptimalMap[ss] = i; i++; } } // получаем минимальный алфавит var minClasses = new HashSet<HashSet<int>>(setComparer); var alphaQueue = new Queue<HashSet<int>>(); alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize))); for (int s = 1 ; s < optimalMap.Length; s++) { var newQueue = new Queue<HashSet<int>>(); foreach (var A in alphaQueue) { if (A.Count == 1) { minClasses.Add(A); continue; } // различаем классы символов, которые переводят в различные оптимальные состояния // optimalState -> alphaClass var classes = new Dictionary<int, HashSet<int>>(); foreach (var term in A) { // ищем все переходы класса по символу term var s2 = reveseOptimalMap[ optimalMap[s].Select(x => m_states[x].transitions[term]) // все элементарные состояния, куда переходит класс s .Where(x => x != 0) // только допустимые .FirstOrDefault() // первое допустимое элементарное состояние, если есть ]; HashSet<int> A2; if (!classes.TryGetValue(s2, out A2)) { A2 = new HashSet<int>(); newQueue.Enqueue(A2); classes[s2] = A2; } A2.Add(term); } } if (newQueue.Count == 0) break; alphaQueue = newQueue; } foreach (var A in alphaQueue) minClasses.Add(A); var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses); // построение автомата var states = new int[ optimalMap.Length ]; states[0] = UNREACHEBLE_STATE; for(var s = INITIAL_STATE; s < states.Length; s++) { var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray(); if (tags.Length > 0) states[s] = minimalDFA.AddState(tags); else states[s] = minimalDFA.AddState(); } Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE); for (int s1 = 1; s1 < m_states.Count; s1++) { for (int c = 0; c < AlphabetSize; c++) { var s2 = m_states[s1].transitions[c]; if (s2 != UNREACHEBLE_STATE) { minimalDFA.DefineTransition( reveseOptimalMap[s1], reveseOptimalMap[s2], alphabetMap[c] ); } } } } protected void PrintDFA<TA>(IAlphabet<TA> alphabet) { var reverseMap = alphabet.CreateReverseMap(); for (int i = 1; i < reverseMap.Length; i++) { Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i])); } for (int i = 1; i < m_states.Count; i++) { var s = m_states[i]; for (int c = 0; c < AlphabetSize; c++) if (s.transitions[c] != UNREACHEBLE_STATE) Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : ""); } } public abstract int AlphabetSize { get; } } }