diff Implab/Parsing/DFADefinition.cs @ 158:130781364799 v2

refactoring, code cleanup
author cin
date Thu, 18 Feb 2016 14:34:02 +0300
parents
children 5558e43c79bb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/DFADefinition.cs	Thu Feb 18 14:34:02 2016 +0300
@@ -0,0 +1,262 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Implab.Parsing {
+    public class DFADefinition : IDFADefinition {
+        readonly List<DFAStateDescriptior> m_states;
+        
+        public const int INITIAL_STATE = 1;
+        public const int UNREACHEBLE_STATE = 0;
+
+        DFAStateDescriptior[] m_statesArray;
+        readonly int m_alpabetSize;
+
+        public DFADefinition(int alphabetSize) {
+            m_states = new List<DFAStateDescriptior>();
+            m_alpabetSize = alphabetSize;
+        
+            m_states.Add(new DFAStateDescriptior());
+        }
+
+        public DFAStateDescriptior[] States {
+            get {
+                if (m_statesArray == null)
+                    m_statesArray = m_states.ToArray();
+                return m_statesArray;
+            }
+        }
+
+        public bool InitialStateIsFinal {
+            get {
+                return m_states[INITIAL_STATE].final;
+            }
+        }
+
+        public int AddState() {
+            var index = m_states.Count;
+            m_states.Add(new DFAStateDescriptior {
+                final = false,
+                transitions = new int[AlphabetSize]
+            });
+            m_statesArray = null;
+
+            return index;
+        }
+
+        public int AddState(int[] tag) {
+            var index = m_states.Count;
+            bool final = tag != null && tag.Length != 0;
+            m_states.Add(new DFAStateDescriptior {
+                final = final,
+                transitions = new int[AlphabetSize],
+                tag = final ? tag : null
+            });
+            m_statesArray = null;
+            return index;
+        }
+
+        public void DefineTransition(int s1,int s2, int symbol) {
+            Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1");
+            Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2");
+            Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol");
+
+            m_states[s1].transitions[symbol] = s2;
+        }
+
+        public void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) {
+            Safe.ArgumentNotNull(minimalDFA, "minimalDFA");
+            Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");
+
+            var setComparer = new CustomEqualityComparer<HashSet<int>>(
+                (x, y) => x.SetEquals(y),
+                (s) => s.Sum(x => x.GetHashCode())
+            );
+
+            var arrayComparer = new CustomEqualityComparer<int[]>(
+                (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)),
+                (a) => a.Sum(x => x.GetHashCode())
+            );
+
+            var optimalStates = new HashSet<HashSet<int>>(setComparer);
+            var queue = new HashSet<HashSet<int>>(setComparer);
+
+            foreach (var g in Enumerable
+                .Range(INITIAL_STATE, m_states.Count-1)
+                .Select(i => new {
+                    index = i,
+                    descriptor = m_states[i]
+                })
+                .Where(x => x.descriptor.final)
+                .GroupBy(x => x.descriptor.tag, arrayComparer)
+            ) {
+                optimalStates.Add(new HashSet<int>(g.Select(x => x.index)));
+            }
+
+            var state = new HashSet<int>(
+                Enumerable
+                    .Range(INITIAL_STATE, m_states.Count - 1)
+                    .Where(i => !m_states[i].final)
+            );
+            optimalStates.Add(state);
+            queue.Add(state);
+
+            while (queue.Count > 0) {
+                var stateA = queue.First();
+                queue.Remove(stateA);
+
+                for (int c = 0; c < AlphabetSize; c++) {
+                    var stateX = new HashSet<int>();
+
+                    for(int s = 1; s < m_states.Count; s++) {
+                        if (stateA.Contains(m_states[s].transitions[c]))
+                            stateX.Add(s);
+                    }
+
+                    foreach (var stateY in optimalStates.ToArray()) {
+                        if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
+                            var stateR1 = new HashSet<int>(stateY);
+                            var stateR2 = new HashSet<int>(stateY);
+
+                            stateR1.IntersectWith(stateX);
+                            stateR2.ExceptWith(stateX);
+
+                            optimalStates.Remove(stateY);
+                            optimalStates.Add(stateR1);
+                            optimalStates.Add(stateR2);
+
+                            if (queue.Contains(stateY)) {
+                                queue.Remove(stateY);
+                                queue.Add(stateR1);
+                                queue.Add(stateR2);
+                            } else {
+                                queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
+                            }
+                        }
+                    }
+                }
+            }
+
+            // строим карты соотвествия оптимальных состояний с оригинальными
+
+            var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE));
+
+            // карта получения оптимального состояния по соотвествующему ему простому состоянию
+            int[] reveseOptimalMap = new int[m_states.Count];
+            // карта с индексами оптимальных состояний 
+            HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1];
+            {
+                optimalMap[0] = new HashSet<int>(); // unreachable state
+                optimalMap[1] = initialState; // initial state
+                foreach (var ss in initialState)
+                    reveseOptimalMap[ss] = 1;
+
+                int i = 2;
+                foreach (var s in optimalStates) {
+                    if (s.SetEquals(initialState))
+                        continue;
+                    optimalMap[i] = s;
+                    foreach (var ss in s)
+                        reveseOptimalMap[ss] = i;
+                    i++;
+                }
+            }
+
+            // получаем минимальный алфавит
+
+            var minClasses = new HashSet<HashSet<int>>(setComparer);
+            var alphaQueue = new Queue<HashSet<int>>();
+            alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
+
+            for (int s = 1 ; s < optimalMap.Length; s++) {
+                var newQueue = new Queue<HashSet<int>>();
+
+                foreach (var A in alphaQueue) {
+                    if (A.Count == 1) {
+                        minClasses.Add(A);
+                        continue;
+                    }
+
+                    // различаем классы символов, которые переводят в различные оптимальные состояния
+                    // optimalState -> alphaClass
+                    var classes = new Dictionary<int, HashSet<int>>();
+
+                    foreach (var term in A) {
+                        // ищем все переходы класса по символу term
+                        var s2 = reveseOptimalMap[
+                                     optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != 0) // первое допустимое элементарное состояние, если есть
+                                 ];
+
+                        HashSet<int> A2;
+                        if (!classes.TryGetValue(s2, out A2)) {
+                            A2 = new HashSet<int>();
+                            newQueue.Enqueue(A2);
+                            classes[s2] = A2;
+                        }
+                        A2.Add(term);
+                    }
+                }
+
+                if (newQueue.Count == 0)
+                    break;
+                alphaQueue = newQueue;
+            }
+
+            foreach (var A in alphaQueue)
+                minClasses.Add(A);
+
+            var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);
+            
+            // построение автомата
+
+            var states = new int[ optimalMap.Length ];
+            states[0] = UNREACHEBLE_STATE;
+            
+            for(var s = INITIAL_STATE; s < states.Length; s++) {
+                var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray();
+                if (tags.Length > 0)
+                    states[s] = minimalDFA.AddState(tags);
+                else
+                    states[s] = minimalDFA.AddState();
+            }
+
+            Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);
+
+            for (int s1 = 1; s1 < m_states.Count;  s1++) {
+                for (int c = 0; c < AlphabetSize; c++) {
+                    var s2 = m_states[s1].transitions[c];
+                    if (s2 != UNREACHEBLE_STATE) {
+                        minimalDFA.DefineTransition(
+                            reveseOptimalMap[s1],
+                            reveseOptimalMap[s2],
+                            alphabetMap[c]
+                        );
+                    }
+                }
+            }
+
+        }
+
+        public void PrintDFA<TA>(IAlphabet<TA> alphabet) {
+            
+            var reverseMap = alphabet.CreateReverseMap();
+            
+            for (int i = 1; i < reverseMap.Length; i++) {
+                Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i]));
+            }
+
+            for (int i = 1; i < m_states.Count; i++) {
+                var s = m_states[i];
+                for (int c = 0; c < AlphabetSize; c++)
+                    if (s.transitions[c] != UNREACHEBLE_STATE)
+                        Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : "");
+            }
+        }
+
+        public int AlphabetSize {
+            get;
+        }
+    }
+}