diff Implab/Automaton/RegularExpressions/RegularDFA.cs @ 172:92d5278d1b10 ref20160224

Working on text scanner
author cin
date Mon, 14 Mar 2016 01:19:38 +0300
parents
children 0c3c69fe225b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs	Mon Mar 14 01:19:38 2016 +0300
@@ -0,0 +1,89 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Implab.Automaton.RegularExpressions {
+    public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
+
+        readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
+        readonly IAlphabet<TInput> m_alphabet;
+
+        public RegularDFA(IAlphabet<TInput> alphabet) {
+            Safe.ArgumentNotNull(alphabet, "aplhabet");
+
+            m_alphabet = alphabet;
+        }
+
+
+        public IAlphabet<TInput> InputAlphabet {
+            get {
+                return m_alphabet;
+            }
+        }
+
+        public void MarkFinalState(int s, TTag[] tags) {
+            MarkFinalState(s);
+            SetStateTag(s, tags);
+        }
+
+        public void SetStateTag(int s, TTag[] tags) {
+            Safe.ArgumentNotNull(tags, "tags");
+            m_tags[s] = tags;
+        }
+
+        public TTag[] GetStateTag(int s) {
+            TTag[] tags;
+            return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
+        }
+
+        public new DFAStateDescriptor<TTag>[] CreateTransitionTable() {
+            var table = new DFAStateDescriptor<TTag>[StateCount];
+
+            foreach (var t in this) {
+                if (table[t.s1].transitions == null)
+                    table[t.s1] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1));
+                if (table[t.s2].transitions == null)
+                    table[t.s2] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2));
+                table[t.s1].transitions[t.edge] = t.s2;
+            }
+
+            return table;
+        }
+
+        /// <summary>
+        /// Optimize the specified alphabet.
+        /// </summary>
+        /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
+        public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
+            Safe.ArgumentNotNull(alphabet, "alphabet");
+
+            var dfa = new RegularDFA<TInput, TTag>(alphabet);
+
+            var states = new DummyAlphabet(StateCount);
+            var alphaMap = new Dictionary<int,int>();
+            var stateMap = new Dictionary<int,int>();
+
+            Optimize(dfa, alphaMap, stateMap); 
+
+            // mark tags in the new DFA
+            foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
+                dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
+
+            // make the alphabet for the new DFA
+            foreach (var pair in alphaMap)
+                alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
+            
+            return dfa;
+        }
+
+        protected override IEnumerable<HashSet<int>> GroupFinalStates() {
+            var arrayComparer = new CustomEqualityComparer<TTag[]>(
+                (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
+                x => x.Sum(it => x.GetHashCode())
+            );
+            return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
+        }
+
+    }
+}
+