diff Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 178:d5c5db0335ee ref20160224

working on JSON parser
author cin
date Wed, 23 Mar 2016 19:51:45 +0300
parents a0ff6a0e9c44
children 302ca905c19e
line wrap: on
line diff
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs	Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs	Wed Mar 23 19:51:45 2016 +0300
@@ -10,7 +10,7 @@
     /// регулярное выражение и вычисляет followpos, затем используется метод
     /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
     /// </summary>
-    public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
+    public class RegularExpressionVisitor : IVisitor {
         int m_idx;
         Token m_root;
         HashSet<int> m_firstpos;
@@ -19,13 +19,23 @@
         readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
         readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
         readonly HashSet<int> m_ends = new HashSet<int>();
-        readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
 
-        public Dictionary<int, HashSet<int>> FollowposMap {
-            get { return m_followpos; }
+        readonly IDFATableBuilder m_builder;
+        readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
+            false,
+            new CustomEqualityComparer<HashSet<int>>(
+                (x, y) => x.SetEquals(y),
+                x => x.Sum(n => n.GetHashCode())
+            )
+        );
+
+        public RegularExpressionVisitor(IDFATableBuilder builder) {
+            Safe.ArgumentNotNull(builder, "builder");
+
+            m_builder = builder;
         }
 
-        public HashSet<int> Followpos(int pos) {
+        HashSet<int> Followpos(int pos) {
             HashSet<int> set;
             return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
         }
@@ -42,6 +52,9 @@
             return false;
         }
 
+        protected int Index {
+            get { return m_idx; }
+        }
 
         public void Visit(AltToken token) {
             if (m_root == null)
@@ -112,45 +125,23 @@
             m_lastpos = new HashSet<int>(new[] { m_idx });
         }
 
-        public void Visit(EndToken<TTag> token) {
+        public virtual void Visit(EndToken token) {
             if (m_root == null)
                 m_root = token;
             m_idx++;
-            m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
-            m_firstpos = new HashSet<int>(new[] { m_idx });
-            m_lastpos = new HashSet<int>(new[] { m_idx });
-            Followpos(m_idx);
-            m_ends.Add(m_idx);
-            m_tags.Add(m_idx, token.Tag);
-        }
-
-        public void Visit(EndToken token) {
-            if (m_root == null)
-                m_root = token;
-            m_idx++;
-            m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
+            m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
             m_firstpos = new HashSet<int>(new[] { m_idx });
             m_lastpos = new HashSet<int>(new[] { m_idx });
             Followpos(m_idx);
             m_ends.Add(m_idx);
         }
 
-        public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
-            Safe.ArgumentNotNull(dfa,"dfa");
+        public void BuildDFA() {
+            AddState(m_firstpos);
+            SetInitialState(m_firstpos);
 
-            var states = new MapAlphabet<HashSet<int>>(
-                             false,
-                             new CustomEqualityComparer<HashSet<int>>(
-                                 (x, y) => x.SetEquals(y),
-                                 x => x.Sum(n => n.GetHashCode())
-                             ));
-
-            var initialState = states.DefineSymbol(m_firstpos);
-            dfa.SetInitialState(initialState);
-
-            var tags = GetStateTags(m_firstpos);
-            if (tags != null && tags.Length > 0)
-                dfa.MarkFinalState(initialState, tags);
+            if(IsFinal(m_firstpos))
+                MarkFinalState(m_firstpos);
 
             var inputMax = m_indexes.Values.Max();
             var queue = new Queue<HashSet<int>>();
@@ -158,49 +149,64 @@
             queue.Enqueue(m_firstpos);
 
             while (queue.Count > 0) {
-                var state = queue.Dequeue();
-                var s1 = states.Translate(state);
-                Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
+                var s1 = queue.Dequeue();
 
                 for (int a = 0; a <= inputMax; a++) {
-                    var next = new HashSet<int>();
-                    foreach (var p in state) {
+                    var s2 = new HashSet<int>();
+                    foreach (var p in s1) {
                         if (m_indexes[p] == a) {
-                            next.UnionWith(Followpos(p));
+                            s2.UnionWith(Followpos(p));
                         }
                     }
-                    if (next.Count > 0) {
-                        int s2;
-                        if (states.Contains(next)) {
-                            s2 = states.Translate(next);
-                        } else {
-                            s2 = states.DefineSymbol(next);
+                    if (s2.Count > 0) {
+                        if (!HasState(s2)) {
+                            AddState(s2);
+                            if (IsFinal(s2))
+                                MarkFinalState(s2);
+                            
+                            queue.Enqueue(s2);
+                        }
 
-                            if (IsFinal(next)) {
-                                
-                                dfa.MarkFinalState(s2);
-                                tags = GetStateTags(next);
-                                if (tags != null && tags.Length > 0)
-                                    dfa.SetStateTag(s2, tags);
-                            }
-                            
-                            queue.Enqueue(next);
-                        }
-                        dfa.Add(new AutomatonTransition(s1, s2, a));
+                        DefineTransition(s1, s2, a);
                     }
+
                 }
             }
         }
 
+        protected bool HasState(HashSet<int> state) {
+            return m_states.Contains(state);
+        }
+
+        protected void AddState(HashSet<int> state) {
+            Debug.Assert(!HasState(state));
+
+            m_states.DefineSymbol(state);
+        }
+
+        protected int Translate(HashSet<int> state) {
+            Debug.Assert(HasState(state));
+
+            return m_states.Translate(state);
+        }
+
+        protected virtual void SetInitialState(HashSet<int> state) {
+            m_builder.SetInitialState(Translate(state));
+        }
+
+        protected virtual void MarkFinalState(HashSet<int> state) {
+            m_builder.MarkFinalState(Translate(state));
+        }
+
+        protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
+            
+            m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
+        }
+
         bool IsFinal(IEnumerable<int> state) {
             Debug.Assert(state != null);
             return state.Any(m_ends.Contains);
         }
 
-        TTag[] GetStateTags(IEnumerable<int> state) {
-            Debug.Assert(state != null);
-            return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
-        }
-
     }
 }