Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 178:d5c5db0335ee ref20160224
working on JSON parser
| author | cin |
|---|---|
| date | Wed, 23 Mar 2016 19:51:45 +0300 |
| parents | a0ff6a0e9c44 |
| children | 302ca905c19e |
comparison
equal
deleted
inserted
replaced
| 177:a0ff6a0e9c44 | 178:d5c5db0335ee |
|---|---|
| 8 /// <summary> | 8 /// <summary> |
| 9 /// Используется для построения ДКА по регулярному выражению, сначала обходит | 9 /// Используется для построения ДКА по регулярному выражению, сначала обходит |
| 10 /// регулярное выражение и вычисляет followpos, затем используется метод | 10 /// регулярное выражение и вычисляет followpos, затем используется метод |
| 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
| 12 /// </summary> | 12 /// </summary> |
| 13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | 13 public class RegularExpressionVisitor : IVisitor { |
| 14 int m_idx; | 14 int m_idx; |
| 15 Token m_root; | 15 Token m_root; |
| 16 HashSet<int> m_firstpos; | 16 HashSet<int> m_firstpos; |
| 17 HashSet<int> m_lastpos; | 17 HashSet<int> m_lastpos; |
| 18 | 18 |
| 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
| 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
| 21 readonly HashSet<int> m_ends = new HashSet<int>(); | 21 readonly HashSet<int> m_ends = new HashSet<int>(); |
| 22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | 22 |
| 23 | 23 readonly IDFATableBuilder m_builder; |
| 24 public Dictionary<int, HashSet<int>> FollowposMap { | 24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( |
| 25 get { return m_followpos; } | 25 false, |
| 26 } | 26 new CustomEqualityComparer<HashSet<int>>( |
| 27 | 27 (x, y) => x.SetEquals(y), |
| 28 public HashSet<int> Followpos(int pos) { | 28 x => x.Sum(n => n.GetHashCode()) |
| 29 ) | |
| 30 ); | |
| 31 | |
| 32 public RegularExpressionVisitor(IDFATableBuilder builder) { | |
| 33 Safe.ArgumentNotNull(builder, "builder"); | |
| 34 | |
| 35 m_builder = builder; | |
| 36 } | |
| 37 | |
| 38 HashSet<int> Followpos(int pos) { | |
| 29 HashSet<int> set; | 39 HashSet<int> set; |
| 30 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | 40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |
| 31 } | 41 } |
| 32 | 42 |
| 33 bool Nullable(object n) { | 43 bool Nullable(object n) { |
| 40 if (catToken != null) | 50 if (catToken != null) |
| 41 return Nullable(catToken.Left) && Nullable(catToken.Right); | 51 return Nullable(catToken.Left) && Nullable(catToken.Right); |
| 42 return false; | 52 return false; |
| 43 } | 53 } |
| 44 | 54 |
| 55 protected int Index { | |
| 56 get { return m_idx; } | |
| 57 } | |
| 45 | 58 |
| 46 public void Visit(AltToken token) { | 59 public void Visit(AltToken token) { |
| 47 if (m_root == null) | 60 if (m_root == null) |
| 48 m_root = token; | 61 m_root = token; |
| 49 var firtspos = new HashSet<int>(); | 62 var firtspos = new HashSet<int>(); |
| 110 m_indexes[m_idx] = token.Value; | 123 m_indexes[m_idx] = token.Value; |
| 111 m_firstpos = new HashSet<int>(new[] { m_idx }); | 124 m_firstpos = new HashSet<int>(new[] { m_idx }); |
| 112 m_lastpos = new HashSet<int>(new[] { m_idx }); | 125 m_lastpos = new HashSet<int>(new[] { m_idx }); |
| 113 } | 126 } |
| 114 | 127 |
| 115 public void Visit(EndToken<TTag> token) { | 128 public virtual void Visit(EndToken token) { |
| 116 if (m_root == null) | 129 if (m_root == null) |
| 117 m_root = token; | 130 m_root = token; |
| 118 m_idx++; | 131 m_idx++; |
| 119 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | 132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; |
| 120 m_firstpos = new HashSet<int>(new[] { m_idx }); | 133 m_firstpos = new HashSet<int>(new[] { m_idx }); |
| 121 m_lastpos = new HashSet<int>(new[] { m_idx }); | 134 m_lastpos = new HashSet<int>(new[] { m_idx }); |
| 122 Followpos(m_idx); | 135 Followpos(m_idx); |
| 123 m_ends.Add(m_idx); | 136 m_ends.Add(m_idx); |
| 124 m_tags.Add(m_idx, token.Tag); | 137 } |
| 125 } | 138 |
| 126 | 139 public void BuildDFA() { |
| 127 public void Visit(EndToken token) { | 140 AddState(m_firstpos); |
| 128 if (m_root == null) | 141 SetInitialState(m_firstpos); |
| 129 m_root = token; | 142 |
| 130 m_idx++; | 143 if(IsFinal(m_firstpos)) |
| 131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | 144 MarkFinalState(m_firstpos); |
| 132 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
| 133 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 134 Followpos(m_idx); | |
| 135 m_ends.Add(m_idx); | |
| 136 } | |
| 137 | |
| 138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | |
| 139 Safe.ArgumentNotNull(dfa,"dfa"); | |
| 140 | |
| 141 var states = new MapAlphabet<HashSet<int>>( | |
| 142 false, | |
| 143 new CustomEqualityComparer<HashSet<int>>( | |
| 144 (x, y) => x.SetEquals(y), | |
| 145 x => x.Sum(n => n.GetHashCode()) | |
| 146 )); | |
| 147 | |
| 148 var initialState = states.DefineSymbol(m_firstpos); | |
| 149 dfa.SetInitialState(initialState); | |
| 150 | |
| 151 var tags = GetStateTags(m_firstpos); | |
| 152 if (tags != null && tags.Length > 0) | |
| 153 dfa.MarkFinalState(initialState, tags); | |
| 154 | 145 |
| 155 var inputMax = m_indexes.Values.Max(); | 146 var inputMax = m_indexes.Values.Max(); |
| 156 var queue = new Queue<HashSet<int>>(); | 147 var queue = new Queue<HashSet<int>>(); |
| 157 | 148 |
| 158 queue.Enqueue(m_firstpos); | 149 queue.Enqueue(m_firstpos); |
| 159 | 150 |
| 160 while (queue.Count > 0) { | 151 while (queue.Count > 0) { |
| 161 var state = queue.Dequeue(); | 152 var s1 = queue.Dequeue(); |
| 162 var s1 = states.Translate(state); | |
| 163 Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |
| 164 | 153 |
| 165 for (int a = 0; a <= inputMax; a++) { | 154 for (int a = 0; a <= inputMax; a++) { |
| 166 var next = new HashSet<int>(); | 155 var s2 = new HashSet<int>(); |
| 167 foreach (var p in state) { | 156 foreach (var p in s1) { |
| 168 if (m_indexes[p] == a) { | 157 if (m_indexes[p] == a) { |
| 169 next.UnionWith(Followpos(p)); | 158 s2.UnionWith(Followpos(p)); |
| 170 } | 159 } |
| 171 } | 160 } |
| 172 if (next.Count > 0) { | 161 if (s2.Count > 0) { |
| 173 int s2; | 162 if (!HasState(s2)) { |
| 174 if (states.Contains(next)) { | 163 AddState(s2); |
| 175 s2 = states.Translate(next); | 164 if (IsFinal(s2)) |
| 176 } else { | 165 MarkFinalState(s2); |
| 177 s2 = states.DefineSymbol(next); | |
| 178 | |
| 179 if (IsFinal(next)) { | |
| 180 | |
| 181 dfa.MarkFinalState(s2); | |
| 182 tags = GetStateTags(next); | |
| 183 if (tags != null && tags.Length > 0) | |
| 184 dfa.SetStateTag(s2, tags); | |
| 185 } | |
| 186 | 166 |
| 187 queue.Enqueue(next); | 167 queue.Enqueue(s2); |
| 188 } | 168 } |
| 189 dfa.Add(new AutomatonTransition(s1, s2, a)); | 169 |
| 170 DefineTransition(s1, s2, a); | |
| 190 } | 171 } |
| 172 | |
| 191 } | 173 } |
| 192 } | 174 } |
| 175 } | |
| 176 | |
| 177 protected bool HasState(HashSet<int> state) { | |
| 178 return m_states.Contains(state); | |
| 179 } | |
| 180 | |
| 181 protected void AddState(HashSet<int> state) { | |
| 182 Debug.Assert(!HasState(state)); | |
| 183 | |
| 184 m_states.DefineSymbol(state); | |
| 185 } | |
| 186 | |
| 187 protected int Translate(HashSet<int> state) { | |
| 188 Debug.Assert(HasState(state)); | |
| 189 | |
| 190 return m_states.Translate(state); | |
| 191 } | |
| 192 | |
| 193 protected virtual void SetInitialState(HashSet<int> state) { | |
| 194 m_builder.SetInitialState(Translate(state)); | |
| 195 } | |
| 196 | |
| 197 protected virtual void MarkFinalState(HashSet<int> state) { | |
| 198 m_builder.MarkFinalState(Translate(state)); | |
| 199 } | |
| 200 | |
| 201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |
| 202 | |
| 203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |
| 193 } | 204 } |
| 194 | 205 |
| 195 bool IsFinal(IEnumerable<int> state) { | 206 bool IsFinal(IEnumerable<int> state) { |
| 196 Debug.Assert(state != null); | 207 Debug.Assert(state != null); |
| 197 return state.Any(m_ends.Contains); | 208 return state.Any(m_ends.Contains); |
| 198 } | 209 } |
| 199 | 210 |
| 200 TTag[] GetStateTags(IEnumerable<int> state) { | |
| 201 Debug.Assert(state != null); | |
| 202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |
| 203 } | |
| 204 | |
| 205 } | 211 } |
| 206 } | 212 } |
