Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 178:d5c5db0335ee ref20160224
working on JSON parser
author | cin |
---|---|
date | Wed, 23 Mar 2016 19:51:45 +0300 |
parents | a0ff6a0e9c44 |
children | 302ca905c19e |
comparison
equal
deleted
inserted
replaced
177:a0ff6a0e9c44 | 178:d5c5db0335ee |
---|---|
8 /// <summary> | 8 /// <summary> |
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит | 9 /// Используется для построения ДКА по регулярному выражению, сначала обходит |
10 /// регулярное выражение и вычисляет followpos, затем используется метод | 10 /// регулярное выражение и вычисляет followpos, затем используется метод |
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
12 /// </summary> | 12 /// </summary> |
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | 13 public class RegularExpressionVisitor : IVisitor { |
14 int m_idx; | 14 int m_idx; |
15 Token m_root; | 15 Token m_root; |
16 HashSet<int> m_firstpos; | 16 HashSet<int> m_firstpos; |
17 HashSet<int> m_lastpos; | 17 HashSet<int> m_lastpos; |
18 | 18 |
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
21 readonly HashSet<int> m_ends = new HashSet<int>(); | 21 readonly HashSet<int> m_ends = new HashSet<int>(); |
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | 22 |
23 | 23 readonly IDFATableBuilder m_builder; |
24 public Dictionary<int, HashSet<int>> FollowposMap { | 24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( |
25 get { return m_followpos; } | 25 false, |
26 } | 26 new CustomEqualityComparer<HashSet<int>>( |
27 | 27 (x, y) => x.SetEquals(y), |
28 public HashSet<int> Followpos(int pos) { | 28 x => x.Sum(n => n.GetHashCode()) |
29 ) | |
30 ); | |
31 | |
32 public RegularExpressionVisitor(IDFATableBuilder builder) { | |
33 Safe.ArgumentNotNull(builder, "builder"); | |
34 | |
35 m_builder = builder; | |
36 } | |
37 | |
38 HashSet<int> Followpos(int pos) { | |
29 HashSet<int> set; | 39 HashSet<int> set; |
30 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | 40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |
31 } | 41 } |
32 | 42 |
33 bool Nullable(object n) { | 43 bool Nullable(object n) { |
40 if (catToken != null) | 50 if (catToken != null) |
41 return Nullable(catToken.Left) && Nullable(catToken.Right); | 51 return Nullable(catToken.Left) && Nullable(catToken.Right); |
42 return false; | 52 return false; |
43 } | 53 } |
44 | 54 |
55 protected int Index { | |
56 get { return m_idx; } | |
57 } | |
45 | 58 |
46 public void Visit(AltToken token) { | 59 public void Visit(AltToken token) { |
47 if (m_root == null) | 60 if (m_root == null) |
48 m_root = token; | 61 m_root = token; |
49 var firtspos = new HashSet<int>(); | 62 var firtspos = new HashSet<int>(); |
110 m_indexes[m_idx] = token.Value; | 123 m_indexes[m_idx] = token.Value; |
111 m_firstpos = new HashSet<int>(new[] { m_idx }); | 124 m_firstpos = new HashSet<int>(new[] { m_idx }); |
112 m_lastpos = new HashSet<int>(new[] { m_idx }); | 125 m_lastpos = new HashSet<int>(new[] { m_idx }); |
113 } | 126 } |
114 | 127 |
115 public void Visit(EndToken<TTag> token) { | 128 public virtual void Visit(EndToken token) { |
116 if (m_root == null) | 129 if (m_root == null) |
117 m_root = token; | 130 m_root = token; |
118 m_idx++; | 131 m_idx++; |
119 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | 132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; |
120 m_firstpos = new HashSet<int>(new[] { m_idx }); | 133 m_firstpos = new HashSet<int>(new[] { m_idx }); |
121 m_lastpos = new HashSet<int>(new[] { m_idx }); | 134 m_lastpos = new HashSet<int>(new[] { m_idx }); |
122 Followpos(m_idx); | 135 Followpos(m_idx); |
123 m_ends.Add(m_idx); | 136 m_ends.Add(m_idx); |
124 m_tags.Add(m_idx, token.Tag); | 137 } |
125 } | 138 |
126 | 139 public void BuildDFA() { |
127 public void Visit(EndToken token) { | 140 AddState(m_firstpos); |
128 if (m_root == null) | 141 SetInitialState(m_firstpos); |
129 m_root = token; | 142 |
130 m_idx++; | 143 if(IsFinal(m_firstpos)) |
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | 144 MarkFinalState(m_firstpos); |
132 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
133 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
134 Followpos(m_idx); | |
135 m_ends.Add(m_idx); | |
136 } | |
137 | |
138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | |
139 Safe.ArgumentNotNull(dfa,"dfa"); | |
140 | |
141 var states = new MapAlphabet<HashSet<int>>( | |
142 false, | |
143 new CustomEqualityComparer<HashSet<int>>( | |
144 (x, y) => x.SetEquals(y), | |
145 x => x.Sum(n => n.GetHashCode()) | |
146 )); | |
147 | |
148 var initialState = states.DefineSymbol(m_firstpos); | |
149 dfa.SetInitialState(initialState); | |
150 | |
151 var tags = GetStateTags(m_firstpos); | |
152 if (tags != null && tags.Length > 0) | |
153 dfa.MarkFinalState(initialState, tags); | |
154 | 145 |
155 var inputMax = m_indexes.Values.Max(); | 146 var inputMax = m_indexes.Values.Max(); |
156 var queue = new Queue<HashSet<int>>(); | 147 var queue = new Queue<HashSet<int>>(); |
157 | 148 |
158 queue.Enqueue(m_firstpos); | 149 queue.Enqueue(m_firstpos); |
159 | 150 |
160 while (queue.Count > 0) { | 151 while (queue.Count > 0) { |
161 var state = queue.Dequeue(); | 152 var s1 = queue.Dequeue(); |
162 var s1 = states.Translate(state); | |
163 Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |
164 | 153 |
165 for (int a = 0; a <= inputMax; a++) { | 154 for (int a = 0; a <= inputMax; a++) { |
166 var next = new HashSet<int>(); | 155 var s2 = new HashSet<int>(); |
167 foreach (var p in state) { | 156 foreach (var p in s1) { |
168 if (m_indexes[p] == a) { | 157 if (m_indexes[p] == a) { |
169 next.UnionWith(Followpos(p)); | 158 s2.UnionWith(Followpos(p)); |
170 } | 159 } |
171 } | 160 } |
172 if (next.Count > 0) { | 161 if (s2.Count > 0) { |
173 int s2; | 162 if (!HasState(s2)) { |
174 if (states.Contains(next)) { | 163 AddState(s2); |
175 s2 = states.Translate(next); | 164 if (IsFinal(s2)) |
176 } else { | 165 MarkFinalState(s2); |
177 s2 = states.DefineSymbol(next); | |
178 | |
179 if (IsFinal(next)) { | |
180 | |
181 dfa.MarkFinalState(s2); | |
182 tags = GetStateTags(next); | |
183 if (tags != null && tags.Length > 0) | |
184 dfa.SetStateTag(s2, tags); | |
185 } | |
186 | 166 |
187 queue.Enqueue(next); | 167 queue.Enqueue(s2); |
188 } | 168 } |
189 dfa.Add(new AutomatonTransition(s1, s2, a)); | 169 |
170 DefineTransition(s1, s2, a); | |
190 } | 171 } |
172 | |
191 } | 173 } |
192 } | 174 } |
175 } | |
176 | |
177 protected bool HasState(HashSet<int> state) { | |
178 return m_states.Contains(state); | |
179 } | |
180 | |
181 protected void AddState(HashSet<int> state) { | |
182 Debug.Assert(!HasState(state)); | |
183 | |
184 m_states.DefineSymbol(state); | |
185 } | |
186 | |
187 protected int Translate(HashSet<int> state) { | |
188 Debug.Assert(HasState(state)); | |
189 | |
190 return m_states.Translate(state); | |
191 } | |
192 | |
193 protected virtual void SetInitialState(HashSet<int> state) { | |
194 m_builder.SetInitialState(Translate(state)); | |
195 } | |
196 | |
197 protected virtual void MarkFinalState(HashSet<int> state) { | |
198 m_builder.MarkFinalState(Translate(state)); | |
199 } | |
200 | |
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |
202 | |
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |
193 } | 204 } |
194 | 205 |
195 bool IsFinal(IEnumerable<int> state) { | 206 bool IsFinal(IEnumerable<int> state) { |
196 Debug.Assert(state != null); | 207 Debug.Assert(state != null); |
197 return state.Any(m_ends.Contains); | 208 return state.Any(m_ends.Contains); |
198 } | 209 } |
199 | 210 |
200 TTag[] GetStateTags(IEnumerable<int> state) { | |
201 Debug.Assert(state != null); | |
202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); | |
203 } | |
204 | |
205 } | 211 } |
206 } | 212 } |