comparison Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 178:d5c5db0335ee ref20160224

working on JSON parser
author cin
date Wed, 23 Mar 2016 19:51:45 +0300
parents a0ff6a0e9c44
children 302ca905c19e
comparison
equal deleted inserted replaced
177:a0ff6a0e9c44 178:d5c5db0335ee
8 /// <summary> 8 /// <summary>
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит 9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
10 /// регулярное выражение и вычисляет followpos, затем используется метод 10 /// регулярное выражение и вычисляет followpos, затем используется метод
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
12 /// </summary> 12 /// </summary>
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { 13 public class RegularExpressionVisitor : IVisitor {
14 int m_idx; 14 int m_idx;
15 Token m_root; 15 Token m_root;
16 HashSet<int> m_firstpos; 16 HashSet<int> m_firstpos;
17 HashSet<int> m_lastpos; 17 HashSet<int> m_lastpos;
18 18
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 readonly HashSet<int> m_ends = new HashSet<int>(); 21 readonly HashSet<int> m_ends = new HashSet<int>();
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); 22
23 23 readonly IDFATableBuilder m_builder;
24 public Dictionary<int, HashSet<int>> FollowposMap { 24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
25 get { return m_followpos; } 25 false,
26 } 26 new CustomEqualityComparer<HashSet<int>>(
27 27 (x, y) => x.SetEquals(y),
28 public HashSet<int> Followpos(int pos) { 28 x => x.Sum(n => n.GetHashCode())
29 )
30 );
31
32 public RegularExpressionVisitor(IDFATableBuilder builder) {
33 Safe.ArgumentNotNull(builder, "builder");
34
35 m_builder = builder;
36 }
37
38 HashSet<int> Followpos(int pos) {
29 HashSet<int> set; 39 HashSet<int> set;
30 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); 40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
31 } 41 }
32 42
33 bool Nullable(object n) { 43 bool Nullable(object n) {
40 if (catToken != null) 50 if (catToken != null)
41 return Nullable(catToken.Left) && Nullable(catToken.Right); 51 return Nullable(catToken.Left) && Nullable(catToken.Right);
42 return false; 52 return false;
43 } 53 }
44 54
55 protected int Index {
56 get { return m_idx; }
57 }
45 58
46 public void Visit(AltToken token) { 59 public void Visit(AltToken token) {
47 if (m_root == null) 60 if (m_root == null)
48 m_root = token; 61 m_root = token;
49 var firtspos = new HashSet<int>(); 62 var firtspos = new HashSet<int>();
110 m_indexes[m_idx] = token.Value; 123 m_indexes[m_idx] = token.Value;
111 m_firstpos = new HashSet<int>(new[] { m_idx }); 124 m_firstpos = new HashSet<int>(new[] { m_idx });
112 m_lastpos = new HashSet<int>(new[] { m_idx }); 125 m_lastpos = new HashSet<int>(new[] { m_idx });
113 } 126 }
114 127
115 public void Visit(EndToken<TTag> token) { 128 public virtual void Visit(EndToken token) {
116 if (m_root == null) 129 if (m_root == null)
117 m_root = token; 130 m_root = token;
118 m_idx++; 131 m_idx++;
119 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; 132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
120 m_firstpos = new HashSet<int>(new[] { m_idx }); 133 m_firstpos = new HashSet<int>(new[] { m_idx });
121 m_lastpos = new HashSet<int>(new[] { m_idx }); 134 m_lastpos = new HashSet<int>(new[] { m_idx });
122 Followpos(m_idx); 135 Followpos(m_idx);
123 m_ends.Add(m_idx); 136 m_ends.Add(m_idx);
124 m_tags.Add(m_idx, token.Tag); 137 }
125 } 138
126 139 public void BuildDFA() {
127 public void Visit(EndToken token) { 140 AddState(m_firstpos);
128 if (m_root == null) 141 SetInitialState(m_firstpos);
129 m_root = token; 142
130 m_idx++; 143 if(IsFinal(m_firstpos))
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; 144 MarkFinalState(m_firstpos);
132 m_firstpos = new HashSet<int>(new[] { m_idx });
133 m_lastpos = new HashSet<int>(new[] { m_idx });
134 Followpos(m_idx);
135 m_ends.Add(m_idx);
136 }
137
138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
139 Safe.ArgumentNotNull(dfa,"dfa");
140
141 var states = new MapAlphabet<HashSet<int>>(
142 false,
143 new CustomEqualityComparer<HashSet<int>>(
144 (x, y) => x.SetEquals(y),
145 x => x.Sum(n => n.GetHashCode())
146 ));
147
148 var initialState = states.DefineSymbol(m_firstpos);
149 dfa.SetInitialState(initialState);
150
151 var tags = GetStateTags(m_firstpos);
152 if (tags != null && tags.Length > 0)
153 dfa.MarkFinalState(initialState, tags);
154 145
155 var inputMax = m_indexes.Values.Max(); 146 var inputMax = m_indexes.Values.Max();
156 var queue = new Queue<HashSet<int>>(); 147 var queue = new Queue<HashSet<int>>();
157 148
158 queue.Enqueue(m_firstpos); 149 queue.Enqueue(m_firstpos);
159 150
160 while (queue.Count > 0) { 151 while (queue.Count > 0) {
161 var state = queue.Dequeue(); 152 var s1 = queue.Dequeue();
162 var s1 = states.Translate(state);
163 Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
164 153
165 for (int a = 0; a <= inputMax; a++) { 154 for (int a = 0; a <= inputMax; a++) {
166 var next = new HashSet<int>(); 155 var s2 = new HashSet<int>();
167 foreach (var p in state) { 156 foreach (var p in s1) {
168 if (m_indexes[p] == a) { 157 if (m_indexes[p] == a) {
169 next.UnionWith(Followpos(p)); 158 s2.UnionWith(Followpos(p));
170 } 159 }
171 } 160 }
172 if (next.Count > 0) { 161 if (s2.Count > 0) {
173 int s2; 162 if (!HasState(s2)) {
174 if (states.Contains(next)) { 163 AddState(s2);
175 s2 = states.Translate(next); 164 if (IsFinal(s2))
176 } else { 165 MarkFinalState(s2);
177 s2 = states.DefineSymbol(next);
178
179 if (IsFinal(next)) {
180
181 dfa.MarkFinalState(s2);
182 tags = GetStateTags(next);
183 if (tags != null && tags.Length > 0)
184 dfa.SetStateTag(s2, tags);
185 }
186 166
187 queue.Enqueue(next); 167 queue.Enqueue(s2);
188 } 168 }
189 dfa.Add(new AutomatonTransition(s1, s2, a)); 169
170 DefineTransition(s1, s2, a);
190 } 171 }
172
191 } 173 }
192 } 174 }
175 }
176
177 protected bool HasState(HashSet<int> state) {
178 return m_states.Contains(state);
179 }
180
181 protected void AddState(HashSet<int> state) {
182 Debug.Assert(!HasState(state));
183
184 m_states.DefineSymbol(state);
185 }
186
187 protected int Translate(HashSet<int> state) {
188 Debug.Assert(HasState(state));
189
190 return m_states.Translate(state);
191 }
192
193 protected virtual void SetInitialState(HashSet<int> state) {
194 m_builder.SetInitialState(Translate(state));
195 }
196
197 protected virtual void MarkFinalState(HashSet<int> state) {
198 m_builder.MarkFinalState(Translate(state));
199 }
200
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
202
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
193 } 204 }
194 205
195 bool IsFinal(IEnumerable<int> state) { 206 bool IsFinal(IEnumerable<int> state) {
196 Debug.Assert(state != null); 207 Debug.Assert(state != null);
197 return state.Any(m_ends.Contains); 208 return state.Any(m_ends.Contains);
198 } 209 }
199 210
200 TTag[] GetStateTags(IEnumerable<int> state) {
201 Debug.Assert(state != null);
202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
203 }
204
205 } 211 }
206 } 212 }