Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 177:a0ff6a0e9c44 ref20160224
refactoring
author | cin |
---|---|
date | Wed, 23 Mar 2016 01:42:00 +0300 |
parents | 92d5278d1b10 |
children | d5c5db0335ee |
comparison
equal
deleted
inserted
replaced
176:0c3c69fe225b | 177:a0ff6a0e9c44 |
---|---|
10 /// регулярное выражение и вычисляет followpos, затем используется метод | 10 /// регулярное выражение и вычисляет followpos, затем используется метод |
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. |
12 /// </summary> | 12 /// </summary> |
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { | 13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { |
14 int m_idx; | 14 int m_idx; |
15 Token<TTag> m_root; | 15 Token m_root; |
16 HashSet<int> m_firstpos; | 16 HashSet<int> m_firstpos; |
17 HashSet<int> m_lastpos; | 17 HashSet<int> m_lastpos; |
18 | 18 |
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); |
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); |
21 readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | 21 readonly HashSet<int> m_ends = new HashSet<int>(); |
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); | |
22 | 23 |
23 public Dictionary<int, HashSet<int>> FollowposMap { | 24 public Dictionary<int, HashSet<int>> FollowposMap { |
24 get { return m_followpos; } | 25 get { return m_followpos; } |
25 } | 26 } |
26 | 27 |
28 HashSet<int> set; | 29 HashSet<int> set; |
29 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | 30 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); |
30 } | 31 } |
31 | 32 |
32 bool Nullable(object n) { | 33 bool Nullable(object n) { |
33 if (n is EmptyToken<TTag> || n is StarToken<TTag>) | 34 if (n is EmptyToken || n is StarToken) |
34 return true; | 35 return true; |
35 var altToken = n as AltToken<TTag>; | 36 var altToken = n as AltToken; |
36 if (altToken != null) | 37 if (altToken != null) |
37 return Nullable(altToken.Left) || Nullable(altToken.Right); | 38 return Nullable(altToken.Left) || Nullable(altToken.Right); |
38 var catToken = n as CatToken<TTag>; | 39 var catToken = n as CatToken; |
39 if (catToken != null) | 40 if (catToken != null) |
40 return Nullable(catToken.Left) && Nullable(catToken.Right); | 41 return Nullable(catToken.Left) && Nullable(catToken.Right); |
41 return false; | 42 return false; |
42 } | 43 } |
43 | 44 |
44 | 45 |
45 public void Visit(AltToken<TTag> token) { | 46 public void Visit(AltToken token) { |
46 if (m_root == null) | 47 if (m_root == null) |
47 m_root = token; | 48 m_root = token; |
48 var firtspos = new HashSet<int>(); | 49 var firtspos = new HashSet<int>(); |
49 var lastpos = new HashSet<int>(); | 50 var lastpos = new HashSet<int>(); |
50 | 51 |
58 | 59 |
59 m_firstpos = firtspos; | 60 m_firstpos = firtspos; |
60 m_lastpos = lastpos; | 61 m_lastpos = lastpos; |
61 } | 62 } |
62 | 63 |
63 public void Visit(StarToken<TTag> token) { | 64 public void Visit(StarToken token) { |
64 if (m_root == null) | 65 if (m_root == null) |
65 m_root = token; | 66 m_root = token; |
66 token.Token.Accept(this); | 67 token.Token.Accept(this); |
67 | 68 |
68 foreach (var i in m_lastpos) | 69 foreach (var i in m_lastpos) |
69 Followpos(i).UnionWith(m_firstpos); | 70 Followpos(i).UnionWith(m_firstpos); |
70 } | 71 } |
71 | 72 |
72 public void Visit(CatToken<TTag> token) { | 73 public void Visit(CatToken token) { |
73 if (m_root == null) | 74 if (m_root == null) |
74 m_root = token; | 75 m_root = token; |
75 | 76 |
76 var firtspos = new HashSet<int>(); | 77 var firtspos = new HashSet<int>(); |
77 var lastpos = new HashSet<int>(); | 78 var lastpos = new HashSet<int>(); |
95 foreach (var i in leftLastpos) | 96 foreach (var i in leftLastpos) |
96 Followpos(i).UnionWith(rightFirstpos); | 97 Followpos(i).UnionWith(rightFirstpos); |
97 | 98 |
98 } | 99 } |
99 | 100 |
100 public void Visit(EmptyToken<TTag> token) { | 101 public void Visit(EmptyToken token) { |
101 if (m_root == null) | 102 if (m_root == null) |
102 m_root = token; | 103 m_root = token; |
103 } | 104 } |
104 | 105 |
105 public void Visit(SymbolToken<TTag> token) { | 106 public void Visit(SymbolToken token) { |
106 if (m_root == null) | 107 if (m_root == null) |
107 m_root = token; | 108 m_root = token; |
108 m_idx++; | 109 m_idx++; |
109 m_indexes[m_idx] = token.Value; | 110 m_indexes[m_idx] = token.Value; |
110 m_firstpos = new HashSet<int>(new[] { m_idx }); | 111 m_firstpos = new HashSet<int>(new[] { m_idx }); |
117 m_idx++; | 118 m_idx++; |
118 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | 119 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; |
119 m_firstpos = new HashSet<int>(new[] { m_idx }); | 120 m_firstpos = new HashSet<int>(new[] { m_idx }); |
120 m_lastpos = new HashSet<int>(new[] { m_idx }); | 121 m_lastpos = new HashSet<int>(new[] { m_idx }); |
121 Followpos(m_idx); | 122 Followpos(m_idx); |
122 m_ends.Add(m_idx, token.Tag); | 123 m_ends.Add(m_idx); |
124 m_tags.Add(m_idx, token.Tag); | |
125 } | |
126 | |
127 public void Visit(EndToken token) { | |
128 if (m_root == null) | |
129 m_root = token; | |
130 m_idx++; | |
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |
132 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
133 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
134 Followpos(m_idx); | |
135 m_ends.Add(m_idx); | |
123 } | 136 } |
124 | 137 |
125 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { | 138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { |
126 Safe.ArgumentNotNull(dfa,"dfa"); | 139 Safe.ArgumentNotNull(dfa,"dfa"); |
127 | 140 |
155 if (m_indexes[p] == a) { | 168 if (m_indexes[p] == a) { |
156 next.UnionWith(Followpos(p)); | 169 next.UnionWith(Followpos(p)); |
157 } | 170 } |
158 } | 171 } |
159 if (next.Count > 0) { | 172 if (next.Count > 0) { |
160 int s2 = states.Translate(next); | 173 int s2; |
161 if (s2 == DFAConst.UNCLASSIFIED_INPUT) { | 174 if (states.Contains(next)) { |
175 s2 = states.Translate(next); | |
176 } else { | |
162 s2 = states.DefineSymbol(next); | 177 s2 = states.DefineSymbol(next); |
163 | 178 |
164 tags = GetStateTags(next); | 179 if (IsFinal(next)) { |
165 if (tags != null && tags.Length > 0) { | 180 |
166 dfa.MarkFinalState(s2); | 181 dfa.MarkFinalState(s2); |
167 dfa.SetStateTag(s2, tags); | 182 tags = GetStateTags(next); |
183 if (tags != null && tags.Length > 0) | |
184 dfa.SetStateTag(s2, tags); | |
168 } | 185 } |
169 | 186 |
170 queue.Enqueue(next); | 187 queue.Enqueue(next); |
171 } | 188 } |
172 dfa.Add(new AutomatonTransition(s1, s2, a)); | 189 dfa.Add(new AutomatonTransition(s1, s2, a)); |
173 } | 190 } |
174 } | 191 } |
175 } | 192 } |
176 } | 193 } |
177 | 194 |
195 bool IsFinal(IEnumerable<int> state) { | |
196 Debug.Assert(state != null); | |
197 return state.Any(m_ends.Contains); | |
198 } | |
199 | |
178 TTag[] GetStateTags(IEnumerable<int> state) { | 200 TTag[] GetStateTags(IEnumerable<int> state) { |
179 Debug.Assert(state != null); | 201 Debug.Assert(state != null); |
180 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | 202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); |
181 } | 203 } |
182 | 204 |
183 } | 205 } |
184 } | 206 } |