Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 190:1c2a16d071a7 v2
Слияние с ref20160224
author | cin |
---|---|
date | Fri, 22 Apr 2016 13:08:08 +0300 |
parents | d5c5db0335ee |
children | 302ca905c19e |
comparison
equal
deleted
inserted
replaced
161:2a8466f0cb8a | 190:1c2a16d071a7 |
---|---|
1 using Implab; | |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 | |
7 namespace Implab.Automaton.RegularExpressions { | |
8 /// <summary> | |
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит | |
10 /// регулярное выражение и вычисляет followpos, затем используется метод | |
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
12 /// </summary> | |
13 public class RegularExpressionVisitor : IVisitor { | |
14 int m_idx; | |
15 Token m_root; | |
16 HashSet<int> m_firstpos; | |
17 HashSet<int> m_lastpos; | |
18 | |
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
21 readonly HashSet<int> m_ends = new HashSet<int>(); | |
22 | |
23 readonly IDFATableBuilder m_builder; | |
24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( | |
25 false, | |
26 new CustomEqualityComparer<HashSet<int>>( | |
27 (x, y) => x.SetEquals(y), | |
28 x => x.Sum(n => n.GetHashCode()) | |
29 ) | |
30 ); | |
31 | |
32 public RegularExpressionVisitor(IDFATableBuilder builder) { | |
33 Safe.ArgumentNotNull(builder, "builder"); | |
34 | |
35 m_builder = builder; | |
36 } | |
37 | |
38 HashSet<int> Followpos(int pos) { | |
39 HashSet<int> set; | |
40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); | |
41 } | |
42 | |
43 bool Nullable(object n) { | |
44 if (n is EmptyToken || n is StarToken) | |
45 return true; | |
46 var altToken = n as AltToken; | |
47 if (altToken != null) | |
48 return Nullable(altToken.Left) || Nullable(altToken.Right); | |
49 var catToken = n as CatToken; | |
50 if (catToken != null) | |
51 return Nullable(catToken.Left) && Nullable(catToken.Right); | |
52 return false; | |
53 } | |
54 | |
55 protected int Index { | |
56 get { return m_idx; } | |
57 } | |
58 | |
59 public void Visit(AltToken token) { | |
60 if (m_root == null) | |
61 m_root = token; | |
62 var firtspos = new HashSet<int>(); | |
63 var lastpos = new HashSet<int>(); | |
64 | |
65 token.Left.Accept(this); | |
66 firtspos.UnionWith(m_firstpos); | |
67 lastpos.UnionWith(m_lastpos); | |
68 | |
69 token.Right.Accept(this); | |
70 firtspos.UnionWith(m_firstpos); | |
71 lastpos.UnionWith(m_lastpos); | |
72 | |
73 m_firstpos = firtspos; | |
74 m_lastpos = lastpos; | |
75 } | |
76 | |
77 public void Visit(StarToken token) { | |
78 if (m_root == null) | |
79 m_root = token; | |
80 token.Token.Accept(this); | |
81 | |
82 foreach (var i in m_lastpos) | |
83 Followpos(i).UnionWith(m_firstpos); | |
84 } | |
85 | |
86 public void Visit(CatToken token) { | |
87 if (m_root == null) | |
88 m_root = token; | |
89 | |
90 var firtspos = new HashSet<int>(); | |
91 var lastpos = new HashSet<int>(); | |
92 token.Left.Accept(this); | |
93 firtspos.UnionWith(m_firstpos); | |
94 var leftLastpos = m_lastpos; | |
95 | |
96 token.Right.Accept(this); | |
97 lastpos.UnionWith(m_lastpos); | |
98 var rightFirstpos = m_firstpos; | |
99 | |
100 if (Nullable(token.Left)) | |
101 firtspos.UnionWith(rightFirstpos); | |
102 | |
103 if (Nullable(token.Right)) | |
104 lastpos.UnionWith(leftLastpos); | |
105 | |
106 m_firstpos = firtspos; | |
107 m_lastpos = lastpos; | |
108 | |
109 foreach (var i in leftLastpos) | |
110 Followpos(i).UnionWith(rightFirstpos); | |
111 | |
112 } | |
113 | |
114 public void Visit(EmptyToken token) { | |
115 if (m_root == null) | |
116 m_root = token; | |
117 } | |
118 | |
119 public void Visit(SymbolToken token) { | |
120 if (m_root == null) | |
121 m_root = token; | |
122 m_idx++; | |
123 m_indexes[m_idx] = token.Value; | |
124 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
125 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
126 } | |
127 | |
128 public virtual void Visit(EndToken token) { | |
129 if (m_root == null) | |
130 m_root = token; | |
131 m_idx++; | |
132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; | |
133 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
134 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
135 Followpos(m_idx); | |
136 m_ends.Add(m_idx); | |
137 } | |
138 | |
139 public void BuildDFA() { | |
140 AddState(m_firstpos); | |
141 SetInitialState(m_firstpos); | |
142 | |
143 if(IsFinal(m_firstpos)) | |
144 MarkFinalState(m_firstpos); | |
145 | |
146 var inputMax = m_indexes.Values.Max(); | |
147 var queue = new Queue<HashSet<int>>(); | |
148 | |
149 queue.Enqueue(m_firstpos); | |
150 | |
151 while (queue.Count > 0) { | |
152 var s1 = queue.Dequeue(); | |
153 | |
154 for (int a = 0; a <= inputMax; a++) { | |
155 var s2 = new HashSet<int>(); | |
156 foreach (var p in s1) { | |
157 if (m_indexes[p] == a) { | |
158 s2.UnionWith(Followpos(p)); | |
159 } | |
160 } | |
161 if (s2.Count > 0) { | |
162 if (!HasState(s2)) { | |
163 AddState(s2); | |
164 if (IsFinal(s2)) | |
165 MarkFinalState(s2); | |
166 | |
167 queue.Enqueue(s2); | |
168 } | |
169 | |
170 DefineTransition(s1, s2, a); | |
171 } | |
172 | |
173 } | |
174 } | |
175 } | |
176 | |
177 protected bool HasState(HashSet<int> state) { | |
178 return m_states.Contains(state); | |
179 } | |
180 | |
181 protected void AddState(HashSet<int> state) { | |
182 Debug.Assert(!HasState(state)); | |
183 | |
184 m_states.DefineSymbol(state); | |
185 } | |
186 | |
187 protected int Translate(HashSet<int> state) { | |
188 Debug.Assert(HasState(state)); | |
189 | |
190 return m_states.Translate(state); | |
191 } | |
192 | |
193 protected virtual void SetInitialState(HashSet<int> state) { | |
194 m_builder.SetInitialState(Translate(state)); | |
195 } | |
196 | |
197 protected virtual void MarkFinalState(HashSet<int> state) { | |
198 m_builder.MarkFinalState(Translate(state)); | |
199 } | |
200 | |
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { | |
202 | |
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); | |
204 } | |
205 | |
206 bool IsFinal(IEnumerable<int> state) { | |
207 Debug.Assert(state != null); | |
208 return state.Any(m_ends.Contains); | |
209 } | |
210 | |
211 } | |
212 } |