Mercurial > pub > ImplabNet
annotate Implab/Parsing/DFADefinitionBase.cs @ 156:97fbbf816844 v2
Promises: SignalXXX methods merged into SignalHandler method.
Components: RunnableComponent In progress
author | cin |
---|---|
date | Mon, 15 Feb 2016 04:22:15 +0300 |
parents | c0bf853aa04f |
children |
rev | line source |
---|---|
55 | 1 using Implab; |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 using System.Text; | |
7 using System.Threading.Tasks; | |
8 | |
9 namespace Implab.Parsing { | |
10 public abstract class DFADefinitionBase : IDFADefinition { | |
/// <summary>
/// Index of the state the automaton starts in. It is the first index handed out
/// by <c>AddState</c>, because index 0 is occupied by the unreachable state.
/// </summary>
public const int INITIAL_STATE = 1;

/// <summary>
/// Index reserved for the unreachable state. A transition table entry holding
/// this value is treated as "no transition" by the code in this class.
/// </summary>
public const int UNREACHEBLE_STATE = 0;

// State descriptors addressed by state index; slot 0 is filled in by the constructor.
readonly List<DFAStateDescriptior> m_states;

// Cached array form of m_states returned by the States property.
DFAStateDescriptior[] m_statesArray;
17 | |
/// <summary>
/// Creates a definition that contains only the placeholder descriptor for the
/// unreachable state (index <see cref="UNREACHEBLE_STATE"/>).
/// </summary>
protected DFADefinitionBase() {
    // Occupy slot 0 so that the first state added by AddState receives index 1.
    m_states = new List<DFAStateDescriptior> { new DFAStateDescriptior() };
}
23 | |
/// <summary>
/// Gets the state descriptors as an array indexed by state number
/// (index 0 is the unreachable state).
/// </summary>
/// <remarks>
/// The array is cached between calls. The visible code only ever appends to the
/// state list, so a length mismatch is what signals that states were added after
/// the cache was built; in that case the array is rebuilt instead of returning a
/// stale snapshot.
/// </remarks>
public DFAStateDescriptior[] States {
    get {
        if (m_statesArray == null || m_statesArray.Length != m_states.Count)
            m_statesArray = m_states.ToArray();
        return m_statesArray;
    }
}
31 | |
/// <summary>
/// Gets whether the state at index <see cref="INITIAL_STATE"/> is marked final.
/// </summary>
/// <remarks>
/// Reads the state list directly, so the list indexer throws when no state has
/// been added yet.
/// </remarks>
public bool InitialStateIsFinal {
    get {
        var initial = m_states[INITIAL_STATE];
        return initial.final;
    }
}
37 | |
/// <summary>
/// Appends a non-final state with an empty transition table.
/// </summary>
/// <returns>The index of the newly added state.</returns>
public int AddState() {
    var descriptor = new DFAStateDescriptior();
    descriptor.final = false;
    // One slot per symbol class; every slot starts at 0, i.e. UNREACHEBLE_STATE.
    descriptor.transitions = new int[AlphabetSize];

    m_states.Add(descriptor);
    return m_states.Count - 1;
}
47 | |
/// <summary>
/// Appends a state that is final when <paramref name="tag"/> carries at least
/// one element, and non-final otherwise.
/// </summary>
/// <param name="tag">
/// Tags to attach to the state. A null or empty array produces a non-final
/// state without tags. The array is stored as passed, not copied.
/// </param>
/// <returns>The index of the newly added state.</returns>
public int AddState(int[] tag) {
    // Only a non-empty tag array is kept; anything else is normalised to null.
    int[] storedTag = null;
    if (tag != null && tag.Length > 0)
        storedTag = tag;

    var descriptor = new DFAStateDescriptior();
    descriptor.final = storedTag != null;
    descriptor.transitions = new int[AlphabetSize];
    descriptor.tag = storedTag;

    m_states.Add(descriptor);
    return m_states.Count - 1;
}
58 | |
/// <summary>
/// Records that reading <paramref name="symbol"/> in state <paramref name="s1"/>
/// moves the automaton to state <paramref name="s2"/>.
/// </summary>
/// <param name="s1">Source state index; validated against the existing states.</param>
/// <param name="s2">Target state index; validated against the existing states.</param>
/// <param name="symbol">Symbol class; validated against 0..AlphabetSize-1.</param>
public void DefineTransition(int s1,int s2, int symbol) {
    var lastState = m_states.Count - 1;

    Safe.ArgumentInRange(s1, 0, lastState, "s1");
    Safe.ArgumentInRange(s2, 0, lastState, "s2");
    Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol");

    // NOTE(review): the descriptor at index 0 is created by the constructor without
    // a transition table, so s1 == 0 presumably fails on the next line - confirm
    // whether the lower bound for s1 should be INITIAL_STATE.
    var outgoing = m_states[s1].transitions;
    outgoing[symbol] = s2;
}
66 | |
/// <summary>
/// Builds a minimized equivalent of this automaton in <paramref name="minimalDFA"/>
/// and registers the reduced symbol classes through <paramref name="sourceAlphabet"/>
/// and <paramref name="minimalAlphabet"/>.
/// </summary>
/// <remarks>
/// The method works in three steps:
/// 1. partition refinement of the states (final states are first grouped by their
///    tag set, all non-final states form one more block);
/// 2. merging of symbol classes that lead every optimal state to the same optimal state;
/// 3. emission of the resulting states and transitions into <paramref name="minimalDFA"/>.
/// The block containing <see cref="INITIAL_STATE"/> always becomes state 1 of the
/// result; the numbering of the remaining states is not specified.
/// </remarks>
/// <typeparam name="TA">Symbol type of the alphabets.</typeparam>
/// <param name="minimalDFA">
/// Receives the minimized states and transitions. Its first <c>AddState</c> call is
/// expected to return <see cref="INITIAL_STATE"/> (checked with a debug assertion).
/// </param>
/// <param name="sourceAlphabet">
/// Alphabet of this automaton; its <c>Reclassify</c> method is called with the merged
/// symbol classes and the result is used as a map from old to new symbol class.
/// </param>
/// <param name="minimalAlphabet">Alphabet passed to <c>Reclassify</c> as the target.</param>
protected void Optimize<TA>(IDFADefinition minimalDFA,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) {
    Safe.ArgumentNotNull(minimalDFA, "minimalDFA");
    Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");
    // sourceAlphabet is dereferenced only after all the partitioning work; reject a
    // null reference up front instead of failing late with a NullReferenceException.
    Safe.ArgumentNotNull(sourceAlphabet, "sourceAlphabet");

    // Hash codes are order-independent sums computed with wrap-around arithmetic:
    // Enumerable.Sum is overflow-checked and would throw on large values.
    var setComparer = new CustomEqualityComparer<HashSet<int>>(
        (x, y) => x.SetEquals(y),
        members => members.Aggregate(0, (h, x) => unchecked(h + x))
    );

    // Tag arrays are compared as sets, so duplicates must not influence the hash
    // either, otherwise equal keys could land in different buckets.
    var arrayComparer = new CustomEqualityComparer<int[]>(
        (x, y) => new HashSet<int>(x).SetEquals(y),
        tagArray => tagArray.Distinct().Aggregate(0, (h, x) => unchecked(h + x))
    );

    var optimalStates = new HashSet<HashSet<int>>(setComparer);
    var queue = new HashSet<HashSet<int>>(setComparer);

    // Initial partition, part 1: final states grouped by their tag set.
    // Every initial block also goes to the work list. Seeding the work list with
    // the non-final block alone never splits states that differ only in which
    // final block they reach (or in reaching a final state versus nothing).
    foreach (var g in Enumerable
        .Range(INITIAL_STATE, m_states.Count-1)
        .Select(i => new {
            index = i,
            descriptor = m_states[i]
        })
        .Where(x => x.descriptor.final)
        .GroupBy(x => x.descriptor.tag, arrayComparer)
    ) {
        var finalGroup = new HashSet<int>(g.Select(x => x.index));
        optimalStates.Add(finalGroup);
        queue.Add(finalGroup);
    }

    // Initial partition, part 2: all non-final states.
    var state = new HashSet<int>(
        Enumerable
            .Range(INITIAL_STATE, m_states.Count - 1)
            .Where(i => !m_states[i].final)
    );
    // An empty block would survive refinement untouched and end up as a spurious
    // isolated state in the result, so it is left out.
    if (state.Count > 0) {
        optimalStates.Add(state);
        queue.Add(state);
    }

    // Partition refinement: split every block by "has a transition into stateA on c".
    while (queue.Count > 0) {
        var stateA = queue.First();
        queue.Remove(stateA);

        for (int c = 0; c < AlphabetSize; c++) {
            // States that move into stateA on symbol c.
            var stateX = new HashSet<int>();

            for(int s = 1; s < m_states.Count; s++) {
                if (stateA.Contains(m_states[s].transitions[c]))
                    stateX.Add(s);
            }

            // Iterate over a snapshot because blocks are replaced while splitting.
            foreach (var stateY in optimalStates.ToArray()) {
                if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
                    var stateR1 = new HashSet<int>(stateY);
                    var stateR2 = new HashSet<int>(stateY);

                    stateR1.IntersectWith(stateX);
                    stateR2.ExceptWith(stateX);

                    optimalStates.Remove(stateY);
                    optimalStates.Add(stateR1);
                    optimalStates.Add(stateR2);

                    if (queue.Contains(stateY)) {
                        // The block was still pending: both halves must be processed.
                        queue.Remove(stateY);
                        queue.Add(stateR1);
                        queue.Add(stateR2);
                    } else {
                        // Otherwise the smaller half is sufficient.
                        queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
                    }
                }
            }
        }
    }

    // Build the maps between optimal states and the original states.

    var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE));

    // Original state index -> index of the optimal state that contains it.
    int[] reverseOptimalMap = new int[m_states.Count];
    // Optimal state index -> set of original states it represents.
    HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1];

    optimalMap[0] = new HashSet<int>(); // unreachable state
    optimalMap[1] = initialState; // initial state
    foreach (var member in initialState)
        reverseOptimalMap[member] = 1;

    int nextIndex = 2;
    foreach (var block in optimalStates) {
        if (block.SetEquals(initialState))
            continue;
        optimalMap[nextIndex] = block;
        foreach (var member in block)
            reverseOptimalMap[member] = nextIndex;
        nextIndex++;
    }

    // Compute the minimal alphabet.

    var minClasses = new HashSet<HashSet<int>>(setComparer);
    var alphaQueue = new Queue<HashSet<int>>();
    alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));

    for (int s = 1 ; s < optimalMap.Length; s++) {
        var newQueue = new Queue<HashSet<int>>();

        foreach (var A in alphaQueue) {
            if (A.Count == 1) {
                // A single symbol cannot be split any further.
                minClasses.Add(A);
                continue;
            }

            // Separate the symbols that lead to different optimal states.
            // optimalState -> alphaClass
            var classes = new Dictionary<int, HashSet<int>>();

            foreach (var term in A) {
                // Find where the optimal state s goes on symbol term: the first
                // defined elementary target, if any (0 maps to the unreachable state).
                var s2 = reverseOptimalMap[
                    optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != UNREACHEBLE_STATE)
                ];

                HashSet<int> A2;
                if (!classes.TryGetValue(s2, out A2)) {
                    A2 = new HashSet<int>();
                    newQueue.Enqueue(A2);
                    classes[s2] = A2;
                }
                A2.Add(term);
            }
        }

        if (newQueue.Count == 0)
            break;
        alphaQueue = newQueue;
    }

    foreach (var A in alphaQueue)
        minClasses.Add(A);

    var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);

    // Build the automaton.

    // Optimal state index -> index assigned by minimalDFA.
    var states = new int[ optimalMap.Length ];
    states[0] = UNREACHEBLE_STATE;

    for(var s = INITIAL_STATE; s < states.Length; s++) {
        // A merged state carries the union of the tags of its members.
        var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty<int>()).Distinct().ToArray();
        if (tags.Length > 0)
            states[s] = minimalDFA.AddState(tags);
        else
            states[s] = minimalDFA.AddState();
    }

    Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);

    for (int s1 = 1; s1 < m_states.Count; s1++) {
        for (int c = 0; c < AlphabetSize; c++) {
            var s2 = m_states[s1].transitions[c];
            if (s2 != UNREACHEBLE_STATE) {
                // Address the target automaton through the indices it returned
                // from AddState rather than assuming they equal the partition indices.
                minimalDFA.DefineTransition(
                    states[reverseOptimalMap[s1]],
                    states[reverseOptimalMap[s2]],
                    alphabetMap[c]
                );
            }
        }
    }

}
239 | |
/// <summary>
/// Writes a textual dump of the automaton to the console: first the contents of
/// every symbol class starting from class 1, then one line per defined transition.
/// A target state that is final is marked with a trailing "$".
/// </summary>
/// <typeparam name="TA">Symbol type of the alphabet.</typeparam>
/// <param name="alphabet">Alphabet whose reverse map supplies the symbols of each class.</param>
protected void PrintDFA<TA>(IAlphabet<TA> alphabet) {

    var symbolsByClass = alphabet.CreateReverseMap();

    for (int cls = 1; cls < symbolsByClass.Length; cls++) {
        Console.WriteLine("C{0}: {1}", cls, String.Join(",", symbolsByClass[cls]));
    }

    for (int source = 1; source < m_states.Count; source++) {
        var descriptor = m_states[source];
        for (int c = 0; c < AlphabetSize; c++) {
            // Entries equal to UNREACHEBLE_STATE mean "no transition" and are skipped.
            if (descriptor.transitions[c] == UNREACHEBLE_STATE)
                continue;

            var target = descriptor.transitions[c];
            Console.WriteLine("S{0} -{1}-> S{2}{3}", source, String.Join(",", symbolsByClass[c]), target, m_states[target].final ? "$" : "");
        }
    }
}
255 | |
/// <summary>
/// Gets the number of symbol classes. It is used as the length of every state's
/// transition table and as the exclusive upper bound for symbol arguments.
/// </summary>
public abstract int AlphabetSize { get; }
259 } | |
260 } |