158
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Diagnostics;
|
|
5 using System.Linq;
|
|
6
|
|
7 namespace Implab.Parsing {
|
161
|
8 public class DFADefinition<TInput, TState, TTag> : IDFADefinition<TInput, TState, TTag> {
|
|
/// <summary>Index reserved for the unreachable state; slot 0 of the state table.</summary>
public const int UNREACHEBLE_STATE = 0;

/// <summary>Index of the initial state; the first state created through AddState receives it.</summary>
public const int INITIAL_STATE = 1;

// Size of the input alphabet; every state gets a transition row of this length.
readonly int m_alpabetSize;

// Growable state table; index 0 holds the placeholder for the unreachable state.
readonly List<DFAStateDescriptior<TTag>> m_states;

// Array snapshot of m_states handed out by GetTransitionTable; reset to null when a state is added.
DFAStateDescriptior<TTag>[] m_statesArray;
|
|
16
|
|
/// <summary>
/// Creates an empty definition whose states will carry transition rows of
/// <paramref name="alphabetSize"/> entries.
/// </summary>
/// <param name="alphabetSize">Number of entries in every state's transition row.</param>
public DFADefinition(int alphabetSize) {
    m_alpabetSize = alphabetSize;

    // Slot 0 is occupied by a default descriptor so that real states start at INITIAL_STATE.
    m_states = new List<DFAStateDescriptior<TTag>> {
        new DFAStateDescriptior<TTag>()
    };
}
|
|
23
|
|
/// <summary>
/// Gets the <c>final</c> flag of the state stored at <c>INITIAL_STATE</c>.
/// The state must already have been added; otherwise the list indexer throws.
/// </summary>
public bool InitialStateIsFinal {
    get {
        var initial = m_states[INITIAL_STATE];
        return initial.final;
    }
}
|
|
29
|
|
/// <summary>
/// Appends a new non-final state with an empty transition row.
/// </summary>
/// <returns>The index of the newly added state.</returns>
public int AddState() {
    var newIndex = m_states.Count;

    var descriptor = new DFAStateDescriptior<TTag> {
        transitions = new int[AlphabetSize],
        final = false
    };
    m_states.Add(descriptor);

    // The cached array no longer matches the list; it is rebuilt on the next GetTransitionTable call.
    m_statesArray = null;

    return newIndex;
}
|
|
40
|
161
|
/// <summary>
/// Appends a new state. The state is final when <paramref name="tag"/> contains at
/// least one element; otherwise it is non-final and stores no tags.
/// </summary>
/// <param name="tag">Tags to attach to the state; may be null or empty.</param>
/// <returns>The index of the newly added state.</returns>
public int AddState(TTag[] tag) {
    var newIndex = m_states.Count;
    var hasTags = !(tag == null || tag.Length == 0);

    var descriptor = new DFAStateDescriptior<TTag> {
        final = hasTags,
        transitions = new int[AlphabetSize]
    };
    if (hasTags)
        descriptor.tag = tag;
    else
        descriptor.tag = null;

    m_states.Add(descriptor);

    // The cached array no longer matches the list; it is rebuilt on the next GetTransitionTable call.
    m_statesArray = null;

    return newIndex;
}
|
|
52
|
161
|
/// <summary>
/// Records a transition from <paramref name="s1"/> to <paramref name="s2"/> on
/// <paramref name="symbol"/>. All three values are first translated to integer indices
/// through <c>StateAlphabet</c> and <c>InputAlphabet</c>; a translation result of 0 is
/// rejected via <c>Safe.ArgumentAssert</c>.
/// </summary>
/// <param name="s1">Source state.</param>
/// <param name="s2">Target state.</param>
/// <param name="symbol">Input symbol that triggers the transition.</param>
public void DefineTransition(TState s1, TState s2, TInput symbol) {
    int source = StateAlphabet.Translate(s1);
    int target = StateAlphabet.Translate(s2);
    int symbolClass = InputAlphabet.Translate(symbol);

    Safe.ArgumentAssert(source != 0, "s1");
    Safe.ArgumentAssert(target != 0, "s2");
    Safe.ArgumentAssert(symbolClass != 0, "symbol");

    // The row array is shared with the stored descriptor, so writing through it updates the table.
    var row = m_states[source].transitions;
    row[symbolClass] = target;
}
|
|
64
|
161
|
#region IDFADefinition implementation

/// <summary>
/// Returns the state table as an array. The array is built from the state list on
/// first use and the same instance is returned until a state is added.
/// </summary>
public DFAStateDescriptior<TTag>[] GetTransitionTable() {
    return m_statesArray ?? (m_statesArray = m_states.ToArray());
}

/// <summary>
/// Alphabet used to translate input symbols.
/// NOTE(review): not implemented yet - the getter always throws.
/// </summary>
public IAlphabet<TInput> InputAlphabet {
    get { throw new NotImplementedException(); }
}

/// <summary>
/// Alphabet used to translate states.
/// NOTE(review): not implemented yet - the getter always throws.
/// </summary>
public IAlphabet<TState> StateAlphabet {
    get { throw new NotImplementedException(); }
}

#endregion
|
|
86
|
|
/// <summary>
/// Builds a reduced automaton from this definition in three steps:
/// (1) the states are partitioned by iterative refinement, starting from the groups of
/// final states that share the same tag set plus the group of non-final states;
/// (2) input classes that lead to the same partition block from every block are merged
/// and handed to <c>sourceAlphabet.Reclassify</c>;
/// (3) the result automaton is created with <paramref name="dfaFactory"/> and populated
/// with one state per block and the remapped transitions.
/// </summary>
/// <typeparam name="TA">Symbol type of the source and minimal alphabets.</typeparam>
/// <param name="dfaFactory">Creates the empty result automaton for the minimal alphabet.</param>
/// <param name="sourceAlphabet">Alphabet whose classes are regrouped through <c>Reclassify</c>.</param>
/// <param name="minimalAlphabet">Alphabet passed to <c>Reclassify</c> and to <paramref name="dfaFactory"/>.</param>
/// <returns>The automaton returned by <paramref name="dfaFactory"/>, filled with the reduced states and transitions.</returns>
// NOTE(review): the generic arguments of IDFADefinition are missing in this signature
// (refactoring in progress); it is kept verbatim and must be completed before this compiles.
protected IDFADefinition<> Optimize<TA>(Func<IAlphabet<TA>, IDFADefinition> dfaFactory,IAlphabet<TA> sourceAlphabet, IAlphabet<TA> minimalAlphabet) {
    Safe.ArgumentNotNull(dfaFactory, "dfaFactory");
    Safe.ArgumentNotNull(sourceAlphabet, "sourceAlphabet");
    Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");

    // Order-independent hashes. Unchecked addition is used instead of Enumerable.Sum,
    // which is overflow-checked and would throw for large inputs.
    var setComparer = new CustomEqualityComparer<HashSet<int>>(
        (x, y) => x.SetEquals(y),
        members => members.Aggregate(0, (hash, x) => unchecked(hash + x))
    );

    // Tag arrays are compared as sets, so the hash is computed over distinct elements only;
    // otherwise arrays that are equal as sets could produce different hashes.
    var tagComparer = new CustomEqualityComparer<TTag[]>(
        (x, y) => new HashSet<TTag>(x).SetEquals(y),
        a => new HashSet<TTag>(a).Aggregate(0, (hash, t) => unchecked(hash + (t == null ? 0 : t.GetHashCode())))
    );

    var optimalStates = new HashSet<HashSet<int>>(setComparer);
    var queue = new HashSet<HashSet<int>>(setComparer);

    // Initial partition: final states grouped by tag set. Every block is also queued as a
    // splitter; queuing only the non-final block would leave states that differ solely in
    // the final group they reach undistinguished.
    foreach (var g in Enumerable
        .Range(INITIAL_STATE, m_states.Count - 1)
        .Select(i => new {
            index = i,
            descriptor = m_states[i]
        })
        .Where(x => x.descriptor.final)
        .GroupBy(x => x.descriptor.tag, tagComparer)
    ) {
        var finalBlock = new HashSet<int>(g.Select(x => x.index));
        optimalStates.Add(finalBlock);
        queue.Add(finalBlock);
    }

    var nonFinal = new HashSet<int>(
        Enumerable
            .Range(INITIAL_STATE, m_states.Count - 1)
            .Where(i => !m_states[i].final)
    );
    // An empty block would only produce a spurious, unreachable state in the result.
    if (nonFinal.Count > 0) {
        optimalStates.Add(nonFinal);
        queue.Add(nonFinal);
    }

    // Partition refinement: take a splitter block, and for every input class split each
    // block that is only partially mapped into the splitter.
    while (queue.Count > 0) {
        var stateA = queue.First();
        queue.Remove(stateA);

        for (int c = 0; c < AlphabetSize; c++) {
            // States that move into the splitter on class c.
            var stateX = new HashSet<int>();

            for (int s = 1; s < m_states.Count; s++) {
                if (stateA.Contains(m_states[s].transitions[c]))
                    stateX.Add(s);
            }

            // Iterate over a snapshot because the partition is modified inside the loop.
            foreach (var stateY in optimalStates.ToArray()) {
                if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
                    var stateR1 = new HashSet<int>(stateY);
                    var stateR2 = new HashSet<int>(stateY);

                    stateR1.IntersectWith(stateX);
                    stateR2.ExceptWith(stateX);

                    optimalStates.Remove(stateY);
                    optimalStates.Add(stateR1);
                    optimalStates.Add(stateR2);

                    if (queue.Contains(stateY)) {
                        // A pending splitter was split: both halves must be processed.
                        queue.Remove(stateY);
                        queue.Add(stateR1);
                        queue.Add(stateR2);
                    } else {
                        // Otherwise processing the smaller half is sufficient.
                        queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
                    }
                }
            }
        }
    }

    // Build the maps between the optimal states and the original ones.

    var initialState = optimalStates.Single(x => x.Contains(INITIAL_STATE));

    // Maps an original state to the index of the optimal state that contains it.
    int[] reverseOptimalMap = new int[m_states.Count];
    // Maps an optimal state index to the set of original states it represents.
    HashSet<int>[] optimalMap = new HashSet<int>[optimalStates.Count + 1];
    {
        optimalMap[0] = new HashSet<int>(); // unreachable state
        optimalMap[1] = initialState; // initial state
        foreach (var ss in initialState)
            reverseOptimalMap[ss] = 1;

        int i = 2;
        foreach (var s in optimalStates) {
            if (s.SetEquals(initialState))
                continue;
            optimalMap[i] = s;
            foreach (var ss in s)
                reverseOptimalMap[ss] = i;
            i++;
        }
    }

    // Compute the minimal alphabet.

    var minClasses = new HashSet<HashSet<int>>(setComparer);
    var alphaQueue = new Queue<HashSet<int>>();
    alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0, AlphabetSize)));

    for (int s = 1; s < optimalMap.Length; s++) {
        var newQueue = new Queue<HashSet<int>>();

        foreach (var A in alphaQueue) {
            // A single-element class cannot be split any further.
            if (A.Count == 1) {
                minClasses.Add(A);
                continue;
            }

            // Separate the symbol classes that lead to different optimal states.
            // optimalState -> alphaClass
            var classes = new Dictionary<int, HashSet<int>>();

            foreach (var term in A) {
                // Look up where the optimal state s goes on symbol term:
                // the first valid elementary target state, if any.
                var s2 = reverseOptimalMap[
                    optimalMap[s].Select(x => m_states[x].transitions[term]).FirstOrDefault(x => x != 0)
                ];

                HashSet<int> A2;
                if (!classes.TryGetValue(s2, out A2)) {
                    A2 = new HashSet<int>();
                    newQueue.Enqueue(A2);
                    classes[s2] = A2;
                }
                A2.Add(term);
            }
        }

        if (newQueue.Count == 0)
            break;
        alphaQueue = newQueue;
    }

    foreach (var A in alphaQueue)
        minClasses.Add(A);

    var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);

    // Build the automaton.

    var minimalDFA = dfaFactory(minimalAlphabet);

    var states = new int[optimalMap.Length];
    states[0] = UNREACHEBLE_STATE;

    for (var s = INITIAL_STATE; s < states.Length; s++) {
        // An optimal state carries the distinct tags of all original states merged into it.
        var tags = optimalMap[s]
            .SelectMany(x => (IEnumerable<TTag>)m_states[x].tag ?? Enumerable.Empty<TTag>())
            .Distinct()
            .ToArray();
        if (tags.Length > 0)
            states[s] = minimalDFA.AddState(tags);
        else
            states[s] = minimalDFA.AddState();
    }

    // The transitions below use optimal indices directly, which relies on the factory
    // numbering its states in the same order they are added here.
    Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);

    for (int s1 = 1; s1 < m_states.Count; s1++) {
        for (int c = 0; c < AlphabetSize; c++) {
            var s2 = m_states[s1].transitions[c];
            if (s2 != UNREACHEBLE_STATE) {
                minimalDFA.DefineTransition(
                    reverseOptimalMap[s1],
                    reverseOptimalMap[s2],
                    alphabetMap[c]
                );
            }
        }
    }

    return minimalDFA;
}
|
|
262
|
|
/// <summary>
/// Writes a textual dump of the automaton to the console: first the contents of every
/// entry of the alphabet's reverse map starting at index 1, then one line per defined
/// transition. Target states that are final are marked with "$".
/// </summary>
/// <typeparam name="TA">Symbol type of <paramref name="alphabet"/>.</typeparam>
/// <param name="alphabet">Alphabet whose reverse map supplies the symbols printed for each class.</param>
public void PrintDFA<TA>(IAlphabet<TA> alphabet) {
    var symbolsByClass = alphabet.CreateReverseMap();

    for (int cls = 1; cls < symbolsByClass.Length; cls++) {
        var members = String.Join(",", symbolsByClass[cls]);
        Console.WriteLine("C{0}: {1}", cls, members);
    }

    for (int stateIndex = 1; stateIndex < m_states.Count; stateIndex++) {
        var descriptor = m_states[stateIndex];

        for (int c = 0; c < AlphabetSize; c++) {
            var target = descriptor.transitions[c];

            // Undefined transitions are not printed.
            if (target == UNREACHEBLE_STATE)
                continue;

            var finalMark = m_states[target].final ? "$" : "";
            Console.WriteLine("S{0} -{1}-> S{2}{3}", stateIndex, String.Join(",", symbolsByClass[c]), target, finalMark);
        }
    }
}
|
|
278
|
|
/// <summary>Gets the alphabet size passed to the constructor.</summary>
public int AlphabetSize {
    get { return m_alpabetSize; }
}
|
|
284 }
|
|
285 }
|