annotate Implab/Automaton/DFATable.cs @ 187:dd4a3590f9c6 ref20160224

Reworked cancelation handling, if the cancel handler isn't specified the OperationCanceledException will be handled by the error handler Any unhandled OperationCanceledException will cause the promise cancelation
author cin
date Tue, 19 Apr 2016 17:35:20 +0300
parents 4f82e0f161c3
children 5f7a3e1d32b9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
1 using Implab;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
2 using System;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
3 using System.Collections.Generic;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
4 using System.Linq;
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
5 using System.Diagnostics;
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
6 using System.IO;
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
7 using System.CodeDom.Compiler;
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
8 using System.CodeDom;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
9
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
10 namespace Implab.Automaton {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
11 public class DFATable : IDFATableBuilder {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
12 int m_stateCount;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
13 int m_symbolCount;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
14 int m_initialState;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
15
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
16 readonly HashSet<int> m_finalStates = new HashSet<int>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
18
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
19
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
20 #region IDFADefinition implementation
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
21
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
22 public bool IsFinalState(int s) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
24
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
25 return m_finalStates.Contains(s);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
26 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
27
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
28 public IEnumerable<int> FinalStates {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
29 get {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
30 return m_finalStates;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
31 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
32 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
33
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
34 public int StateCount {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
35 get { return m_stateCount; }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
36 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
37
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
38 public int AlphabetSize {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
39 get { return m_symbolCount; }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
40 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
41
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
42 public int InitialState {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
43 get { return m_initialState; }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
44 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
45
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
46 #endregion
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
47
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
48 public void SetInitialState(int s) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
49 Safe.ArgumentAssert(s >= 0, "s");
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
50 m_stateCount = Math.Max(m_stateCount, s + 1);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
51 m_initialState = s;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
52 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
53
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
54 public void MarkFinalState(int state) {
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
55 m_stateCount = Math.Max(m_stateCount, state + 1);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
56 m_finalStates.Add(state);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
57 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
58
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
59 public void Add(AutomatonTransition item) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
60 Safe.ArgumentAssert(item.s1 >= 0, "item");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
61 Safe.ArgumentAssert(item.s2 >= 0, "item");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
62 Safe.ArgumentAssert(item.edge >= 0, "item");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
63
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
66
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
67 m_transitions.Add(item);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
68 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
69
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
70 public void Clear() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
71 m_stateCount = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
72 m_symbolCount = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
73 m_finalStates.Clear();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
74 m_transitions.Clear();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
75 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
76
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
77 public bool Contains(AutomatonTransition item) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
78 return m_transitions.Contains(item);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
79 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
80
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
82 m_transitions.CopyTo(array, arrayIndex);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
83 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
84
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
85 public bool Remove(AutomatonTransition item) {
180
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 178
diff changeset
86 return m_transitions.Remove(item);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
87 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
88
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
89 public int Count {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
90 get {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
91 return m_transitions.Count;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
92 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
93 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
94
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
95 public bool IsReadOnly {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
96 get {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
97 return false;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
98 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
99 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
100
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
101 public IEnumerator<AutomatonTransition> GetEnumerator() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
102 return m_transitions.GetEnumerator();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
103 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
104
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
106 return GetEnumerator();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
107 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
108
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
109 public void AddSymbol(int symbol) {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
112 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
113
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
114 public int[,] CreateTransitionTable() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
115 var table = new int[StateCount,AlphabetSize];
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
116
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
117 for (int i = 0; i < StateCount; i++)
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
118 for (int j = 0; j < AlphabetSize; j++)
178
d5c5db0335ee working on JSON parser
cin
parents: 176
diff changeset
119 table[i, j] = AutomatonConst.UNREACHABLE_STATE;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
120
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
121 foreach (var t in this)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
122 table[t.s1,t.edge] = t.s2;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
123
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
124 return table;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
125 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
126
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
127 public bool[] CreateFinalStateTable() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
128 var table = new bool[StateCount];
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
129
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
130 foreach (var s in FinalStates)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
131 table[s] = true;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
132
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
133 return table;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
134 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
135
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
136 /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary>
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
137 /// <remarks>
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
138 /// В процессе построения минимального автомата требуется разделить множество состояний,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
139 /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
140 /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
142 /// </remarks>
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
143 /// <returns>The final states.</returns>
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
144 protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
145 return new [] { new HashSet<int>(states) };
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
146 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
147
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
148 protected void Optimize(
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
149 IDFATableBuilder optimalDFA,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
150 IDictionary<int,int> alphabetMap,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
151 IDictionary<int,int> stateMap
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
152 ) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
153 Safe.ArgumentNotNull(optimalDFA, "dfa");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
155 Safe.ArgumentNotNull(stateMap, "stateMap");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
156
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
157
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
159 (x, y) => x.SetEquals(y),
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
160 s => s.Sum(x => x.GetHashCode())
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
161 );
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
162
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
164 var queue = new HashSet<HashSet<int>>(setComparer);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
165
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
166 optimalStates.Add(new HashSet<int>(FinalStates));
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
167
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
168 var state = new HashSet<int>(
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
169 Enumerable
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
170 .Range(0, m_stateCount)
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
171 .Where(i => !m_finalStates.Contains(i))
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
172 );
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
173
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
174 optimalStates.Add(state);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
175 queue.Add(state);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
177 var rmap = m_transitions
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
178 .GroupBy(t => t.s2)
180
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 178
diff changeset
179 .ToDictionary(
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
180 g => g.Key, // s2
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
181 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
182 );
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
183
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
184 while (queue.Count > 0) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
185 var stateA = queue.First();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
186 queue.Remove(stateA);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
187
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
188 for (int c = 0; c < m_symbolCount; c++) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
189 var stateX = new HashSet<int>();
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
190 foreach(var a in stateA.Where(rmap.ContainsKey))
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
191 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
192
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
193 var tmp = optimalStates.ToArray();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
194 foreach (var stateY in tmp) {
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
195 var stateR1 = new HashSet<int>(stateY);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
196 var stateR2 = new HashSet<int>(stateY);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
197
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
198 stateR1.IntersectWith(stateX);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
199 stateR2.ExceptWith(stateX);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
200
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
201 if (stateR1.Count > 0 && stateR2.Count > 0) {
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
202
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
203
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
204 optimalStates.Remove(stateY);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
205 optimalStates.Add(stateR1);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
206 optimalStates.Add(stateR2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
207
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
208 if (queue.Contains(stateY)) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
209 queue.Remove(stateY);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
210 queue.Add(stateR1);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
211 queue.Add(stateR2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
212 } else {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
213 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
214 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
215 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
216 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
217 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
218 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
219
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
220 // дополнительно разбиваем конечные состояния
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
221 foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
222 optimalStates.Remove(final);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
223 foreach (var split in SplitFinalStates(final))
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
224 optimalStates.Add(split);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
225 }
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
226
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
227
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
228 // карта получения оптимального состояния по соотвествующему ему простому состоянию
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
229 var nextState = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
230 foreach (var item in optimalStates) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
231 var id = nextState++;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
232 foreach (var s in item)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
233 stateMap[s] = id;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
234 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
235
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
236 // получаем минимальный алфавит
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
237 // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
238 // для этого используем алгоритм кластеризации, сначала
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
239 // считаем, что все символы не различимы
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
240
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
241 var minClasses = new HashSet<HashSet<int>>(setComparer);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
242 var alphaQueue = new Queue<HashSet<int>>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
243 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
244
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
245 // для всех состояний, будем проверять каждый класс на различимость,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
246 // т.е. символы различимы, если они приводят к разным состояниям
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
247 for (int s = 0 ; s < optimalStates.Count; s++) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
248 var newQueue = new Queue<HashSet<int>>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
249
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
250 foreach (var A in alphaQueue) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
251 // классы из одного символа делить бесполезно, переводим их сразу в
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
252 // результирующий алфавит
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
253 if (A.Count == 1) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
254 minClasses.Add(A);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
255 continue;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
256 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
257
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
258 // различаем классы символов, которые переводят в различные оптимальные состояния
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
259 // optimalState -> alphaClass
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
260 var classes = new Dictionary<int, HashSet<int>>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
261
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
262 foreach (var term in A) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
263 // ищем все переходы класса по символу term
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
264 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
265
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
266 HashSet<int> a2;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
267 if (!classes.TryGetValue(s2, out a2)) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
268 a2 = new HashSet<int>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
269 newQueue.Enqueue(a2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
270 classes[s2] = a2;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
271 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
272 a2.Add(term);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
273 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
274 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
275
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
276 if (newQueue.Count == 0)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
277 break;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
278 alphaQueue = newQueue;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
279 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
280
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
281 // после окончания работы алгоритма в очереди останутся минимальные различимые классы
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
282 // входных символов
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
283 foreach (var A in alphaQueue)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
284 minClasses.Add(A);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
285
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
286 // построение отображения алфавитов входных символов.
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
287 // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
288 // специальное значение, тогда сохраним минимальный класс,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
289 // содержащий этот символ на томже месте.
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
290
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
291 var nextCls = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
292 foreach (var item in minClasses) {
178
d5c5db0335ee working on JSON parser
cin
parents: 176
diff changeset
293 if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
294 nextCls++;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
295
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
296 // сохраняем DFAConst.UNCLASSIFIED_INPUT
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
297 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
298 optimalDFA.AddSymbol(cls);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
299
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
300 foreach (var a in item)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
301 alphabetMap[a] = cls;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
302 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
303
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
304 // построение автомата
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
305 optimalDFA.SetInitialState(stateMap[m_initialState]);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
306
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
307 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
308 optimalDFA.MarkFinalState(sf);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
309
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
310 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
311 optimalDFA.Add(t);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
312 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
313
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
314 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
315 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
316 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
317
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
318 var data = new List<string>();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
319
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
320 data.Add("digraph dfa {");
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
321
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
322 foreach (var final in m_finalStates)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
323 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
324
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
325 foreach (var t in m_transitions)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
326 data.Add(String.Format(
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
327 "{0} -> {2} [label={1}];",
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
328 String.Join("", stateAlphabet.GetSymbols(t.s1)),
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
329 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
330 String.Join("", stateAlphabet.GetSymbols(t.s2))
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
331 ));
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
332 data.Add("}");
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
333 return String.Join("\n", data);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
334 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
335
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
336 static string ToLiteral(string input)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
337 {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
338 using (var writer = new StringWriter())
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
339 {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
340 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
341 {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
342 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
343 return writer.ToString();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
344 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
345 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
346 }
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
347 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
348 }