annotate Implab/Automaton/DFATable.cs @ 236:302ca905c19e v2

JsonReader optimizations
author cin
date Tue, 21 Nov 2017 14:57:58 +0300
parents 5f7a3e1d32b9
children fa6cbf4d8841
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
1 using Implab;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
2 using System;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
3 using System.Collections.Generic;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
4 using System.Linq;
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
5 using System.Diagnostics;
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
6 using System.IO;
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
7 using System.CodeDom.Compiler;
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
8 using System.CodeDom;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
9
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
10 namespace Implab.Automaton {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
11 public class DFATable : IDFATableBuilder {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
12 int m_stateCount;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
13 int m_symbolCount;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
14 int m_initialState;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
15
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
16 readonly HashSet<int> m_finalStates = new HashSet<int>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
17 readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
18
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
19
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
20 #region IDFADefinition implementation
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
21
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
22 public bool IsFinalState(int s) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
23 Safe.ArgumentInRange(s, 0, m_stateCount, "s");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
24
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
25 return m_finalStates.Contains(s);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
26 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
27
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
28 public IEnumerable<int> FinalStates {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
29 get {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
30 return m_finalStates;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
31 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
32 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
33
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
34 public int StateCount {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
35 get { return m_stateCount; }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
36 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
37
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
38 public int AlphabetSize {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
39 get { return m_symbolCount; }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
40 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
41
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
42 public int InitialState {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
43 get { return m_initialState; }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
44 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
45
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
46 #endregion
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
47
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
48 public void SetInitialState(int s) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
49 Safe.ArgumentAssert(s >= 0, "s");
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
50 m_stateCount = Math.Max(m_stateCount, s + 1);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
51 m_initialState = s;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
52 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
53
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
54 public void MarkFinalState(int state) {
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
55 m_stateCount = Math.Max(m_stateCount, state + 1);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
56 m_finalStates.Add(state);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
57 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
58
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
59 public void Add(AutomatonTransition item) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
60 Safe.ArgumentAssert(item.s1 >= 0, "item");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
61 Safe.ArgumentAssert(item.s2 >= 0, "item");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
62 Safe.ArgumentAssert(item.edge >= 0, "item");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
63
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
64 m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
65 m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
66
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
67 m_transitions.Add(item);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
68 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
69
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
70 public void Clear() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
71 m_stateCount = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
72 m_symbolCount = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
73 m_finalStates.Clear();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
74 m_transitions.Clear();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
75 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
76
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
77 public bool Contains(AutomatonTransition item) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
78 return m_transitions.Contains(item);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
79 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
80
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
81 public void CopyTo(AutomatonTransition[] array, int arrayIndex) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
82 m_transitions.CopyTo(array, arrayIndex);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
83 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
84
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
85 public bool Remove(AutomatonTransition item) {
180
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 178
diff changeset
86 return m_transitions.Remove(item);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
87 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
88
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
89 public int Count {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
90 get {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
91 return m_transitions.Count;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
92 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
93 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
94
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
95 public bool IsReadOnly {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
96 get {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
97 return false;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
98 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
99 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
100
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
101 public IEnumerator<AutomatonTransition> GetEnumerator() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
102 return m_transitions.GetEnumerator();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
103 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
104
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
105 System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
106 return GetEnumerator();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
107 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
108
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
109 public void AddSymbol(int symbol) {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
110 Safe.ArgumentAssert(symbol >= 0, "symbol");
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
111 m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
112 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
113
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
114 public int[,] CreateTransitionTable() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
115 var table = new int[StateCount,AlphabetSize];
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
116
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
117 for (int i = 0; i < StateCount; i++)
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
118 for (int j = 0; j < AlphabetSize; j++)
236
302ca905c19e JsonReader optimizations
cin
parents: 229
diff changeset
119 table[i, j] = AutomatonConst.UnreachableState;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
120
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
121 foreach (var t in this)
229
5f7a3e1d32b9 JsonXmlReader performance tuning
cin
parents: 183
diff changeset
122 table[t.s1,t.edge] = (byte)t.s2;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
123
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
124 return table;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
125 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
126
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
127 public bool[] CreateFinalStateTable() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
128 var table = new bool[StateCount];
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
129
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
130 foreach (var s in FinalStates)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
131 table[s] = true;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
132
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
133 return table;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
134 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
135
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
136 /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary>
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
137 /// <remarks>
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
138 /// В процессе построения минимального автомата требуется разделить множество состояний,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
139 /// на два подмножества - конечные состояния и все остальные, после чего эти подмножества
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
140 /// будут резделены на более мелкие. Иногда требуется гарантировать различия конечных сосотяний,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
141 /// для этого необходимо переопределить даннцю фукнцию, для получения множеств конечных состояний.
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
142 /// </remarks>
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
143 /// <returns>The final states.</returns>
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
144 protected virtual IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
145 return new [] { new HashSet<int>(states) };
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
146 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
147
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
148 protected void Optimize(
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
149 IDFATableBuilder optimalDFA,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
150 IDictionary<int,int> alphabetMap,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
151 IDictionary<int,int> stateMap
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
152 ) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
153 Safe.ArgumentNotNull(optimalDFA, "dfa");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
154 Safe.ArgumentNotNull(alphabetMap, "alphabetMap");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
155 Safe.ArgumentNotNull(stateMap, "stateMap");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
156
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
157
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
158 var setComparer = new CustomEqualityComparer<HashSet<int>>(
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
159 (x, y) => x.SetEquals(y),
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
160 s => s.Sum(x => x.GetHashCode())
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
161 );
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
162
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
163 var optimalStates = new HashSet<HashSet<int>>(setComparer);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
164 var queue = new HashSet<HashSet<int>>(setComparer);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
165
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
166 optimalStates.Add(new HashSet<int>(FinalStates));
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
167
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
168 var state = new HashSet<int>(
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
169 Enumerable
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
170 .Range(0, m_stateCount)
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
171 .Where(i => !m_finalStates.Contains(i))
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
172 );
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
173
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
174 optimalStates.Add(state);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
175 queue.Add(state);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
177 var rmap = m_transitions
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
178 .GroupBy(t => t.s2)
180
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 178
diff changeset
179 .ToDictionary(
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
180 g => g.Key, // s2
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
181 g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
182 );
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
183
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
184 while (queue.Count > 0) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
185 var stateA = queue.First();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
186 queue.Remove(stateA);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
187
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
188 for (int c = 0; c < m_symbolCount; c++) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
189 var stateX = new HashSet<int>();
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
190 foreach(var a in stateA.Where(rmap.ContainsKey))
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
191 stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
192
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
193 var tmp = optimalStates.ToArray();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
194 foreach (var stateY in tmp) {
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
195 var stateR1 = new HashSet<int>(stateY);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
196 var stateR2 = new HashSet<int>(stateY);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
197
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
198 stateR1.IntersectWith(stateX);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
199 stateR2.ExceptWith(stateX);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
200
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
201 if (stateR1.Count > 0 && stateR2.Count > 0) {
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
202
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
203
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
204 optimalStates.Remove(stateY);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
205 optimalStates.Add(stateR1);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
206 optimalStates.Add(stateR2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
207
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
208 if (queue.Contains(stateY)) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
209 queue.Remove(stateY);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
210 queue.Add(stateR1);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
211 queue.Add(stateR2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
212 } else {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
213 queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
214 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
215 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
216 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
217 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
218 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
219
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
220 // дополнительно разбиваем конечные состояния
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
221 foreach (var final in optimalStates.Where(s => s.Overlaps(m_finalStates)).ToArray()) {
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
222 optimalStates.Remove(final);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
223 foreach (var split in SplitFinalStates(final))
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
224 optimalStates.Add(split);
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
225 }
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
226
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
227
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
228 // карта получения оптимального состояния по соотвествующему ему простому состоянию
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
229 var nextState = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
230 foreach (var item in optimalStates) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
231 var id = nextState++;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
232 foreach (var s in item)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
233 stateMap[s] = id;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
234 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
235
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
236 // получаем минимальный алфавит
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
237 // входные символы не различимы, если Move(s,a1) == Move(s,a2), для любого s
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
238 // для этого используем алгоритм кластеризации, сначала
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
239 // считаем, что все символы не различимы
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
240
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
241 var minClasses = new HashSet<HashSet<int>>(setComparer);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
242 var alphaQueue = new Queue<HashSet<int>>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
243 alphaQueue.Enqueue(new HashSet<int>(Enumerable.Range(0,AlphabetSize)));
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
244
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
245 // для всех состояний, будем проверять каждый класс на различимость,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
246 // т.е. символы различимы, если они приводят к разным состояниям
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
247 for (int s = 0 ; s < optimalStates.Count; s++) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
248 var newQueue = new Queue<HashSet<int>>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
249
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
250 foreach (var A in alphaQueue) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
251 // классы из одного символа делить бесполезно, переводим их сразу в
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
252 // результирующий алфавит
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
253 if (A.Count == 1) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
254 minClasses.Add(A);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
255 continue;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
256 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
257
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
258 // различаем классы символов, которые переводят в различные оптимальные состояния
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
259 // optimalState -> alphaClass
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
260 var classes = new Dictionary<int, HashSet<int>>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
261
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
262 foreach (var term in A) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
263 // ищем все переходы класса по символу term
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
264 var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
265
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
266 HashSet<int> a2;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
267 if (!classes.TryGetValue(s2, out a2)) {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
268 a2 = new HashSet<int>();
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
269 newQueue.Enqueue(a2);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
270 classes[s2] = a2;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
271 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
272 a2.Add(term);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
273 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
274 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
275
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
276 if (newQueue.Count == 0)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
277 break;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
278 alphaQueue = newQueue;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
279 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
280
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
281 // после окончания работы алгоритма в очереди останутся минимальные различимые классы
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
282 // входных символов
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
283 foreach (var A in alphaQueue)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
284 minClasses.Add(A);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
285
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
286 // построение отображения алфавитов входных символов.
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
287 // поскольку символ DFAConst.UNCLASSIFIED_INPUT может иметь
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
288 // специальное значение, тогда сохраним минимальный класс,
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
289 // содержащий этот символ на томже месте.
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
290
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
291 var nextCls = 0;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
292 foreach (var item in minClasses) {
236
302ca905c19e JsonReader optimizations
cin
parents: 229
diff changeset
293 if (nextCls == AutomatonConst.UnclassifiedInput)
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
294 nextCls++;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
295
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
296 // сохраняем DFAConst.UNCLASSIFIED_INPUT
236
302ca905c19e JsonReader optimizations
cin
parents: 229
diff changeset
297 var cls = item.Contains(AutomatonConst.UnclassifiedInput) ? AutomatonConst.UnclassifiedInput : nextCls++;
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
298 optimalDFA.AddSymbol(cls);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
299
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
300 foreach (var a in item)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
301 alphabetMap[a] = cls;
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
302 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
303
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
304 // построение автомата
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
305 optimalDFA.SetInitialState(stateMap[m_initialState]);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
306
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
307 foreach (var sf in m_finalStates.Select(s => stateMap[s]).Distinct())
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
308 optimalDFA.MarkFinalState(sf);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
309
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
310 foreach (var t in m_transitions.Select(t => new AutomatonTransition(stateMap[t.s1],stateMap[t.s2],alphabetMap[t.edge])).Distinct())
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
311 optimalDFA.Add(t);
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
312 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
313
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
314 protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
315 Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
316 Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
317
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
318 var data = new List<string>();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
319
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
320 data.Add("digraph dfa {");
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
321
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
322 foreach (var final in m_finalStates)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
323 data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
324
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
325 foreach (var t in m_transitions)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
326 data.Add(String.Format(
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
327 "{0} -> {2} [label={1}];",
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
328 String.Join("", stateAlphabet.GetSymbols(t.s1)),
236
302ca905c19e JsonReader optimizations
cin
parents: 229
diff changeset
329 ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UnclassifiedInput ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
330 String.Join("", stateAlphabet.GetSymbols(t.s2))
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
331 ));
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
332 data.Add("}");
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
333 return String.Join("\n", data);
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
334 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
335
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
336 static string ToLiteral(string input)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
337 {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
338 using (var writer = new StringWriter())
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
339 {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
340 using (var provider = CodeDomProvider.CreateProvider("CSharp"))
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
341 {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
342 provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
343 return writer.ToString();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
344 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
345 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
346 }
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
347 }
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
348 }