annotate Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 209:a867536c68fc v2

Bound promise to CancellationToken Added new states to ExecutionSate enum. Added Safe.Guard() method to handle cleanup of the result of the promise
author cin
date Wed, 16 Nov 2016 03:06:08 +0300
parents d5c5db0335ee
children 302ca905c19e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
1 using Implab;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
2 using System;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
3 using System.Collections.Generic;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
4 using System.Diagnostics;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
5 using System.Linq;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
6
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
7 namespace Implab.Automaton.RegularExpressions {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
8 /// <summary>
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
10 /// регулярное выражение и вычисляет followpos, затем используется метод
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
12 /// </summary>
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
13 public class RegularExpressionVisitor : IVisitor {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
14 int m_idx;
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
15 Token m_root;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
16 HashSet<int> m_firstpos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
17 HashSet<int> m_lastpos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
18
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
21 readonly HashSet<int> m_ends = new HashSet<int>();
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
22
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
23 readonly IDFATableBuilder m_builder;
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
25 false,
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
26 new CustomEqualityComparer<HashSet<int>>(
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
27 (x, y) => x.SetEquals(y),
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
28 x => x.Sum(n => n.GetHashCode())
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
29 )
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
30 );
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
31
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
32 public RegularExpressionVisitor(IDFATableBuilder builder) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
33 Safe.ArgumentNotNull(builder, "builder");
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
34
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
35 m_builder = builder;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
36 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
37
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
38 HashSet<int> Followpos(int pos) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
39 HashSet<int> set;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
41 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
42
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
43 bool Nullable(object n) {
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
44 if (n is EmptyToken || n is StarToken)
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
45 return true;
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
46 var altToken = n as AltToken;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
47 if (altToken != null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
48 return Nullable(altToken.Left) || Nullable(altToken.Right);
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
49 var catToken = n as CatToken;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
50 if (catToken != null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
51 return Nullable(catToken.Left) && Nullable(catToken.Right);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
52 return false;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
53 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
54
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
55 protected int Index {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
56 get { return m_idx; }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
57 }
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
58
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
59 public void Visit(AltToken token) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
60 if (m_root == null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
61 m_root = token;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
62 var firtspos = new HashSet<int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
63 var lastpos = new HashSet<int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
64
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
65 token.Left.Accept(this);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
66 firtspos.UnionWith(m_firstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
67 lastpos.UnionWith(m_lastpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
68
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
69 token.Right.Accept(this);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
70 firtspos.UnionWith(m_firstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
71 lastpos.UnionWith(m_lastpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
72
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
73 m_firstpos = firtspos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
74 m_lastpos = lastpos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
75 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
76
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
77 public void Visit(StarToken token) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
78 if (m_root == null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
79 m_root = token;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
80 token.Token.Accept(this);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
81
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
82 foreach (var i in m_lastpos)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
83 Followpos(i).UnionWith(m_firstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
84 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
85
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
86 public void Visit(CatToken token) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
87 if (m_root == null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
88 m_root = token;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
89
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
90 var firtspos = new HashSet<int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
91 var lastpos = new HashSet<int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
92 token.Left.Accept(this);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
93 firtspos.UnionWith(m_firstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
94 var leftLastpos = m_lastpos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
95
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
96 token.Right.Accept(this);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
97 lastpos.UnionWith(m_lastpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
98 var rightFirstpos = m_firstpos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
99
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
100 if (Nullable(token.Left))
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
101 firtspos.UnionWith(rightFirstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
102
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
103 if (Nullable(token.Right))
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
104 lastpos.UnionWith(leftLastpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
105
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
106 m_firstpos = firtspos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
107 m_lastpos = lastpos;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
108
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
109 foreach (var i in leftLastpos)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
110 Followpos(i).UnionWith(rightFirstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
111
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
112 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
113
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
114 public void Visit(EmptyToken token) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
115 if (m_root == null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
116 m_root = token;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
117 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
118
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
119 public void Visit(SymbolToken token) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
120 if (m_root == null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
121 m_root = token;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
122 m_idx++;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
123 m_indexes[m_idx] = token.Value;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
124 m_firstpos = new HashSet<int>(new[] { m_idx });
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
125 m_lastpos = new HashSet<int>(new[] { m_idx });
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
126 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
127
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
128 public virtual void Visit(EndToken token) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
129 if (m_root == null)
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
130 m_root = token;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
131 m_idx++;
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
133 m_firstpos = new HashSet<int>(new[] { m_idx });
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
134 m_lastpos = new HashSet<int>(new[] { m_idx });
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
135 Followpos(m_idx);
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
136 m_ends.Add(m_idx);
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
137 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
138
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
139 public void BuildDFA() {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
140 AddState(m_firstpos);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
141 SetInitialState(m_firstpos);
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
142
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
143 if(IsFinal(m_firstpos))
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
144 MarkFinalState(m_firstpos);
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
145
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
146 var inputMax = m_indexes.Values.Max();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
147 var queue = new Queue<HashSet<int>>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
148
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
149 queue.Enqueue(m_firstpos);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
150
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
151 while (queue.Count > 0) {
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
152 var s1 = queue.Dequeue();
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
153
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
154 for (int a = 0; a <= inputMax; a++) {
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
155 var s2 = new HashSet<int>();
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
156 foreach (var p in s1) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
157 if (m_indexes[p] == a) {
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
158 s2.UnionWith(Followpos(p));
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
159 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
160 }
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
161 if (s2.Count > 0) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
162 if (!HasState(s2)) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
163 AddState(s2);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
164 if (IsFinal(s2))
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
165 MarkFinalState(s2);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
166
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
167 queue.Enqueue(s2);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
168 }
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
169
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
170 DefineTransition(s1, s2, a);
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
171 }
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
172
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
173 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
174 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
175 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
176
178
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
177 protected bool HasState(HashSet<int> state) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
178 return m_states.Contains(state);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
179 }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
180
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
181 protected void AddState(HashSet<int> state) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
182 Debug.Assert(!HasState(state));
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
183
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
184 m_states.DefineSymbol(state);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
185 }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
186
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
187 protected int Translate(HashSet<int> state) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
188 Debug.Assert(HasState(state));
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
189
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
190 return m_states.Translate(state);
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
191 }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
192
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
193 protected virtual void SetInitialState(HashSet<int> state) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
194 m_builder.SetInitialState(Translate(state));
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
195 }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
196
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
197 protected virtual void MarkFinalState(HashSet<int> state) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
198 m_builder.MarkFinalState(Translate(state));
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
199 }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
200
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
202
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
204 }
d5c5db0335ee working on JSON parser
cin
parents: 177
diff changeset
205
177
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
206 bool IsFinal(IEnumerable<int> state) {
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
207 Debug.Assert(state != null);
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
208 return state.Any(m_ends.Contains);
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
209 }
a0ff6a0e9c44 refactoring
cin
parents: 172
diff changeset
210
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
211 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
212 }