172
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Diagnostics;
|
|
5 using System.Linq;
|
|
6
|
|
7 namespace Implab.Automaton.RegularExpressions {
|
|
8 /// <summary>
|
|
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
|
|
10 /// регулярное выражение и вычисляет followpos, затем используется метод
|
|
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
|
|
12 /// </summary>
|
178
|
13 public class RegularExpressionVisitor : IVisitor {
|
172
|
14 int m_idx;
|
177
|
15 Token m_root;
|
172
|
16 HashSet<int> m_firstpos;
|
|
17 HashSet<int> m_lastpos;
|
|
18
|
|
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
|
|
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
|
177
|
21 readonly HashSet<int> m_ends = new HashSet<int>();
|
172
|
22
|
178
|
23 readonly IDFATableBuilder m_builder;
|
|
24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
|
|
25 false,
|
|
26 new CustomEqualityComparer<HashSet<int>>(
|
|
27 (x, y) => x.SetEquals(y),
|
|
28 x => x.Sum(n => n.GetHashCode())
|
|
29 )
|
|
30 );
|
|
31
|
|
32 public RegularExpressionVisitor(IDFATableBuilder builder) {
|
|
33 Safe.ArgumentNotNull(builder, "builder");
|
|
34
|
|
35 m_builder = builder;
|
172
|
36 }
|
|
37
|
178
|
38 HashSet<int> Followpos(int pos) {
|
172
|
39 HashSet<int> set;
|
|
40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
|
|
41 }
|
|
42
|
|
43 bool Nullable(object n) {
|
177
|
44 if (n is EmptyToken || n is StarToken)
|
172
|
45 return true;
|
177
|
46 var altToken = n as AltToken;
|
172
|
47 if (altToken != null)
|
|
48 return Nullable(altToken.Left) || Nullable(altToken.Right);
|
177
|
49 var catToken = n as CatToken;
|
172
|
50 if (catToken != null)
|
|
51 return Nullable(catToken.Left) && Nullable(catToken.Right);
|
|
52 return false;
|
|
53 }
|
|
54
|
178
|
55 protected int Index {
|
|
56 get { return m_idx; }
|
|
57 }
|
172
|
58
|
177
|
59 public void Visit(AltToken token) {
|
172
|
60 if (m_root == null)
|
|
61 m_root = token;
|
|
62 var firtspos = new HashSet<int>();
|
|
63 var lastpos = new HashSet<int>();
|
|
64
|
|
65 token.Left.Accept(this);
|
|
66 firtspos.UnionWith(m_firstpos);
|
|
67 lastpos.UnionWith(m_lastpos);
|
|
68
|
|
69 token.Right.Accept(this);
|
|
70 firtspos.UnionWith(m_firstpos);
|
|
71 lastpos.UnionWith(m_lastpos);
|
|
72
|
|
73 m_firstpos = firtspos;
|
|
74 m_lastpos = lastpos;
|
|
75 }
|
|
76
|
177
|
77 public void Visit(StarToken token) {
|
172
|
78 if (m_root == null)
|
|
79 m_root = token;
|
|
80 token.Token.Accept(this);
|
|
81
|
|
82 foreach (var i in m_lastpos)
|
|
83 Followpos(i).UnionWith(m_firstpos);
|
|
84 }
|
|
85
|
177
|
86 public void Visit(CatToken token) {
|
172
|
87 if (m_root == null)
|
|
88 m_root = token;
|
|
89
|
|
90 var firtspos = new HashSet<int>();
|
|
91 var lastpos = new HashSet<int>();
|
|
92 token.Left.Accept(this);
|
|
93 firtspos.UnionWith(m_firstpos);
|
|
94 var leftLastpos = m_lastpos;
|
|
95
|
|
96 token.Right.Accept(this);
|
|
97 lastpos.UnionWith(m_lastpos);
|
|
98 var rightFirstpos = m_firstpos;
|
|
99
|
|
100 if (Nullable(token.Left))
|
|
101 firtspos.UnionWith(rightFirstpos);
|
|
102
|
|
103 if (Nullable(token.Right))
|
|
104 lastpos.UnionWith(leftLastpos);
|
|
105
|
|
106 m_firstpos = firtspos;
|
|
107 m_lastpos = lastpos;
|
|
108
|
|
109 foreach (var i in leftLastpos)
|
|
110 Followpos(i).UnionWith(rightFirstpos);
|
|
111
|
|
112 }
|
|
113
|
177
|
114 public void Visit(EmptyToken token) {
|
172
|
115 if (m_root == null)
|
|
116 m_root = token;
|
|
117 }
|
|
118
|
177
|
119 public void Visit(SymbolToken token) {
|
172
|
120 if (m_root == null)
|
|
121 m_root = token;
|
|
122 m_idx++;
|
|
123 m_indexes[m_idx] = token.Value;
|
|
124 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
125 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
126 }
|
|
127
|
178
|
128 public virtual void Visit(EndToken token) {
|
172
|
129 if (m_root == null)
|
|
130 m_root = token;
|
|
131 m_idx++;
|
178
|
132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
|
177
|
133 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
134 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
135 Followpos(m_idx);
|
|
136 m_ends.Add(m_idx);
|
172
|
137 }
|
|
138
|
178
|
139 public void BuildDFA() {
|
|
140 AddState(m_firstpos);
|
|
141 SetInitialState(m_firstpos);
|
172
|
142
|
178
|
143 if(IsFinal(m_firstpos))
|
|
144 MarkFinalState(m_firstpos);
|
172
|
145
|
|
146 var inputMax = m_indexes.Values.Max();
|
|
147 var queue = new Queue<HashSet<int>>();
|
|
148
|
|
149 queue.Enqueue(m_firstpos);
|
|
150
|
|
151 while (queue.Count > 0) {
|
178
|
152 var s1 = queue.Dequeue();
|
172
|
153
|
|
154 for (int a = 0; a <= inputMax; a++) {
|
178
|
155 var s2 = new HashSet<int>();
|
|
156 foreach (var p in s1) {
|
172
|
157 if (m_indexes[p] == a) {
|
178
|
158 s2.UnionWith(Followpos(p));
|
172
|
159 }
|
|
160 }
|
178
|
161 if (s2.Count > 0) {
|
|
162 if (!HasState(s2)) {
|
|
163 AddState(s2);
|
|
164 if (IsFinal(s2))
|
|
165 MarkFinalState(s2);
|
|
166
|
|
167 queue.Enqueue(s2);
|
|
168 }
|
172
|
169
|
178
|
170 DefineTransition(s1, s2, a);
|
172
|
171 }
|
178
|
172
|
172
|
173 }
|
|
174 }
|
|
175 }
|
|
176
|
178
|
177 protected bool HasState(HashSet<int> state) {
|
|
178 return m_states.Contains(state);
|
|
179 }
|
|
180
|
|
181 protected void AddState(HashSet<int> state) {
|
|
182 Debug.Assert(!HasState(state));
|
|
183
|
|
184 m_states.DefineSymbol(state);
|
|
185 }
|
|
186
|
|
187 protected int Translate(HashSet<int> state) {
|
|
188 Debug.Assert(HasState(state));
|
|
189
|
|
190 return m_states.Translate(state);
|
|
191 }
|
|
192
|
|
193 protected virtual void SetInitialState(HashSet<int> state) {
|
|
194 m_builder.SetInitialState(Translate(state));
|
|
195 }
|
|
196
|
|
197 protected virtual void MarkFinalState(HashSet<int> state) {
|
|
198 m_builder.MarkFinalState(Translate(state));
|
|
199 }
|
|
200
|
|
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
|
|
202
|
|
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
|
|
204 }
|
|
205
|
177
|
206 bool IsFinal(IEnumerable<int> state) {
|
|
207 Debug.Assert(state != null);
|
|
208 return state.Any(m_ends.Contains);
|
|
209 }
|
|
210
|
172
|
211 }
|
|
212 }
|