172
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Diagnostics;
|
|
5 using System.Linq;
|
|
6
|
|
7 namespace Implab.Automaton.RegularExpressions {
|
|
8 /// <summary>
|
|
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
|
|
10 /// регулярное выражение и вычисляет followpos, затем используется метод
|
|
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
|
|
12 /// </summary>
|
|
13 public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
|
|
14 int m_idx;
|
177
|
15 Token m_root;
|
172
|
16 HashSet<int> m_firstpos;
|
|
17 HashSet<int> m_lastpos;
|
|
18
|
|
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
|
|
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
|
177
|
21 readonly HashSet<int> m_ends = new HashSet<int>();
|
|
22 readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
|
172
|
23
|
|
24 public Dictionary<int, HashSet<int>> FollowposMap {
|
|
25 get { return m_followpos; }
|
|
26 }
|
|
27
|
|
28 public HashSet<int> Followpos(int pos) {
|
|
29 HashSet<int> set;
|
|
30 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
|
|
31 }
|
|
32
|
|
33 bool Nullable(object n) {
|
177
|
34 if (n is EmptyToken || n is StarToken)
|
172
|
35 return true;
|
177
|
36 var altToken = n as AltToken;
|
172
|
37 if (altToken != null)
|
|
38 return Nullable(altToken.Left) || Nullable(altToken.Right);
|
177
|
39 var catToken = n as CatToken;
|
172
|
40 if (catToken != null)
|
|
41 return Nullable(catToken.Left) && Nullable(catToken.Right);
|
|
42 return false;
|
|
43 }
|
|
44
|
|
45
|
177
|
46 public void Visit(AltToken token) {
|
172
|
47 if (m_root == null)
|
|
48 m_root = token;
|
|
49 var firtspos = new HashSet<int>();
|
|
50 var lastpos = new HashSet<int>();
|
|
51
|
|
52 token.Left.Accept(this);
|
|
53 firtspos.UnionWith(m_firstpos);
|
|
54 lastpos.UnionWith(m_lastpos);
|
|
55
|
|
56 token.Right.Accept(this);
|
|
57 firtspos.UnionWith(m_firstpos);
|
|
58 lastpos.UnionWith(m_lastpos);
|
|
59
|
|
60 m_firstpos = firtspos;
|
|
61 m_lastpos = lastpos;
|
|
62 }
|
|
63
|
177
|
64 public void Visit(StarToken token) {
|
172
|
65 if (m_root == null)
|
|
66 m_root = token;
|
|
67 token.Token.Accept(this);
|
|
68
|
|
69 foreach (var i in m_lastpos)
|
|
70 Followpos(i).UnionWith(m_firstpos);
|
|
71 }
|
|
72
|
177
|
73 public void Visit(CatToken token) {
|
172
|
74 if (m_root == null)
|
|
75 m_root = token;
|
|
76
|
|
77 var firtspos = new HashSet<int>();
|
|
78 var lastpos = new HashSet<int>();
|
|
79 token.Left.Accept(this);
|
|
80 firtspos.UnionWith(m_firstpos);
|
|
81 var leftLastpos = m_lastpos;
|
|
82
|
|
83 token.Right.Accept(this);
|
|
84 lastpos.UnionWith(m_lastpos);
|
|
85 var rightFirstpos = m_firstpos;
|
|
86
|
|
87 if (Nullable(token.Left))
|
|
88 firtspos.UnionWith(rightFirstpos);
|
|
89
|
|
90 if (Nullable(token.Right))
|
|
91 lastpos.UnionWith(leftLastpos);
|
|
92
|
|
93 m_firstpos = firtspos;
|
|
94 m_lastpos = lastpos;
|
|
95
|
|
96 foreach (var i in leftLastpos)
|
|
97 Followpos(i).UnionWith(rightFirstpos);
|
|
98
|
|
99 }
|
|
100
|
177
|
101 public void Visit(EmptyToken token) {
|
172
|
102 if (m_root == null)
|
|
103 m_root = token;
|
|
104 }
|
|
105
|
177
|
106 public void Visit(SymbolToken token) {
|
172
|
107 if (m_root == null)
|
|
108 m_root = token;
|
|
109 m_idx++;
|
|
110 m_indexes[m_idx] = token.Value;
|
|
111 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
112 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
113 }
|
|
114
|
|
115 public void Visit(EndToken<TTag> token) {
|
|
116 if (m_root == null)
|
|
117 m_root = token;
|
|
118 m_idx++;
|
|
119 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
|
|
120 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
121 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
122 Followpos(m_idx);
|
177
|
123 m_ends.Add(m_idx);
|
|
124 m_tags.Add(m_idx, token.Tag);
|
|
125 }
|
|
126
|
|
127 public void Visit(EndToken token) {
|
|
128 if (m_root == null)
|
|
129 m_root = token;
|
|
130 m_idx++;
|
|
131 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
|
|
132 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
133 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
134 Followpos(m_idx);
|
|
135 m_ends.Add(m_idx);
|
172
|
136 }
|
|
137
|
|
138 public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
|
|
139 Safe.ArgumentNotNull(dfa,"dfa");
|
|
140
|
|
141 var states = new MapAlphabet<HashSet<int>>(
|
|
142 false,
|
|
143 new CustomEqualityComparer<HashSet<int>>(
|
|
144 (x, y) => x.SetEquals(y),
|
|
145 x => x.Sum(n => n.GetHashCode())
|
|
146 ));
|
|
147
|
|
148 var initialState = states.DefineSymbol(m_firstpos);
|
|
149 dfa.SetInitialState(initialState);
|
|
150
|
|
151 var tags = GetStateTags(m_firstpos);
|
|
152 if (tags != null && tags.Length > 0)
|
|
153 dfa.MarkFinalState(initialState, tags);
|
|
154
|
|
155 var inputMax = m_indexes.Values.Max();
|
|
156 var queue = new Queue<HashSet<int>>();
|
|
157
|
|
158 queue.Enqueue(m_firstpos);
|
|
159
|
|
160 while (queue.Count > 0) {
|
|
161 var state = queue.Dequeue();
|
|
162 var s1 = states.Translate(state);
|
|
163 Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
|
|
164
|
|
165 for (int a = 0; a <= inputMax; a++) {
|
|
166 var next = new HashSet<int>();
|
|
167 foreach (var p in state) {
|
|
168 if (m_indexes[p] == a) {
|
|
169 next.UnionWith(Followpos(p));
|
|
170 }
|
|
171 }
|
|
172 if (next.Count > 0) {
|
177
|
173 int s2;
|
|
174 if (states.Contains(next)) {
|
|
175 s2 = states.Translate(next);
|
|
176 } else {
|
172
|
177 s2 = states.DefineSymbol(next);
|
|
178
|
177
|
179 if (IsFinal(next)) {
|
|
180
|
172
|
181 dfa.MarkFinalState(s2);
|
177
|
182 tags = GetStateTags(next);
|
|
183 if (tags != null && tags.Length > 0)
|
|
184 dfa.SetStateTag(s2, tags);
|
172
|
185 }
|
|
186
|
|
187 queue.Enqueue(next);
|
|
188 }
|
|
189 dfa.Add(new AutomatonTransition(s1, s2, a));
|
|
190 }
|
|
191 }
|
|
192 }
|
|
193 }
|
|
194
|
177
|
195 bool IsFinal(IEnumerable<int> state) {
|
|
196 Debug.Assert(state != null);
|
|
197 return state.Any(m_ends.Contains);
|
|
198 }
|
|
199
|
172
|
200 TTag[] GetStateTags(IEnumerable<int> state) {
|
|
201 Debug.Assert(state != null);
|
177
|
202 return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
|
172
|
203 }
|
|
204
|
|
205 }
|
|
206 }
|