comparison Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 190:1c2a16d071a7 v2

Слияние с ref20160224
author cin
date Fri, 22 Apr 2016 13:08:08 +0300
parents d5c5db0335ee
children 302ca905c19e
comparison
equal deleted inserted replaced
161:2a8466f0cb8a 190:1c2a16d071a7
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6
7 namespace Implab.Automaton.RegularExpressions {
8 /// <summary>
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит
10 /// регулярное выражение и вычисляет followpos, затем используется метод
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
12 /// </summary>
13 public class RegularExpressionVisitor : IVisitor {
14 int m_idx;
15 Token m_root;
16 HashSet<int> m_firstpos;
17 HashSet<int> m_lastpos;
18
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
21 readonly HashSet<int> m_ends = new HashSet<int>();
22
23 readonly IDFATableBuilder m_builder;
24 readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
25 false,
26 new CustomEqualityComparer<HashSet<int>>(
27 (x, y) => x.SetEquals(y),
28 x => x.Sum(n => n.GetHashCode())
29 )
30 );
31
32 public RegularExpressionVisitor(IDFATableBuilder builder) {
33 Safe.ArgumentNotNull(builder, "builder");
34
35 m_builder = builder;
36 }
37
38 HashSet<int> Followpos(int pos) {
39 HashSet<int> set;
40 return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
41 }
42
43 bool Nullable(object n) {
44 if (n is EmptyToken || n is StarToken)
45 return true;
46 var altToken = n as AltToken;
47 if (altToken != null)
48 return Nullable(altToken.Left) || Nullable(altToken.Right);
49 var catToken = n as CatToken;
50 if (catToken != null)
51 return Nullable(catToken.Left) && Nullable(catToken.Right);
52 return false;
53 }
54
55 protected int Index {
56 get { return m_idx; }
57 }
58
59 public void Visit(AltToken token) {
60 if (m_root == null)
61 m_root = token;
62 var firtspos = new HashSet<int>();
63 var lastpos = new HashSet<int>();
64
65 token.Left.Accept(this);
66 firtspos.UnionWith(m_firstpos);
67 lastpos.UnionWith(m_lastpos);
68
69 token.Right.Accept(this);
70 firtspos.UnionWith(m_firstpos);
71 lastpos.UnionWith(m_lastpos);
72
73 m_firstpos = firtspos;
74 m_lastpos = lastpos;
75 }
76
77 public void Visit(StarToken token) {
78 if (m_root == null)
79 m_root = token;
80 token.Token.Accept(this);
81
82 foreach (var i in m_lastpos)
83 Followpos(i).UnionWith(m_firstpos);
84 }
85
86 public void Visit(CatToken token) {
87 if (m_root == null)
88 m_root = token;
89
90 var firtspos = new HashSet<int>();
91 var lastpos = new HashSet<int>();
92 token.Left.Accept(this);
93 firtspos.UnionWith(m_firstpos);
94 var leftLastpos = m_lastpos;
95
96 token.Right.Accept(this);
97 lastpos.UnionWith(m_lastpos);
98 var rightFirstpos = m_firstpos;
99
100 if (Nullable(token.Left))
101 firtspos.UnionWith(rightFirstpos);
102
103 if (Nullable(token.Right))
104 lastpos.UnionWith(leftLastpos);
105
106 m_firstpos = firtspos;
107 m_lastpos = lastpos;
108
109 foreach (var i in leftLastpos)
110 Followpos(i).UnionWith(rightFirstpos);
111
112 }
113
114 public void Visit(EmptyToken token) {
115 if (m_root == null)
116 m_root = token;
117 }
118
119 public void Visit(SymbolToken token) {
120 if (m_root == null)
121 m_root = token;
122 m_idx++;
123 m_indexes[m_idx] = token.Value;
124 m_firstpos = new HashSet<int>(new[] { m_idx });
125 m_lastpos = new HashSet<int>(new[] { m_idx });
126 }
127
128 public virtual void Visit(EndToken token) {
129 if (m_root == null)
130 m_root = token;
131 m_idx++;
132 m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
133 m_firstpos = new HashSet<int>(new[] { m_idx });
134 m_lastpos = new HashSet<int>(new[] { m_idx });
135 Followpos(m_idx);
136 m_ends.Add(m_idx);
137 }
138
139 public void BuildDFA() {
140 AddState(m_firstpos);
141 SetInitialState(m_firstpos);
142
143 if(IsFinal(m_firstpos))
144 MarkFinalState(m_firstpos);
145
146 var inputMax = m_indexes.Values.Max();
147 var queue = new Queue<HashSet<int>>();
148
149 queue.Enqueue(m_firstpos);
150
151 while (queue.Count > 0) {
152 var s1 = queue.Dequeue();
153
154 for (int a = 0; a <= inputMax; a++) {
155 var s2 = new HashSet<int>();
156 foreach (var p in s1) {
157 if (m_indexes[p] == a) {
158 s2.UnionWith(Followpos(p));
159 }
160 }
161 if (s2.Count > 0) {
162 if (!HasState(s2)) {
163 AddState(s2);
164 if (IsFinal(s2))
165 MarkFinalState(s2);
166
167 queue.Enqueue(s2);
168 }
169
170 DefineTransition(s1, s2, a);
171 }
172
173 }
174 }
175 }
176
177 protected bool HasState(HashSet<int> state) {
178 return m_states.Contains(state);
179 }
180
181 protected void AddState(HashSet<int> state) {
182 Debug.Assert(!HasState(state));
183
184 m_states.DefineSymbol(state);
185 }
186
187 protected int Translate(HashSet<int> state) {
188 Debug.Assert(HasState(state));
189
190 return m_states.Translate(state);
191 }
192
193 protected virtual void SetInitialState(HashSet<int> state) {
194 m_builder.SetInitialState(Translate(state));
195 }
196
197 protected virtual void MarkFinalState(HashSet<int> state) {
198 m_builder.MarkFinalState(Translate(state));
199 }
200
201 protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
202
203 m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
204 }
205
206 bool IsFinal(IEnumerable<int> state) {
207 Debug.Assert(state != null);
208 return state.Any(m_ends.Contains);
209 }
210
211 }
212 }