Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/RegularDFABuilder.cs @ 163:419aa51b04fd ref20160224
JSON moved to Formats namespace
Working in RegularDFA
author | cin |
---|---|
date | Wed, 24 Feb 2016 20:12:52 +0300 |
parents | |
children | ec35731ae299 |
comparison
equal
deleted
inserted
replaced
162:0526412bbb26 | 163:419aa51b04fd |
---|---|
1 using Implab; | |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 | |
7 namespace Implab.Automaton.RegularExpressions { | |
8 /// <summary> | |
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит | |
10 /// регулярное выражение и вычисляет followpos, затем используется метод | |
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
12 /// </summary> | |
13 public class RegularDFABuilder<TTag> : IVisitor<TTag> { | |
14 int m_idx = 0; | |
15 Token<TTag> m_root; | |
16 HashSet<int> m_firstpos; | |
17 HashSet<int> m_lastpos; | |
18 | |
19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
21 readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | |
22 | |
23 public Dictionary<int, HashSet<int>> FollowposMap { | |
24 get { return m_followpos; } | |
25 } | |
26 | |
27 public HashSet<int> Followpos(int pos) { | |
28 HashSet<int> set; | |
29 if (m_followpos.TryGetValue(pos, out set)) | |
30 return set; | |
31 return m_followpos[pos] = new HashSet<int>(); | |
32 } | |
33 | |
34 bool Nullable(object n) { | |
35 if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |
36 return true; | |
37 if (n is AltToken<TTag>) | |
38 return Nullable(((AltToken<TTag>)n).Left) || Nullable(((AltToken<TTag>)n).Right); | |
39 if (n is CatToken<TTag>) | |
40 return Nullable(((CatToken<TTag>)n).Left) && Nullable(((CatToken<TTag>)n).Right); | |
41 return false; | |
42 } | |
43 | |
44 | |
45 public void Visit(AltToken<TTag> token) { | |
46 if (m_root == null) | |
47 m_root = token; | |
48 var firtspos = new HashSet<int>(); | |
49 var lastpos = new HashSet<int>(); | |
50 | |
51 token.Left.Accept(this); | |
52 firtspos.UnionWith(m_firstpos); | |
53 lastpos.UnionWith(m_lastpos); | |
54 | |
55 token.Right.Accept(this); | |
56 firtspos.UnionWith(m_firstpos); | |
57 lastpos.UnionWith(m_lastpos); | |
58 | |
59 m_firstpos = firtspos; | |
60 m_lastpos = lastpos; | |
61 } | |
62 | |
63 public void Visit(StarToken<TTag> token) { | |
64 if (m_root == null) | |
65 m_root = token; | |
66 token.Token.Accept(this); | |
67 | |
68 foreach (var i in m_lastpos) | |
69 Followpos(i).UnionWith(m_firstpos); | |
70 } | |
71 | |
72 public void Visit(CatToken<TTag> token) { | |
73 if (m_root == null) | |
74 m_root = token; | |
75 | |
76 var firtspos = new HashSet<int>(); | |
77 var lastpos = new HashSet<int>(); | |
78 token.Left.Accept(this); | |
79 firtspos.UnionWith(m_firstpos); | |
80 var leftLastpos = m_lastpos; | |
81 | |
82 token.Right.Accept(this); | |
83 lastpos.UnionWith(m_lastpos); | |
84 var rightFirstpos = m_firstpos; | |
85 | |
86 if (Nullable(token.Left)) | |
87 firtspos.UnionWith(rightFirstpos); | |
88 | |
89 if (Nullable(token.Right)) | |
90 lastpos.UnionWith(leftLastpos); | |
91 | |
92 m_firstpos = firtspos; | |
93 m_lastpos = lastpos; | |
94 | |
95 foreach (var i in leftLastpos) | |
96 Followpos(i).UnionWith(rightFirstpos); | |
97 | |
98 } | |
99 | |
100 public void Visit(EmptyToken<TTag> token) { | |
101 if (m_root == null) | |
102 m_root = token; | |
103 } | |
104 | |
105 public void Visit(SymbolToken<TTag> token) { | |
106 if (m_root == null) | |
107 m_root = token; | |
108 m_idx++; | |
109 m_indexes[m_idx] = token.Value; | |
110 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
111 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
112 } | |
113 | |
114 public void Visit(EndToken<TTag> token) { | |
115 if (m_root == null) | |
116 m_root = token; | |
117 m_idx++; | |
118 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |
119 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
120 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
121 Followpos(m_idx); | |
122 m_ends.Add(m_idx, token.Tag); | |
123 } | |
124 | |
125 public void BuildDFA(IDFADefinitionBuilder<TTag> dfa) { | |
126 Safe.ArgumentNotNull(dfa,"dfa"); | |
127 | |
128 var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( | |
129 (x, y) => x.SetEquals(y), | |
130 x => x.Sum(n => n.GetHashCode()) | |
131 )); | |
132 | |
133 var initialState = states.DefineSymbol(m_firstpos); | |
134 | |
135 var tags = GetStateTags(m_firstpos); | |
136 if (tags != null && tags.Length > 0) | |
137 dfa.MarkFinalState(initialState, tags); | |
138 | |
139 var inputMax = m_indexes.Values.Max(); | |
140 var queue = new Queue<HashSet<int>>(); | |
141 | |
142 queue.Enqueue(m_firstpos); | |
143 | |
144 while (queue.Count > 0) { | |
145 var state = queue.Dequeue(); | |
146 var s1 = states.Translate(state); | |
147 Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); | |
148 | |
149 for (int a = 0; a <= inputMax; a++) { | |
150 var next = new HashSet<int>(); | |
151 foreach (var p in state) { | |
152 if (m_indexes[p] == a) { | |
153 next.UnionWith(Followpos(p)); | |
154 } | |
155 } | |
156 if (next.Count > 0) { | |
157 int s2 = states.Translate(next); | |
158 if (s2 == DFAConst.UNCLASSIFIED_INPUT) { | |
159 s2 = states.DefineSymbol(next); | |
160 | |
161 tags = GetStateTags(next); | |
162 if (tags != null && tags.Length > 0) | |
163 dfa.MarkFinalState(s2, tags); | |
164 | |
165 queue.Enqueue(next); | |
166 } | |
167 dfa.DefineTransition(s1, s2, a); | |
168 } | |
169 } | |
170 } | |
171 } | |
172 | |
173 TTag[] GetStateTags(IEnumerable<int> state) { | |
174 Debug.Assert(state != null); | |
175 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | |
176 } | |
177 | |
178 } | |
179 } |