Mercurial > pub > ImplabNet
annotate Implab/Parsing/DFABuilder.cs @ 156:97fbbf816844 v2
Promises: SignalXXX methods merged into SignalHandler method.
Components: RunnableComponent In progress
author | cin |
---|---|
date | Mon, 15 Feb 2016 04:22:15 +0300 |
parents | c0bf853aa04f |
children | 130781364799 |
rev | line source |
---|---|
55 | 1 using Implab; |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 using System.Text; | |
7 using System.Threading.Tasks; | |
8 | |
9 namespace Implab.Parsing { | |
10 /// <summary> | |
11 /// Используется для построения ДКА по регулярному выражению, сначала обходит | |
12 /// регулярное выражение и вычисляет followpos, затем используется метод | |
13 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
14 /// </summary> | |
15 public class DFABuilder : IVisitor { | |
16 int m_idx = 0; | |
17 Token m_root; | |
18 HashSet<int> m_firstpos; | |
19 HashSet<int> m_lastpos; | |
20 | |
21 Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
22 Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
23 Dictionary<int, int> m_ends = new Dictionary<int, int>(); | |
24 | |
25 public Dictionary<int, HashSet<int>> FollowposMap { | |
26 get { return m_followpos; } | |
27 } | |
28 | |
29 public HashSet<int> Followpos(int pos) { | |
30 HashSet<int> set; | |
31 if (m_followpos.TryGetValue(pos, out set)) | |
32 return set; | |
33 return m_followpos[pos] = new HashSet<int>(); | |
34 } | |
35 | |
36 bool Nullable(object n) { | |
37 if (n is EmptyToken || n is StarToken) | |
38 return true; | |
39 if (n is AltToken) | |
40 return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); | |
41 if (n is CatToken) | |
42 return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); | |
43 return false; | |
44 } | |
45 | |
46 | |
47 public void Visit(AltToken token) { | |
48 if (m_root == null) | |
49 m_root = token; | |
50 var firtspos = new HashSet<int>(); | |
51 var lastpos = new HashSet<int>(); | |
52 | |
53 token.Left.Accept(this); | |
54 firtspos.UnionWith(m_firstpos); | |
55 lastpos.UnionWith(m_lastpos); | |
56 | |
57 token.Right.Accept(this); | |
58 firtspos.UnionWith(m_firstpos); | |
59 lastpos.UnionWith(m_lastpos); | |
60 | |
61 m_firstpos = firtspos; | |
62 m_lastpos = lastpos; | |
63 } | |
64 | |
65 public void Visit(StarToken token) { | |
66 if (m_root == null) | |
67 m_root = token; | |
68 token.Token.Accept(this); | |
69 | |
70 foreach (var i in m_lastpos) | |
71 Followpos(i).UnionWith(m_firstpos); | |
72 } | |
73 | |
74 public void Visit(CatToken token) { | |
75 if (m_root == null) | |
76 m_root = token; | |
77 | |
78 var firtspos = new HashSet<int>(); | |
79 var lastpos = new HashSet<int>(); | |
80 token.Left.Accept(this); | |
81 firtspos.UnionWith(m_firstpos); | |
82 var leftLastpos = m_lastpos; | |
83 | |
84 token.Right.Accept(this); | |
85 lastpos.UnionWith(m_lastpos); | |
86 var rightFirstpos = m_firstpos; | |
87 | |
88 if (Nullable(token.Left)) | |
89 firtspos.UnionWith(rightFirstpos); | |
90 | |
91 if (Nullable(token.Right)) | |
92 lastpos.UnionWith(leftLastpos); | |
93 | |
94 m_firstpos = firtspos; | |
95 m_lastpos = lastpos; | |
96 | |
97 foreach (var i in leftLastpos) | |
98 Followpos(i).UnionWith(rightFirstpos); | |
99 | |
100 } | |
101 | |
102 public void Visit(EmptyToken token) { | |
103 if (m_root == null) | |
104 m_root = token; | |
105 ; | |
106 } | |
107 | |
108 public void Visit(SymbolToken token) { | |
109 if (m_root == null) | |
110 m_root = token; | |
111 m_idx++; | |
112 m_indexes[m_idx] = token.Value; | |
113 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
114 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
115 } | |
116 | |
117 public void Visit(EndToken token) { | |
118 if (m_root == null) | |
119 m_root = token; | |
120 m_idx++; | |
121 m_indexes[m_idx] = Alphabet.UNCLASSIFIED; | |
122 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
123 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
124 Followpos(m_idx); | |
125 m_ends.Add(m_idx, token.Tag); | |
126 } | |
127 | |
128 public void BuildDFA(IDFADefinition dfa) { | |
129 Safe.ArgumentNotNull(dfa,"dfa"); | |
130 | |
131 var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>( | |
132 (x, y) => x.SetEquals(y), | |
133 (x) => x.Sum(n => n.GetHashCode()) | |
134 )); | |
135 | |
136 stateMap[m_firstpos] = DefineState( dfa, m_firstpos); | |
137 Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE); | |
138 | |
139 var queue = new Queue<HashSet<int>>(); | |
140 | |
141 queue.Enqueue(m_firstpos); | |
142 | |
143 while (queue.Count > 0) { | |
144 var state = queue.Dequeue(); | |
145 var s1 = stateMap[state]; | |
146 | |
147 for (int a = 0; a < dfa.AlphabetSize; a++) { | |
148 var next = new HashSet<int>(); | |
149 foreach (var p in state) { | |
150 if (m_indexes[p] == a) { | |
151 next.UnionWith(Followpos(p)); | |
152 } | |
153 } | |
154 if (next.Count > 0) { | |
155 int s2; | |
156 if (!stateMap.TryGetValue(next, out s2)) { | |
157 stateMap[next] = s2 = DefineState(dfa, next); | |
158 queue.Enqueue(next); | |
159 } | |
160 dfa.DefineTransition(s1, s2, a); | |
161 } | |
162 } | |
163 | |
164 } | |
165 } | |
166 | |
167 int[] GetStateTags(HashSet<int> state) { | |
168 Debug.Assert(state != null); | |
156
97fbbf816844
Promises: SignalXXX methods merged into SignalHandler method.
cin
parents:
55
diff
changeset
|
169 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); |
55 | 170 } |
171 | |
172 int DefineState(IDFADefinition automa, HashSet<int> state) { | |
173 Debug.Assert(automa != null); | |
174 Debug.Assert(state != null); | |
175 | |
176 var tags = GetStateTags(state); | |
177 | |
178 return tags.Length > 0 ? automa.AddState(tags) : automa.AddState(); | |
179 } | |
180 | |
181 } | |
182 } |