Mercurial > pub > ImplabNet
annotate Implab/Parsing/DFABuilder.cs @ 158:130781364799 v2
refactoring, code cleanup
author | cin |
---|---|
date | Thu, 18 Feb 2016 14:34:02 +0300 |
parents | 97fbbf816844 |
children |
rev | line source |
---|---|
55 | 1 using Implab; |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 | |
7 namespace Implab.Parsing { | |
8 /// <summary> | |
9 /// Используется для построения ДКА по регулярному выражению, сначала обходит | |
10 /// регулярное выражение и вычисляет followpos, затем используется метод | |
11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
12 /// </summary> | |
13 public class DFABuilder : IVisitor { | |
14 int m_idx = 0; | |
15 Token m_root; | |
16 HashSet<int> m_firstpos; | |
17 HashSet<int> m_lastpos; | |
18 | |
19 Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
20 Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
21 Dictionary<int, int> m_ends = new Dictionary<int, int>(); | |
22 | |
23 public Dictionary<int, HashSet<int>> FollowposMap { | |
24 get { return m_followpos; } | |
25 } | |
26 | |
27 public HashSet<int> Followpos(int pos) { | |
28 HashSet<int> set; | |
29 if (m_followpos.TryGetValue(pos, out set)) | |
30 return set; | |
31 return m_followpos[pos] = new HashSet<int>(); | |
32 } | |
33 | |
34 bool Nullable(object n) { | |
35 if (n is EmptyToken || n is StarToken) | |
36 return true; | |
37 if (n is AltToken) | |
38 return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); | |
39 if (n is CatToken) | |
40 return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); | |
41 return false; | |
42 } | |
43 | |
44 | |
45 public void Visit(AltToken token) { | |
46 if (m_root == null) | |
47 m_root = token; | |
48 var firtspos = new HashSet<int>(); | |
49 var lastpos = new HashSet<int>(); | |
50 | |
51 token.Left.Accept(this); | |
52 firtspos.UnionWith(m_firstpos); | |
53 lastpos.UnionWith(m_lastpos); | |
54 | |
55 token.Right.Accept(this); | |
56 firtspos.UnionWith(m_firstpos); | |
57 lastpos.UnionWith(m_lastpos); | |
58 | |
59 m_firstpos = firtspos; | |
60 m_lastpos = lastpos; | |
61 } | |
62 | |
63 public void Visit(StarToken token) { | |
64 if (m_root == null) | |
65 m_root = token; | |
66 token.Token.Accept(this); | |
67 | |
68 foreach (var i in m_lastpos) | |
69 Followpos(i).UnionWith(m_firstpos); | |
70 } | |
71 | |
72 public void Visit(CatToken token) { | |
73 if (m_root == null) | |
74 m_root = token; | |
75 | |
76 var firtspos = new HashSet<int>(); | |
77 var lastpos = new HashSet<int>(); | |
78 token.Left.Accept(this); | |
79 firtspos.UnionWith(m_firstpos); | |
80 var leftLastpos = m_lastpos; | |
81 | |
82 token.Right.Accept(this); | |
83 lastpos.UnionWith(m_lastpos); | |
84 var rightFirstpos = m_firstpos; | |
85 | |
86 if (Nullable(token.Left)) | |
87 firtspos.UnionWith(rightFirstpos); | |
88 | |
89 if (Nullable(token.Right)) | |
90 lastpos.UnionWith(leftLastpos); | |
91 | |
92 m_firstpos = firtspos; | |
93 m_lastpos = lastpos; | |
94 | |
95 foreach (var i in leftLastpos) | |
96 Followpos(i).UnionWith(rightFirstpos); | |
97 | |
98 } | |
99 | |
100 public void Visit(EmptyToken token) { | |
101 if (m_root == null) | |
102 m_root = token; | |
103 ; | |
104 } | |
105 | |
106 public void Visit(SymbolToken token) { | |
107 if (m_root == null) | |
108 m_root = token; | |
109 m_idx++; | |
110 m_indexes[m_idx] = token.Value; | |
111 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
112 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
113 } | |
114 | |
115 public void Visit(EndToken token) { | |
116 if (m_root == null) | |
117 m_root = token; | |
118 m_idx++; | |
158 | 119 m_indexes[m_idx] = IndexedAlphabetBase<char>.UNCLASSIFIED; |
55 | 120 m_firstpos = new HashSet<int>(new[] { m_idx }); |
121 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
122 Followpos(m_idx); | |
123 m_ends.Add(m_idx, token.Tag); | |
124 } | |
125 | |
126 public void BuildDFA(IDFADefinition dfa) { | |
127 Safe.ArgumentNotNull(dfa,"dfa"); | |
128 | |
129 var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>( | |
130 (x, y) => x.SetEquals(y), | |
131 (x) => x.Sum(n => n.GetHashCode()) | |
132 )); | |
133 | |
134 stateMap[m_firstpos] = DefineState( dfa, m_firstpos); | |
158 | 135 Debug.Assert(stateMap[m_firstpos] == DFADefinition.INITIAL_STATE); |
55 | 136 |
137 var queue = new Queue<HashSet<int>>(); | |
138 | |
139 queue.Enqueue(m_firstpos); | |
140 | |
141 while (queue.Count > 0) { | |
142 var state = queue.Dequeue(); | |
143 var s1 = stateMap[state]; | |
144 | |
145 for (int a = 0; a < dfa.AlphabetSize; a++) { | |
146 var next = new HashSet<int>(); | |
147 foreach (var p in state) { | |
148 if (m_indexes[p] == a) { | |
149 next.UnionWith(Followpos(p)); | |
150 } | |
151 } | |
152 if (next.Count > 0) { | |
153 int s2; | |
154 if (!stateMap.TryGetValue(next, out s2)) { | |
155 stateMap[next] = s2 = DefineState(dfa, next); | |
156 queue.Enqueue(next); | |
157 } | |
158 dfa.DefineTransition(s1, s2, a); | |
159 } | |
160 } | |
161 | |
162 } | |
163 } | |
164 | |
165 int[] GetStateTags(HashSet<int> state) { | |
166 Debug.Assert(state != null); | |
156
97fbbf816844
Promises: SignalXXX methods merged into SignalHandler method.
cin
parents:
55
diff
changeset
|
167 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); |
55 | 168 } |
169 | |
170 int DefineState(IDFADefinition automa, HashSet<int> state) { | |
171 Debug.Assert(automa != null); | |
172 Debug.Assert(state != null); | |
173 | |
174 var tags = GetStateTags(state); | |
175 | |
176 return tags.Length > 0 ? automa.AddState(tags) : automa.AddState(); | |
177 } | |
178 | |
179 } | |
180 } |