55
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Diagnostics;
|
|
5 using System.Linq;
|
|
6 using System.Text;
|
|
7 using System.Threading.Tasks;
|
|
8
|
|
9 namespace Implab.Parsing {
|
|
10 /// <summary>
|
|
11 /// Используется для построения ДКА по регулярному выражению, сначала обходит
|
|
12 /// регулярное выражение и вычисляет followpos, затем используется метод
|
|
13 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
|
|
14 /// </summary>
|
|
15 public class DFABuilder : IVisitor {
|
|
16 int m_idx = 0;
|
|
17 Token m_root;
|
|
18 HashSet<int> m_firstpos;
|
|
19 HashSet<int> m_lastpos;
|
|
20
|
|
21 Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
|
|
22 Dictionary<int, int> m_indexes = new Dictionary<int, int>();
|
|
23 Dictionary<int, int> m_ends = new Dictionary<int, int>();
|
|
24
|
|
25 public Dictionary<int, HashSet<int>> FollowposMap {
|
|
26 get { return m_followpos; }
|
|
27 }
|
|
28
|
|
29 public HashSet<int> Followpos(int pos) {
|
|
30 HashSet<int> set;
|
|
31 if (m_followpos.TryGetValue(pos, out set))
|
|
32 return set;
|
|
33 return m_followpos[pos] = new HashSet<int>();
|
|
34 }
|
|
35
|
|
36 bool Nullable(object n) {
|
|
37 if (n is EmptyToken || n is StarToken)
|
|
38 return true;
|
|
39 if (n is AltToken)
|
|
40 return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right);
|
|
41 if (n is CatToken)
|
|
42 return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right);
|
|
43 return false;
|
|
44 }
|
|
45
|
|
46
|
|
47 public void Visit(AltToken token) {
|
|
48 if (m_root == null)
|
|
49 m_root = token;
|
|
50 var firtspos = new HashSet<int>();
|
|
51 var lastpos = new HashSet<int>();
|
|
52
|
|
53 token.Left.Accept(this);
|
|
54 firtspos.UnionWith(m_firstpos);
|
|
55 lastpos.UnionWith(m_lastpos);
|
|
56
|
|
57 token.Right.Accept(this);
|
|
58 firtspos.UnionWith(m_firstpos);
|
|
59 lastpos.UnionWith(m_lastpos);
|
|
60
|
|
61 m_firstpos = firtspos;
|
|
62 m_lastpos = lastpos;
|
|
63 }
|
|
64
|
|
65 public void Visit(StarToken token) {
|
|
66 if (m_root == null)
|
|
67 m_root = token;
|
|
68 token.Token.Accept(this);
|
|
69
|
|
70 foreach (var i in m_lastpos)
|
|
71 Followpos(i).UnionWith(m_firstpos);
|
|
72 }
|
|
73
|
|
74 public void Visit(CatToken token) {
|
|
75 if (m_root == null)
|
|
76 m_root = token;
|
|
77
|
|
78 var firtspos = new HashSet<int>();
|
|
79 var lastpos = new HashSet<int>();
|
|
80 token.Left.Accept(this);
|
|
81 firtspos.UnionWith(m_firstpos);
|
|
82 var leftLastpos = m_lastpos;
|
|
83
|
|
84 token.Right.Accept(this);
|
|
85 lastpos.UnionWith(m_lastpos);
|
|
86 var rightFirstpos = m_firstpos;
|
|
87
|
|
88 if (Nullable(token.Left))
|
|
89 firtspos.UnionWith(rightFirstpos);
|
|
90
|
|
91 if (Nullable(token.Right))
|
|
92 lastpos.UnionWith(leftLastpos);
|
|
93
|
|
94 m_firstpos = firtspos;
|
|
95 m_lastpos = lastpos;
|
|
96
|
|
97 foreach (var i in leftLastpos)
|
|
98 Followpos(i).UnionWith(rightFirstpos);
|
|
99
|
|
100 }
|
|
101
|
|
102 public void Visit(EmptyToken token) {
|
|
103 if (m_root == null)
|
|
104 m_root = token;
|
|
105 ;
|
|
106 }
|
|
107
|
|
108 public void Visit(SymbolToken token) {
|
|
109 if (m_root == null)
|
|
110 m_root = token;
|
|
111 m_idx++;
|
|
112 m_indexes[m_idx] = token.Value;
|
|
113 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
114 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
115 }
|
|
116
|
|
117 public void Visit(EndToken token) {
|
|
118 if (m_root == null)
|
|
119 m_root = token;
|
|
120 m_idx++;
|
|
121 m_indexes[m_idx] = Alphabet.UNCLASSIFIED;
|
|
122 m_firstpos = new HashSet<int>(new[] { m_idx });
|
|
123 m_lastpos = new HashSet<int>(new[] { m_idx });
|
|
124 Followpos(m_idx);
|
|
125 m_ends.Add(m_idx, token.Tag);
|
|
126 }
|
|
127
|
|
128 public void BuildDFA(IDFADefinition dfa) {
|
|
129 Safe.ArgumentNotNull(dfa,"dfa");
|
|
130
|
|
131 var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>(
|
|
132 (x, y) => x.SetEquals(y),
|
|
133 (x) => x.Sum(n => n.GetHashCode())
|
|
134 ));
|
|
135
|
|
136 stateMap[m_firstpos] = DefineState( dfa, m_firstpos);
|
|
137 Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE);
|
|
138
|
|
139 var queue = new Queue<HashSet<int>>();
|
|
140
|
|
141 queue.Enqueue(m_firstpos);
|
|
142
|
|
143 while (queue.Count > 0) {
|
|
144 var state = queue.Dequeue();
|
|
145 var s1 = stateMap[state];
|
|
146
|
|
147 for (int a = 0; a < dfa.AlphabetSize; a++) {
|
|
148 var next = new HashSet<int>();
|
|
149 foreach (var p in state) {
|
|
150 if (m_indexes[p] == a) {
|
|
151 next.UnionWith(Followpos(p));
|
|
152 }
|
|
153 }
|
|
154 if (next.Count > 0) {
|
|
155 int s2;
|
|
156 if (!stateMap.TryGetValue(next, out s2)) {
|
|
157 stateMap[next] = s2 = DefineState(dfa, next);
|
|
158 queue.Enqueue(next);
|
|
159 }
|
|
160 dfa.DefineTransition(s1, s2, a);
|
|
161 }
|
|
162 }
|
|
163
|
|
164 }
|
|
165 }
|
|
166
|
|
167 int[] GetStateTags(HashSet<int> state) {
|
|
168 Debug.Assert(state != null);
|
|
169 return state.Where(pos => m_ends.ContainsKey(pos)).Select(pos => m_ends[pos]).ToArray();
|
|
170 }
|
|
171
|
|
172 int DefineState(IDFADefinition automa, HashSet<int> state) {
|
|
173 Debug.Assert(automa != null);
|
|
174 Debug.Assert(state != null);
|
|
175
|
|
176 var tags = GetStateTags(state);
|
|
177
|
|
178 return tags.Length > 0 ? automa.AddState(tags) : automa.AddState();
|
|
179 }
|
|
180
|
|
181 }
|
|
182 }
|