Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/DFABuilder.cs @ 162:0526412bbb26 ref20160224
DFA refactoring
| author | cin |
|---|---|
| date | Wed, 24 Feb 2016 08:39:53 +0300 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 161:2a8466f0cb8a | 162:0526412bbb26 |
|---|---|
| 1 using Implab; | |
| 2 using System; | |
| 3 using System.Collections.Generic; | |
| 4 using System.Diagnostics; | |
| 5 using System.Linq; | |
| 6 | |
| 7 namespace Implab.Automaton.RegularExpressions { | |
| 8 /// <summary> | |
| 9 /// Используется для построения ДКА по регулярному выражению, сначала обходит | |
| 10 /// регулярное выражение и вычисляет followpos, затем используется метод | |
| 11 /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. | |
| 12 /// </summary> | |
| 13 public class DFABuilder<TTag> : IVisitor<TTag> { | |
| 14 int m_idx = 0; | |
| 15 Token<TTag> m_root; | |
| 16 HashSet<int> m_firstpos; | |
| 17 HashSet<int> m_lastpos; | |
| 18 | |
| 19 readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); | |
| 20 readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); | |
| 21 readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); | |
| 22 | |
| 23 public Dictionary<int, HashSet<int>> FollowposMap { | |
| 24 get { return m_followpos; } | |
| 25 } | |
| 26 | |
| 27 public HashSet<int> Followpos(int pos) { | |
| 28 HashSet<int> set; | |
| 29 if (m_followpos.TryGetValue(pos, out set)) | |
| 30 return set; | |
| 31 return m_followpos[pos] = new HashSet<int>(); | |
| 32 } | |
| 33 | |
| 34 bool Nullable(object n) { | |
| 35 if (n is EmptyToken<TTag> || n is StarToken<TTag>) | |
| 36 return true; | |
| 37 if (n is AltToken<TTag>) | |
| 38 return Nullable(((AltToken<TTag>)n).Left) || Nullable(((AltToken<TTag>)n).Right); | |
| 39 if (n is CatToken<TTag>) | |
| 40 return Nullable(((CatToken<TTag>)n).Left) && Nullable(((CatToken<TTag>)n).Right); | |
| 41 return false; | |
| 42 } | |
| 43 | |
| 44 | |
| 45 public void Visit(AltToken<TTag> token) { | |
| 46 if (m_root == null) | |
| 47 m_root = token; | |
| 48 var firtspos = new HashSet<int>(); | |
| 49 var lastpos = new HashSet<int>(); | |
| 50 | |
| 51 token.Left.Accept(this); | |
| 52 firtspos.UnionWith(m_firstpos); | |
| 53 lastpos.UnionWith(m_lastpos); | |
| 54 | |
| 55 token.Right.Accept(this); | |
| 56 firtspos.UnionWith(m_firstpos); | |
| 57 lastpos.UnionWith(m_lastpos); | |
| 58 | |
| 59 m_firstpos = firtspos; | |
| 60 m_lastpos = lastpos; | |
| 61 } | |
| 62 | |
| 63 public void Visit(StarToken<TTag> token) { | |
| 64 if (m_root == null) | |
| 65 m_root = token; | |
| 66 token.Token.Accept(this); | |
| 67 | |
| 68 foreach (var i in m_lastpos) | |
| 69 Followpos(i).UnionWith(m_firstpos); | |
| 70 } | |
| 71 | |
| 72 public void Visit(CatToken<TTag> token) { | |
| 73 if (m_root == null) | |
| 74 m_root = token; | |
| 75 | |
| 76 var firtspos = new HashSet<int>(); | |
| 77 var lastpos = new HashSet<int>(); | |
| 78 token.Left.Accept(this); | |
| 79 firtspos.UnionWith(m_firstpos); | |
| 80 var leftLastpos = m_lastpos; | |
| 81 | |
| 82 token.Right.Accept(this); | |
| 83 lastpos.UnionWith(m_lastpos); | |
| 84 var rightFirstpos = m_firstpos; | |
| 85 | |
| 86 if (Nullable(token.Left)) | |
| 87 firtspos.UnionWith(rightFirstpos); | |
| 88 | |
| 89 if (Nullable(token.Right)) | |
| 90 lastpos.UnionWith(leftLastpos); | |
| 91 | |
| 92 m_firstpos = firtspos; | |
| 93 m_lastpos = lastpos; | |
| 94 | |
| 95 foreach (var i in leftLastpos) | |
| 96 Followpos(i).UnionWith(rightFirstpos); | |
| 97 | |
| 98 } | |
| 99 | |
| 100 public void Visit(EmptyToken<TTag> token) { | |
| 101 if (m_root == null) | |
| 102 m_root = token; | |
| 103 } | |
| 104 | |
| 105 public void Visit(SymbolToken<TTag> token) { | |
| 106 if (m_root == null) | |
| 107 m_root = token; | |
| 108 m_idx++; | |
| 109 m_indexes[m_idx] = token.Value; | |
| 110 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
| 111 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 112 } | |
| 113 | |
| 114 public void Visit(EndToken<TTag> token) { | |
| 115 if (m_root == null) | |
| 116 m_root = token; | |
| 117 m_idx++; | |
| 118 m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; | |
| 119 m_firstpos = new HashSet<int>(new[] { m_idx }); | |
| 120 m_lastpos = new HashSet<int>(new[] { m_idx }); | |
| 121 Followpos(m_idx); | |
| 122 m_ends.Add(m_idx, token.Tag); | |
| 123 } | |
| 124 | |
| 125 public void BuildDFA(IDFADefinitionBuilder<TTag> dfa, IAlphabetBuilder<int> states) { | |
| 126 Safe.ArgumentNotNull(dfa,"dfa"); | |
| 127 | |
| 128 var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>( | |
| 129 (x, y) => x.SetEquals(y), | |
| 130 x => x.Sum(n => n.GetHashCode()) | |
| 131 )); | |
| 132 | |
| 133 int nextState = 0; | |
| 134 | |
| 135 int initialState = states.DefineSymbol(nextState++); | |
| 136 stateMap[m_firstpos] = initialState; | |
| 137 | |
| 138 var tags = GetStateTags(m_firstpos); | |
| 139 if (tags != null && tags.Length > 0) | |
| 140 dfa.MarkFinalState(initialState, tags); | |
| 141 | |
| 142 var inputMax = m_indexes.Values.Max(); | |
| 143 var queue = new Queue<HashSet<int>>(); | |
| 144 | |
| 145 queue.Enqueue(m_firstpos); | |
| 146 | |
| 147 while (queue.Count > 0) { | |
| 148 var state = queue.Dequeue(); | |
| 149 var s1 = stateMap[state]; | |
| 150 | |
| 151 for (int a = 0; a <= inputMax; a++) { | |
| 152 var next = new HashSet<int>(); | |
| 153 foreach (var p in state) { | |
| 154 if (m_indexes[p] == a) { | |
| 155 next.UnionWith(Followpos(p)); | |
| 156 } | |
| 157 } | |
| 158 if (next.Count > 0) { | |
| 159 int s2; | |
| 160 if (!stateMap.TryGetValue(next, out s2)) { | |
| 161 s2 = states.DefineSymbol(nextState++); | |
| 162 stateMap[next] = s2; | |
| 163 tags = GetStateTags(next); | |
| 164 if (tags != null && tags.Length > 0) | |
| 165 dfa.MarkFinalState(s2, tags); | |
| 166 | |
| 167 queue.Enqueue(next); | |
| 168 } | |
| 169 dfa.DefineTransition(s1, s2, a); | |
| 170 } | |
| 171 } | |
| 172 } | |
| 173 } | |
| 174 | |
| 175 TTag[] GetStateTags(IEnumerable<int> state) { | |
| 176 Debug.Assert(state != null); | |
| 177 return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); | |
| 178 } | |
| 179 | |
| 180 } | |
| 181 } |
