Mercurial > pub > ImplabNet
diff Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 178:d5c5db0335ee ref20160224
working on JSON parser
author | cin |
---|---|
date | Wed, 23 Mar 2016 19:51:45 +0300 |
parents | a0ff6a0e9c44 |
children | 302ca905c19e |
line wrap: on
line diff
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 19:51:45 2016 +0300 @@ -10,7 +10,7 @@ /// регулярное выражение и вычисляет followpos, затем используется метод /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. /// </summary> - public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { + public class RegularExpressionVisitor : IVisitor { int m_idx; Token m_root; HashSet<int> m_firstpos; @@ -19,13 +19,23 @@ readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); readonly HashSet<int> m_ends = new HashSet<int>(); - readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); - public Dictionary<int, HashSet<int>> FollowposMap { - get { return m_followpos; } + readonly IDFATableBuilder m_builder; + readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( + false, + new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + x => x.Sum(n => n.GetHashCode()) + ) + ); + + public RegularExpressionVisitor(IDFATableBuilder builder) { + Safe.ArgumentNotNull(builder, "builder"); + + m_builder = builder; } - public HashSet<int> Followpos(int pos) { + HashSet<int> Followpos(int pos) { HashSet<int> set; return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); } @@ -42,6 +52,9 @@ return false; } + protected int Index { + get { return m_idx; } + } public void Visit(AltToken token) { if (m_root == null) @@ -112,45 +125,23 @@ m_lastpos = new HashSet<int>(new[] { m_idx }); } - public void Visit(EndToken<TTag> token) { + public virtual void Visit(EndToken token) { if (m_root == null) m_root = token; m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; - m_firstpos = new HashSet<int>(new[] { m_idx }); - m_lastpos = new HashSet<int>(new[] { m_idx }); - Followpos(m_idx); - m_ends.Add(m_idx); - m_tags.Add(m_idx, token.Tag); - } - - public void Visit(EndToken token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); Followpos(m_idx); m_ends.Add(m_idx); } - public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { - Safe.ArgumentNotNull(dfa,"dfa"); + public void BuildDFA() { + AddState(m_firstpos); + SetInitialState(m_firstpos); - var states = new MapAlphabet<HashSet<int>>( - false, - new CustomEqualityComparer<HashSet<int>>( - (x, y) => x.SetEquals(y), - x => x.Sum(n => n.GetHashCode()) - )); - - var initialState = states.DefineSymbol(m_firstpos); - dfa.SetInitialState(initialState); - - var tags = GetStateTags(m_firstpos); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(initialState, tags); + if(IsFinal(m_firstpos)) + MarkFinalState(m_firstpos); var inputMax = m_indexes.Values.Max(); var queue = new Queue<HashSet<int>>(); @@ -158,49 +149,64 @@ queue.Enqueue(m_firstpos); while (queue.Count > 0) { - var state = queue.Dequeue(); - var s1 = states.Translate(state); - Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); + var s1 = queue.Dequeue(); for (int a = 0; a <= inputMax; a++) { - var next = new HashSet<int>(); - foreach (var p in state) { + var s2 = new HashSet<int>(); + foreach (var p in s1) { if (m_indexes[p] == a) { - next.UnionWith(Followpos(p)); + s2.UnionWith(Followpos(p)); } } - if (next.Count > 0) { - int s2; - if (states.Contains(next)) { - s2 = states.Translate(next); - } else { - s2 = states.DefineSymbol(next); + if (s2.Count > 0) { + if (!HasState(s2)) { + AddState(s2); + if (IsFinal(s2)) + MarkFinalState(s2); + + queue.Enqueue(s2); + } - if (IsFinal(next)) { - - dfa.MarkFinalState(s2); - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) - dfa.SetStateTag(s2, tags); - } - - queue.Enqueue(next); - } - dfa.Add(new AutomatonTransition(s1, s2, a)); + DefineTransition(s1, s2, a); } + } } } + protected bool HasState(HashSet<int> state) { + return m_states.Contains(state); + } + + protected void AddState(HashSet<int> state) { + Debug.Assert(!HasState(state)); + + m_states.DefineSymbol(state); + } + + protected int Translate(HashSet<int> state) { + Debug.Assert(HasState(state)); + + return m_states.Translate(state); + } + + protected virtual void SetInitialState(HashSet<int> state) { + m_builder.SetInitialState(Translate(state)); + } + + protected virtual void MarkFinalState(HashSet<int> state) { + m_builder.MarkFinalState(Translate(state)); + } + + protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { + + m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); + } + bool IsFinal(IEnumerable<int> state) { Debug.Assert(state != null); return state.Any(m_ends.Contains); } - TTag[] GetStateTags(IEnumerable<int> state) { - Debug.Assert(state != null); - return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); - } - } }