Mercurial > pub > ImplabNet
diff Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 190:1c2a16d071a7 v2
Слияние с ref20160224
author | cin |
---|---|
date | Fri, 22 Apr 2016 13:08:08 +0300 |
parents | d5c5db0335ee |
children | 302ca905c19e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Fri Apr 22 13:08:08 2016 +0300 @@ -0,0 +1,212 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Implab.Automaton.RegularExpressions { + /// <summary> + /// Используется для построения ДКА по регулярному выражению, сначала обходит + /// регулярное выражение и вычисляет followpos, затем используется метод + /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. + /// </summary> + public class RegularExpressionVisitor : IVisitor { + int m_idx; + Token m_root; + HashSet<int> m_firstpos; + HashSet<int> m_lastpos; + + readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); + readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); + readonly HashSet<int> m_ends = new HashSet<int>(); + + readonly IDFATableBuilder m_builder; + readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( + false, + new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + x => x.Sum(n => n.GetHashCode()) + ) + ); + + public RegularExpressionVisitor(IDFATableBuilder builder) { + Safe.ArgumentNotNull(builder, "builder"); + + m_builder = builder; + } + + HashSet<int> Followpos(int pos) { + HashSet<int> set; + return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); + } + + bool Nullable(object n) { + if (n is EmptyToken || n is StarToken) + return true; + var altToken = n as AltToken; + if (altToken != null) + return Nullable(altToken.Left) || Nullable(altToken.Right); + var catToken = n as CatToken; + if (catToken != null) + return Nullable(catToken.Left) && Nullable(catToken.Right); + return false; + } + + protected int Index { + get { return m_idx; } + } + + public void Visit(AltToken token) { + if (m_root == null) + m_root = token; + var firtspos = new HashSet<int>(); + var lastpos = new HashSet<int>(); + + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + token.Right.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + } + + public void Visit(StarToken token) { + if (m_root == null) + m_root = token; + token.Token.Accept(this); + + foreach (var i in m_lastpos) + Followpos(i).UnionWith(m_firstpos); + } + + public void Visit(CatToken token) { + if (m_root == null) + m_root = token; + + var firtspos = new HashSet<int>(); + var lastpos = new HashSet<int>(); + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + var leftLastpos = m_lastpos; + + token.Right.Accept(this); + lastpos.UnionWith(m_lastpos); + var rightFirstpos = m_firstpos; + + if (Nullable(token.Left)) + firtspos.UnionWith(rightFirstpos); + + if (Nullable(token.Right)) + lastpos.UnionWith(leftLastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + + foreach (var i in leftLastpos) + Followpos(i).UnionWith(rightFirstpos); + + } + + public void Visit(EmptyToken token) { + if (m_root == null) + m_root = token; + } + + public void Visit(SymbolToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = token.Value; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + } + + public virtual void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx); + } + + public void BuildDFA() { + AddState(m_firstpos); + SetInitialState(m_firstpos); + + if(IsFinal(m_firstpos)) + MarkFinalState(m_firstpos); + + var inputMax = m_indexes.Values.Max(); + var queue = new Queue<HashSet<int>>(); + + queue.Enqueue(m_firstpos); + + while (queue.Count > 0) { + var s1 = queue.Dequeue(); + + for (int a = 0; a <= inputMax; a++) { + var s2 = new HashSet<int>(); + foreach (var p in s1) { + if (m_indexes[p] == a) { + s2.UnionWith(Followpos(p)); + } + } + if (s2.Count > 0) { + if (!HasState(s2)) { + AddState(s2); + if (IsFinal(s2)) + MarkFinalState(s2); + + queue.Enqueue(s2); + } + + DefineTransition(s1, s2, a); + } + + } + } + } + + protected bool HasState(HashSet<int> state) { + return m_states.Contains(state); + } + + protected void AddState(HashSet<int> state) { + Debug.Assert(!HasState(state)); + + m_states.DefineSymbol(state); + } + + protected int Translate(HashSet<int> state) { + Debug.Assert(HasState(state)); + + return m_states.Translate(state); + } + + protected virtual void SetInitialState(HashSet<int> state) { + m_builder.SetInitialState(Translate(state)); + } + + protected virtual void MarkFinalState(HashSet<int> state) { + m_builder.MarkFinalState(Translate(state)); + } + + protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { + + m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); + } + + bool IsFinal(IEnumerable<int> state) { + Debug.Assert(state != null); + return state.Any(m_ends.Contains); + } + + } +}