Mercurial > pub > ImplabNet
view Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 180:c32688129f14 ref20160224
refactoring complete, JSONParser rewritten
author | cin |
---|---|
date | Thu, 24 Mar 2016 02:30:46 +0300 |
parents | d5c5db0335ee |
children | 302ca905c19e |
line wrap: on
line source
using Implab; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; namespace Implab.Automaton.RegularExpressions { /// <summary> /// Используется для построения ДКА по регулярному выражению, сначала обходит /// регулярное выражение и вычисляет followpos, затем используется метод /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. /// </summary> public class RegularExpressionVisitor : IVisitor { int m_idx; Token m_root; HashSet<int> m_firstpos; HashSet<int> m_lastpos; readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); readonly HashSet<int> m_ends = new HashSet<int>(); readonly IDFATableBuilder m_builder; readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( false, new CustomEqualityComparer<HashSet<int>>( (x, y) => x.SetEquals(y), x => x.Sum(n => n.GetHashCode()) ) ); public RegularExpressionVisitor(IDFATableBuilder builder) { Safe.ArgumentNotNull(builder, "builder"); m_builder = builder; } HashSet<int> Followpos(int pos) { HashSet<int> set; return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); } bool Nullable(object n) { if (n is EmptyToken || n is StarToken) return true; var altToken = n as AltToken; if (altToken != null) return Nullable(altToken.Left) || Nullable(altToken.Right); var catToken = n as CatToken; if (catToken != null) return Nullable(catToken.Left) && Nullable(catToken.Right); return false; } protected int Index { get { return m_idx; } } public void Visit(AltToken token) { if (m_root == null) m_root = token; var firtspos = new HashSet<int>(); var lastpos = new HashSet<int>(); token.Left.Accept(this); firtspos.UnionWith(m_firstpos); lastpos.UnionWith(m_lastpos); token.Right.Accept(this); firtspos.UnionWith(m_firstpos); lastpos.UnionWith(m_lastpos); m_firstpos = firtspos; m_lastpos = lastpos; } public void Visit(StarToken token) { if (m_root == null) m_root = token; token.Token.Accept(this); foreach (var i in m_lastpos) Followpos(i).UnionWith(m_firstpos); } public void Visit(CatToken token) { if (m_root == null) m_root = token; var firtspos = new HashSet<int>(); var lastpos = new HashSet<int>(); token.Left.Accept(this); firtspos.UnionWith(m_firstpos); var leftLastpos = m_lastpos; token.Right.Accept(this); lastpos.UnionWith(m_lastpos); var rightFirstpos = m_firstpos; if (Nullable(token.Left)) firtspos.UnionWith(rightFirstpos); if (Nullable(token.Right)) lastpos.UnionWith(leftLastpos); m_firstpos = firtspos; m_lastpos = lastpos; foreach (var i in leftLastpos) Followpos(i).UnionWith(rightFirstpos); } public void Visit(EmptyToken token) { if (m_root == null) m_root = token; } public void Visit(SymbolToken token) { if (m_root == null) m_root = token; m_idx++; m_indexes[m_idx] = token.Value; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); } public virtual void Visit(EndToken token) { if (m_root == null) m_root = token; m_idx++; m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); Followpos(m_idx); m_ends.Add(m_idx); } public void BuildDFA() { AddState(m_firstpos); SetInitialState(m_firstpos); if(IsFinal(m_firstpos)) MarkFinalState(m_firstpos); var inputMax = m_indexes.Values.Max(); var queue = new Queue<HashSet<int>>(); queue.Enqueue(m_firstpos); while (queue.Count > 0) { var s1 = queue.Dequeue(); for (int a = 0; a <= inputMax; a++) { var s2 = new HashSet<int>(); foreach (var p in s1) { if (m_indexes[p] == a) { s2.UnionWith(Followpos(p)); } } if (s2.Count > 0) { if (!HasState(s2)) { AddState(s2); if (IsFinal(s2)) MarkFinalState(s2); queue.Enqueue(s2); } DefineTransition(s1, s2, a); } } } } protected bool HasState(HashSet<int> state) { return m_states.Contains(state); } protected void AddState(HashSet<int> state) { Debug.Assert(!HasState(state)); m_states.DefineSymbol(state); } protected int Translate(HashSet<int> state) { Debug.Assert(HasState(state)); return m_states.Translate(state); } protected virtual void SetInitialState(HashSet<int> state) { m_builder.SetInitialState(Translate(state)); } protected virtual void MarkFinalState(HashSet<int> state) { m_builder.MarkFinalState(Translate(state)); } protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); } bool IsFinal(IEnumerable<int> state) { Debug.Assert(state != null); return state.Any(m_ends.Contains); } } }