Mercurial > pub > ImplabNet
view Implab/Automaton/RegularExpressions/RegularDFABuilder.cs @ 164:ec35731ae299 ref20160224
Almost complete DFA refactoring
author | cin |
---|---|
date | Thu, 25 Feb 2016 02:11:13 +0300 |
parents | 419aa51b04fd |
children | e227e78d72e4 |
line wrap: on
line source
using Implab; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; namespace Implab.Automaton.RegularExpressions { /// <summary> /// Используется для построения ДКА по регулярному выражению, сначала обходит /// регулярное выражение и вычисляет followpos, затем используется метод /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. /// </summary> public class RegularDFABuilder<TTag> : IVisitor<TTag> { int m_idx = 0; Token<TTag> m_root; HashSet<int> m_firstpos; HashSet<int> m_lastpos; readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); public Dictionary<int, HashSet<int>> FollowposMap { get { return m_followpos; } } public HashSet<int> Followpos(int pos) { HashSet<int> set; if (m_followpos.TryGetValue(pos, out set)) return set; return m_followpos[pos] = new HashSet<int>(); } bool Nullable(object n) { if (n is EmptyToken<TTag> || n is StarToken<TTag>) return true; if (n is AltToken<TTag>) return Nullable(((AltToken<TTag>)n).Left) || Nullable(((AltToken<TTag>)n).Right); if (n is CatToken<TTag>) return Nullable(((CatToken<TTag>)n).Left) && Nullable(((CatToken<TTag>)n).Right); return false; } public void Visit(AltToken<TTag> token) { if (m_root == null) m_root = token; var firtspos = new HashSet<int>(); var lastpos = new HashSet<int>(); token.Left.Accept(this); firtspos.UnionWith(m_firstpos); lastpos.UnionWith(m_lastpos); token.Right.Accept(this); firtspos.UnionWith(m_firstpos); lastpos.UnionWith(m_lastpos); m_firstpos = firtspos; m_lastpos = lastpos; } public void Visit(StarToken<TTag> token) { if (m_root == null) m_root = token; token.Token.Accept(this); foreach (var i in m_lastpos) Followpos(i).UnionWith(m_firstpos); } public void Visit(CatToken<TTag> token) { if (m_root == null) m_root = token; var firtspos = new HashSet<int>(); var lastpos = new HashSet<int>(); token.Left.Accept(this); firtspos.UnionWith(m_firstpos); var leftLastpos = m_lastpos; token.Right.Accept(this); lastpos.UnionWith(m_lastpos); var rightFirstpos = m_firstpos; if (Nullable(token.Left)) firtspos.UnionWith(rightFirstpos); if (Nullable(token.Right)) lastpos.UnionWith(leftLastpos); m_firstpos = firtspos; m_lastpos = lastpos; foreach (var i in leftLastpos) Followpos(i).UnionWith(rightFirstpos); } public void Visit(EmptyToken<TTag> token) { if (m_root == null) m_root = token; } public void Visit(SymbolToken<TTag> token) { if (m_root == null) m_root = token; m_idx++; m_indexes[m_idx] = token.Value; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); } public void Visit(EndToken<TTag> token) { if (m_root == null) m_root = token; m_idx++; m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); Followpos(m_idx); m_ends.Add(m_idx, token.Tag); } public void BuildDFA(IDFATransitionTableBuilder<TTag> dfa) { Safe.ArgumentNotNull(dfa,"dfa"); var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( (x, y) => x.SetEquals(y), x => x.Sum(n => n.GetHashCode()) )); var initialState = states.DefineSymbol(m_firstpos); dfa.SetInitialState(initialState); var tags = GetStateTags(m_firstpos); if (tags != null && tags.Length > 0) dfa.MarkFinalState(initialState, tags); var inputMax = m_indexes.Values.Max(); var queue = new Queue<HashSet<int>>(); queue.Enqueue(m_firstpos); while (queue.Count > 0) { var state = queue.Dequeue(); var s1 = states.Translate(state); Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); for (int a = 0; a <= inputMax; a++) { var next = new HashSet<int>(); foreach (var p in state) { if (m_indexes[p] == a) { next.UnionWith(Followpos(p)); } } if (next.Count > 0) { int s2 = states.Translate(next); if (s2 == DFAConst.UNCLASSIFIED_INPUT) { s2 = states.DefineSymbol(next); tags = GetStateTags(next); if (tags != null && tags.Length > 0) dfa.MarkFinalState(s2, tags); queue.Enqueue(next); } dfa.DefineTransition(s1, s2, a); } } } } TTag[] GetStateTags(IEnumerable<int> state) { Debug.Assert(state != null); return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); } } }