view Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 209:a867536c68fc v2

Bound promise to CancellationToken Added new states to ExecutionSate enum. Added Safe.Guard() method to handle cleanup of the result of the promise
author cin
date Wed, 16 Nov 2016 03:06:08 +0300
parents d5c5db0335ee
children 302ca905c19e
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Используется для построения ДКА по регулярному выражению, сначала обходит
    /// регулярное выражение и вычисляет followpos, затем используется метод
    /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
    /// </summary>
    public class RegularExpressionVisitor : IVisitor {
        int m_idx;
        Token m_root;
        HashSet<int> m_firstpos;
        HashSet<int> m_lastpos;

        readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
        readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
        readonly HashSet<int> m_ends = new HashSet<int>();

        readonly IDFATableBuilder m_builder;
        readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
            false,
            new CustomEqualityComparer<HashSet<int>>(
                (x, y) => x.SetEquals(y),
                x => x.Sum(n => n.GetHashCode())
            )
        );

        public RegularExpressionVisitor(IDFATableBuilder builder) {
            Safe.ArgumentNotNull(builder, "builder");

            m_builder = builder;
        }

        HashSet<int> Followpos(int pos) {
            HashSet<int> set;
            return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
        }

        bool Nullable(object n) {
            if (n is EmptyToken || n is StarToken)
                return true;
            var altToken = n as AltToken;
            if (altToken != null)
                return Nullable(altToken.Left) || Nullable(altToken.Right);
            var catToken = n as CatToken;
            if (catToken != null)
                return Nullable(catToken.Left) && Nullable(catToken.Right);
            return false;
        }

        protected int Index {
            get { return m_idx; }
        }

        public void Visit(AltToken token) {
            if (m_root == null)
                m_root = token;
            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();

            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            token.Right.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;
        }

        public void Visit(StarToken token) {
            if (m_root == null)
                m_root = token;
            token.Token.Accept(this);

            foreach (var i in m_lastpos)
                Followpos(i).UnionWith(m_firstpos);
        }

        public void Visit(CatToken token) {
            if (m_root == null)
                m_root = token;

            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();
            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            var leftLastpos = m_lastpos;

            token.Right.Accept(this);
            lastpos.UnionWith(m_lastpos);
            var rightFirstpos = m_firstpos;

            if (Nullable(token.Left))
                firtspos.UnionWith(rightFirstpos);

            if (Nullable(token.Right))
                lastpos.UnionWith(leftLastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;

            foreach (var i in leftLastpos)
                Followpos(i).UnionWith(rightFirstpos);

        }

        public void Visit(EmptyToken token) {
            if (m_root == null)
                m_root = token;
        }

        public void Visit(SymbolToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = token.Value;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
        }

        public virtual void Visit(EndToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
            Followpos(m_idx);
            m_ends.Add(m_idx);
        }

        public void BuildDFA() {
            AddState(m_firstpos);
            SetInitialState(m_firstpos);

            if(IsFinal(m_firstpos))
                MarkFinalState(m_firstpos);

            var inputMax = m_indexes.Values.Max();
            var queue = new Queue<HashSet<int>>();

            queue.Enqueue(m_firstpos);

            while (queue.Count > 0) {
                var s1 = queue.Dequeue();

                for (int a = 0; a <= inputMax; a++) {
                    var s2 = new HashSet<int>();
                    foreach (var p in s1) {
                        if (m_indexes[p] == a) {
                            s2.UnionWith(Followpos(p));
                        }
                    }
                    if (s2.Count > 0) {
                        if (!HasState(s2)) {
                            AddState(s2);
                            if (IsFinal(s2))
                                MarkFinalState(s2);
                            
                            queue.Enqueue(s2);
                        }

                        DefineTransition(s1, s2, a);
                    }

                }
            }
        }

        protected bool HasState(HashSet<int> state) {
            return m_states.Contains(state);
        }

        protected void AddState(HashSet<int> state) {
            Debug.Assert(!HasState(state));

            m_states.DefineSymbol(state);
        }

        protected int Translate(HashSet<int> state) {
            Debug.Assert(HasState(state));

            return m_states.Translate(state);
        }

        protected virtual void SetInitialState(HashSet<int> state) {
            m_builder.SetInitialState(Translate(state));
        }

        protected virtual void MarkFinalState(HashSet<int> state) {
            m_builder.MarkFinalState(Translate(state));
        }

        protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
            
            m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
        }

        bool IsFinal(IEnumerable<int> state) {
            Debug.Assert(state != null);
            return state.Any(m_ends.Contains);
        }

    }
}