view Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 187:dd4a3590f9c6 ref20160224

Reworked cancelation handling, if the cancel handler isn't specified the OperationCanceledException will be handled by the error handler Any unhandled OperationCanceledException will cause the promise cancelation
author cin
date Tue, 19 Apr 2016 17:35:20 +0300
parents d5c5db0335ee
children 302ca905c19e
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Используется для построения ДКА по регулярному выражению, сначала обходит
    /// регулярное выражение и вычисляет followpos, затем используется метод
    /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
    /// </summary>
    public class RegularExpressionVisitor : IVisitor {
        int m_idx;
        Token m_root;
        HashSet<int> m_firstpos;
        HashSet<int> m_lastpos;

        readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
        readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
        readonly HashSet<int> m_ends = new HashSet<int>();

        readonly IDFATableBuilder m_builder;
        readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
            false,
            new CustomEqualityComparer<HashSet<int>>(
                (x, y) => x.SetEquals(y),
                x => x.Sum(n => n.GetHashCode())
            )
        );

        public RegularExpressionVisitor(IDFATableBuilder builder) {
            Safe.ArgumentNotNull(builder, "builder");

            m_builder = builder;
        }

        HashSet<int> Followpos(int pos) {
            HashSet<int> set;
            return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
        }

        bool Nullable(object n) {
            if (n is EmptyToken || n is StarToken)
                return true;
            var altToken = n as AltToken;
            if (altToken != null)
                return Nullable(altToken.Left) || Nullable(altToken.Right);
            var catToken = n as CatToken;
            if (catToken != null)
                return Nullable(catToken.Left) && Nullable(catToken.Right);
            return false;
        }

        protected int Index {
            get { return m_idx; }
        }

        public void Visit(AltToken token) {
            if (m_root == null)
                m_root = token;
            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();

            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            token.Right.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;
        }

        public void Visit(StarToken token) {
            if (m_root == null)
                m_root = token;
            token.Token.Accept(this);

            foreach (var i in m_lastpos)
                Followpos(i).UnionWith(m_firstpos);
        }

        public void Visit(CatToken token) {
            if (m_root == null)
                m_root = token;

            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();
            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            var leftLastpos = m_lastpos;

            token.Right.Accept(this);
            lastpos.UnionWith(m_lastpos);
            var rightFirstpos = m_firstpos;

            if (Nullable(token.Left))
                firtspos.UnionWith(rightFirstpos);

            if (Nullable(token.Right))
                lastpos.UnionWith(leftLastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;

            foreach (var i in leftLastpos)
                Followpos(i).UnionWith(rightFirstpos);

        }

        public void Visit(EmptyToken token) {
            if (m_root == null)
                m_root = token;
        }

        public void Visit(SymbolToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = token.Value;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
        }

        public virtual void Visit(EndToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
            Followpos(m_idx);
            m_ends.Add(m_idx);
        }

        public void BuildDFA() {
            AddState(m_firstpos);
            SetInitialState(m_firstpos);

            if(IsFinal(m_firstpos))
                MarkFinalState(m_firstpos);

            var inputMax = m_indexes.Values.Max();
            var queue = new Queue<HashSet<int>>();

            queue.Enqueue(m_firstpos);

            while (queue.Count > 0) {
                var s1 = queue.Dequeue();

                for (int a = 0; a <= inputMax; a++) {
                    var s2 = new HashSet<int>();
                    foreach (var p in s1) {
                        if (m_indexes[p] == a) {
                            s2.UnionWith(Followpos(p));
                        }
                    }
                    if (s2.Count > 0) {
                        if (!HasState(s2)) {
                            AddState(s2);
                            if (IsFinal(s2))
                                MarkFinalState(s2);
                            
                            queue.Enqueue(s2);
                        }

                        DefineTransition(s1, s2, a);
                    }

                }
            }
        }

        protected bool HasState(HashSet<int> state) {
            return m_states.Contains(state);
        }

        protected void AddState(HashSet<int> state) {
            Debug.Assert(!HasState(state));

            m_states.DefineSymbol(state);
        }

        protected int Translate(HashSet<int> state) {
            Debug.Assert(HasState(state));

            return m_states.Translate(state);
        }

        protected virtual void SetInitialState(HashSet<int> state) {
            m_builder.SetInitialState(Translate(state));
        }

        protected virtual void MarkFinalState(HashSet<int> state) {
            m_builder.MarkFinalState(Translate(state));
        }

        protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
            
            m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
        }

        bool IsFinal(IEnumerable<int> state) {
            Debug.Assert(state != null);
            return state.Any(m_ends.Contains);
        }

    }
}