view Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs @ 196:40d7fed4a09e

fixed promise chaining behavior, the error handler doesn't handle result or cancellation handlers exceptions these exceptions are propagated to the next handlers.
author cin
date Mon, 29 Aug 2016 23:15:51 +0300
parents d5c5db0335ee
children 302ca905c19e
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Используется для построения ДКА по регулярному выражению, сначала обходит
    /// регулярное выражение и вычисляет followpos, затем используется метод
    /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата.
    /// </summary>
    public class RegularExpressionVisitor : IVisitor {
        int m_idx;
        Token m_root;
        HashSet<int> m_firstpos;
        HashSet<int> m_lastpos;

        readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
        readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
        readonly HashSet<int> m_ends = new HashSet<int>();

        readonly IDFATableBuilder m_builder;
        readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>(
            false,
            new CustomEqualityComparer<HashSet<int>>(
                (x, y) => x.SetEquals(y),
                x => x.Sum(n => n.GetHashCode())
            )
        );

        public RegularExpressionVisitor(IDFATableBuilder builder) {
            Safe.ArgumentNotNull(builder, "builder");

            m_builder = builder;
        }

        HashSet<int> Followpos(int pos) {
            HashSet<int> set;
            return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>();
        }

        bool Nullable(object n) {
            if (n is EmptyToken || n is StarToken)
                return true;
            var altToken = n as AltToken;
            if (altToken != null)
                return Nullable(altToken.Left) || Nullable(altToken.Right);
            var catToken = n as CatToken;
            if (catToken != null)
                return Nullable(catToken.Left) && Nullable(catToken.Right);
            return false;
        }

        protected int Index {
            get { return m_idx; }
        }

        public void Visit(AltToken token) {
            if (m_root == null)
                m_root = token;
            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();

            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            token.Right.Accept(this);
            firtspos.UnionWith(m_firstpos);
            lastpos.UnionWith(m_lastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;
        }

        public void Visit(StarToken token) {
            if (m_root == null)
                m_root = token;
            token.Token.Accept(this);

            foreach (var i in m_lastpos)
                Followpos(i).UnionWith(m_firstpos);
        }

        public void Visit(CatToken token) {
            if (m_root == null)
                m_root = token;

            var firtspos = new HashSet<int>();
            var lastpos = new HashSet<int>();
            token.Left.Accept(this);
            firtspos.UnionWith(m_firstpos);
            var leftLastpos = m_lastpos;

            token.Right.Accept(this);
            lastpos.UnionWith(m_lastpos);
            var rightFirstpos = m_firstpos;

            if (Nullable(token.Left))
                firtspos.UnionWith(rightFirstpos);

            if (Nullable(token.Right))
                lastpos.UnionWith(leftLastpos);

            m_firstpos = firtspos;
            m_lastpos = lastpos;

            foreach (var i in leftLastpos)
                Followpos(i).UnionWith(rightFirstpos);

        }

        public void Visit(EmptyToken token) {
            if (m_root == null)
                m_root = token;
        }

        public void Visit(SymbolToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = token.Value;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
        }

        public virtual void Visit(EndToken token) {
            if (m_root == null)
                m_root = token;
            m_idx++;
            m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
            m_firstpos = new HashSet<int>(new[] { m_idx });
            m_lastpos = new HashSet<int>(new[] { m_idx });
            Followpos(m_idx);
            m_ends.Add(m_idx);
        }

        public void BuildDFA() {
            AddState(m_firstpos);
            SetInitialState(m_firstpos);

            if(IsFinal(m_firstpos))
                MarkFinalState(m_firstpos);

            var inputMax = m_indexes.Values.Max();
            var queue = new Queue<HashSet<int>>();

            queue.Enqueue(m_firstpos);

            while (queue.Count > 0) {
                var s1 = queue.Dequeue();

                for (int a = 0; a <= inputMax; a++) {
                    var s2 = new HashSet<int>();
                    foreach (var p in s1) {
                        if (m_indexes[p] == a) {
                            s2.UnionWith(Followpos(p));
                        }
                    }
                    if (s2.Count > 0) {
                        if (!HasState(s2)) {
                            AddState(s2);
                            if (IsFinal(s2))
                                MarkFinalState(s2);
                            
                            queue.Enqueue(s2);
                        }

                        DefineTransition(s1, s2, a);
                    }

                }
            }
        }

        protected bool HasState(HashSet<int> state) {
            return m_states.Contains(state);
        }

        protected void AddState(HashSet<int> state) {
            Debug.Assert(!HasState(state));

            m_states.DefineSymbol(state);
        }

        protected int Translate(HashSet<int> state) {
            Debug.Assert(HasState(state));

            return m_states.Translate(state);
        }

        protected virtual void SetInitialState(HashSet<int> state) {
            m_builder.SetInitialState(Translate(state));
        }

        protected virtual void MarkFinalState(HashSet<int> state) {
            m_builder.MarkFinalState(Translate(state));
        }

        protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) {
            
            m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
        }

        bool IsFinal(IEnumerable<int> state) {
            Debug.Assert(state != null);
            return state.Any(m_ends.Contains);
        }

    }
}