view Implab/Automaton/RegularExpressions/RegularDFA.cs @ 229:5f7a3e1d32b9 v2

JsonXmlReader performance tuning JsonScanner now operates strings and doesn't parses number and literals. Added SerializationHelpers to common serialize/deserialize operations
author cin
date Tue, 12 Sep 2017 19:07:42 +0300
parents 4f82e0f161c3
children fa6cbf4d8841
line wrap: on
line source

using System.Collections.Generic;
using System.Linq;

namespace Implab.Automaton.RegularExpressions {
    public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {

        readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
        readonly IAlphabet<TInput> m_alphabet;

        public RegularDFA(IAlphabet<TInput> alphabet) {
            Safe.ArgumentNotNull(alphabet, "aplhabet");

            m_alphabet = alphabet;
        }


        public IAlphabet<TInput> InputAlphabet {
            get {
                return m_alphabet;
            }
        }

        public void MarkFinalState(int s, TTag[] tags) {
            MarkFinalState(s);
            SetStateTag(s, tags);
        }

        public void SetStateTag(int s, TTag[] tags) {
            Safe.ArgumentNotNull(tags, "tags");
            m_tags[s] = tags;
        }

        public TTag[] GetStateTag(int s) {
            TTag[] tags;
            return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
        }

        public TTag[][] CreateTagTable() {
            var table = new TTag[StateCount][];

            foreach (var pair in m_tags)
                table[pair.Key] = pair.Value;

            return table;
        }

        /// <summary>
        /// Optimize the specified alphabet.
        /// </summary>
        /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
        public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            var dfa = new RegularDFA<TInput, TTag>(alphabet);

            var alphaMap = new Dictionary<int,int>();
            var stateMap = new Dictionary<int,int>();

            Optimize(dfa, alphaMap, stateMap); 

            // mark tags in the new DFA
            foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
                dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());

            // make the alphabet for the new DFA
            // skip all unclassified symbols
            foreach (var pair in alphaMap.Where(x => x.Value != 0))
                alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
            return dfa;
        }

        protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
            var arrayComparer = new CustomEqualityComparer<TTag[]>(
                (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
                x => x.Sum(it => x.GetHashCode())
            );
            return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g));
        }

        public override string ToString() {
            var states = new MapAlphabet<string>(false, null);

            for (int i = 0; i < StateCount; i++)
                states.DefineSymbol(string.Format("s{0}", i), i);

            return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
        }

    }
}