view Implab/Automaton/IndexedAlphabetBase.cs @ 164:ec35731ae299 ref20160224

Almost complete DFA refactoring
author cin
date Thu, 25 Feb 2016 02:11:13 +0300
parents 0526412bbb26
children 96681e9d0cea
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Implab.Automaton {
    /// <summary>
    /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
    /// </summary>
    public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
        int m_nextId = 1;
        readonly int[] m_map;

        public int Count {
            get { return m_nextId; }
        }

        protected IndexedAlphabetBase(int mapSize) {
            m_map = new int[mapSize];
        }

        protected IndexedAlphabetBase(int[] map) {
            Debug.Assert(map != null);

            m_map = map;
            m_nextId = map.Max() + 1;
        }

        public int DefineSymbol(T symbol) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
                m_map[index] = m_nextId++;
            return m_map[index];
        }

        public int DefineClass(IEnumerable<T> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");
            symbols = symbols.Distinct();

            foreach (var symbol in symbols) {
                var index = GetSymbolIndex(symbol);
                if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
                    m_map[GetSymbolIndex(symbol)] = m_nextId;
                else
                    throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
            }
            return m_nextId++;
        }

        public List<T>[] CreateReverseMap() {
            return
                Enumerable.Range(0, Count)
                    .Select(
                        i => InputSymbols
                        .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i)
                            .ToList()
                    )
                    .ToArray();
        }

        public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) {
            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
            Safe.ArgumentNotNull(classes, "classes");
            var reverseMap = CreateReverseMap();

            var translationMap = new int[Count];

            foreach (var scl in classes) {
                // skip if the supper class contains the unclassified element
                if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT))
                    continue;
                var range = new List<T>();
                foreach (var cl in scl) {
                    if (cl < 0 || cl >= reverseMap.Length)
                        throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
                    range.AddRange(reverseMap[cl]);
                }
                var newClass = newAlphabet.DefineClass(range);
                foreach (var cl in scl)
                    translationMap[cl] = newClass;
            }

            return translationMap;
        }

        public virtual int Translate(T symbol) {
            return m_map[GetSymbolIndex(symbol)];
        }

        public abstract int GetSymbolIndex(T symbol);

        public abstract IEnumerable<T> InputSymbols { get; }

        /// <summary>
        /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
        /// </summary>
        /// <returns>The translation map.</returns>
        public int[] GetTranslationMap() {
            return m_map;
        }
    }
}