view Implab/Automaton/IndexedAlphabetBase.cs @ 175:96a89dcb4060 ref20160224

sync
author cin
date Mon, 21 Mar 2016 18:41:45 +0300
parents 92d5278d1b10
children 0c3c69fe225b
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Implab.Automaton {
    /// <summary>
    /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
    /// </summary>
    /// <remarks>
    /// Indexed alphabets are usefull in bulting efficient translations from source alphabet
    /// to the input alphabet of the automaton. It's assumed that the index to the symbol match
    /// is well known and documented.
    /// </remarks>
    public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
        int m_nextId = 1;
        readonly int[] m_map;

        protected IndexedAlphabetBase(int mapSize) {
            m_map = new int[mapSize];
        }

        protected IndexedAlphabetBase(int[] map) {
            Debug.Assert(map != null && map.Length > 0);
            Debug.Assert(map.All(x => x >= 0));

            m_map = map;
            m_nextId = map.Max() + 1;
        }

        public int DefineSymbol(T symbol) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
                m_map[index] = m_nextId++;
            return m_map[index];
        }

        public int DefineSymbol(T symbol, int cls) {
            var index = GetSymbolIndex(symbol);
            m_map[index] = cls;
            m_nextId = Math.Max(cls + 1, m_nextId);
            return cls;
        }

        public int DefineClass(IEnumerable<T> symbols) {
            return DefineClass(symbols, m_nextId);
        }

        public int DefineClass(IEnumerable<T> symbols, int cls) {
            Safe.ArgumentNotNull(symbols, "symbols");
            symbols = symbols.Distinct();

            foreach (var symbol in symbols)
                m_map[GetSymbolIndex(symbol)] = cls;
            
            m_nextId = Math.Max(cls + 1, m_nextId);

            return cls;
        }

        public virtual int Translate(T symbol) {
            return m_map[GetSymbolIndex(symbol)];
        }

        public int Count {
            get { return m_nextId; }
        }

        public bool Contains(T symbol) {
            return true;
        }

        public IEnumerable<T> GetSymbols(int cls) {
            for (var i = 0; i < m_map.Length; i++)
                if (m_map[i] == cls)
                    yield return GetSymbolByIndex(i);
        }

        public abstract int GetSymbolIndex(T symbol);

        public abstract T GetSymbolByIndex(int index);

        public abstract IEnumerable<T> InputSymbols { get; }

        /// <summary>
        /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
        /// </summary>
        /// <returns>The translation map.</returns>
        public int[] GetTranslationMap() {
            return m_map;
        }
    }
}