Mercurial > pub > ImplabNet
diff Implab/Automaton/IndexedAlphabetBase.cs @ 162:0526412bbb26 ref20160224
DFA refactoring
author | cin |
---|---|
date | Wed, 24 Feb 2016 08:39:53 +0300 |
parents | |
children | ec35731ae299 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/IndexedAlphabetBase.cs Wed Feb 24 08:39:53 2016 +0300 @@ -0,0 +1,105 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Implab.Automaton { + /// <summary> + /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. + /// </summary> + public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> { + public const int UNCLASSIFIED = 0; + + int m_nextId = 1; + readonly int[] m_map; + + public int Count { + get { return m_nextId; } + } + + protected IndexedAlphabetBase(int mapSize) { + m_map = new int[mapSize]; + } + + protected IndexedAlphabetBase(int[] map) { + Debug.Assert(map != null); + + m_map = map; + m_nextId = map.Max() + 1; + } + + public int DefineSymbol(T symbol) { + var index = GetSymbolIndex(symbol); + if (m_map[index] == UNCLASSIFIED) + m_map[index] = m_nextId++; + return m_map[index]; + } + + public int DefineClass(IEnumerable<T> symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + symbols = symbols.Distinct(); + + foreach (var symbol in symbols) { + var index = GetSymbolIndex(symbol); + if (m_map[index] == UNCLASSIFIED) + m_map[GetSymbolIndex(symbol)] = m_nextId; + else + throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); + } + return m_nextId++; + } + + public List<T>[] CreateReverseMap() { + return + Enumerable.Range(UNCLASSIFIED, Count) + .Select( + i => InputSymbols + .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) + .ToList() + ) + .ToArray(); + } + + public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) { + Safe.ArgumentNotNull(newAlphabet, "newAlphabet"); + Safe.ArgumentNotNull(classes, "classes"); + var reverseMap = CreateReverseMap(); + + var translationMap = new int[Count]; + + foreach (var scl in classes) { + // skip if the supper class contains the unclassified element + if (scl.Contains(UNCLASSIFIED)) + continue; + var range = new List<T>(); + foreach (var cl in scl) { + if (cl < 0 || cl >= reverseMap.Length) + throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl)); + range.AddRange(reverseMap[cl]); + } + var newClass = newAlphabet.DefineClass(range); + foreach (var cl in scl) + translationMap[cl] = newClass; + } + + return translationMap; + } + + public virtual int Translate(T symbol) { + return m_map[GetSymbolIndex(symbol)]; + } + + public abstract int GetSymbolIndex(T symbol); + + public abstract IEnumerable<T> InputSymbols { get; } + + /// <summary> + /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion. + /// </summary> + /// <returns>The translation map.</returns> + public int[] GetTranslationMap() { + return m_map; + } + } +}