Mercurial > pub > ImplabNet
view Implab/Parsing/IndexedAlphabetBase.cs @ 159:5558e43c79bb v2
minor fix
author | cin |
---|---|
date | Thu, 18 Feb 2016 16:06:14 +0300 |
parents | 130781364799 |
children |
line wrap: on
line source
using Implab; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; namespace Implab.Parsing { /// <summary> /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. /// </summary> public abstract class IndexedAlphabetBase<T> : IAlphabet<T> { public const int UNCLASSIFIED = 0; int m_nextId = 1; readonly int[] m_map; public int Count { get { return m_nextId; } } protected IndexedAlphabetBase(int mapSize) { m_map = new int[mapSize]; } protected IndexedAlphabetBase(int[] map) { Debug.Assert(map != null); m_map = map; m_nextId = map.Max() + 1; } public int DefineSymbol(T symbol) { var index = GetSymbolIndex(symbol); if (m_map[index] == UNCLASSIFIED) m_map[index] = m_nextId++; return m_map[index]; } public int DefineClass(IEnumerable<T> symbols) { Safe.ArgumentNotNull(symbols, "symbols"); symbols = symbols.Distinct(); foreach (var symbol in symbols) { var index = GetSymbolIndex(symbol); if (m_map[index] == UNCLASSIFIED) m_map[GetSymbolIndex(symbol)] = m_nextId; else throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); } return m_nextId++; } public List<T>[] CreateReverseMap() { return Enumerable.Range(UNCLASSIFIED, Count) .Select( i => InputSymbols .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) .ToList() ) .ToArray(); } public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) { Safe.ArgumentNotNull(newAlphabet, "newAlphabet"); Safe.ArgumentNotNull(classes, "classes"); var reverseMap = CreateReverseMap(); int[] translationMap = new int[Count]; foreach (var scl in classes) { // skip if the supper class contains the unclassified element if (scl.Contains(UNCLASSIFIED)) continue; var range = new List<T>(); foreach (var cl in scl) { if (cl < 0 || cl >= reverseMap.Length) throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl)); range.AddRange(reverseMap[cl]); } var newClass = newAlphabet.DefineClass(range); foreach (var cl in scl) translationMap[cl] = newClass; } return translationMap; } public virtual int Translate(T symbol) { return m_map[GetSymbolIndex(symbol)]; } public abstract int GetSymbolIndex(T symbol); public abstract IEnumerable<T> InputSymbols { get; } /// <summary> /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion. /// </summary> /// <returns>The translation map.</returns> public int[] GetTranslationMap() { return m_map; } } }