diff Implab/Parsing/IndexedAlphabetBase.cs @ 158:130781364799 v2

refactoring, code cleanup
author cin
date Thu, 18 Feb 2016 14:34:02 +0300
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/IndexedAlphabetBase.cs	Thu Feb 18 14:34:02 2016 +0300
@@ -0,0 +1,107 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+    /// <summary>
+    /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
+    /// </summary>
+    public abstract class IndexedAlphabetBase<T> : IAlphabet<T> {
+        public const int UNCLASSIFIED = 0;
+
+        int m_nextId = 1;
+        readonly int[] m_map;
+
+        public int Count {
+            get { return m_nextId; }
+        }
+
+        protected IndexedAlphabetBase(int mapSize) {
+            m_map = new int[mapSize];
+        }
+
+        protected IndexedAlphabetBase(int[] map) {
+            Debug.Assert(map != null);
+
+            m_map = map;
+            m_nextId = map.Max() + 1;
+        }
+
+        public int DefineSymbol(T symbol) {
+            var index = GetSymbolIndex(symbol);
+            if (m_map[index] == UNCLASSIFIED)
+                m_map[index] = m_nextId++;
+            return m_map[index];
+        }
+
+        public int DefineClass(IEnumerable<T> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+            symbols = symbols.Distinct();
+
+            foreach (var symbol in symbols) {
+                var index = GetSymbolIndex(symbol);
+                if (m_map[index] == UNCLASSIFIED)
+                    m_map[GetSymbolIndex(symbol)] = m_nextId;
+                else
+                    throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
+            }
+            return m_nextId++;
+        }
+
+        public List<T>[] CreateReverseMap() {
+            return
+                Enumerable.Range(UNCLASSIFIED, Count)
+                    .Select(
+                        i => InputSymbols
+                            .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i)
+                            .ToList()
+                    )
+                    .ToArray();
+        }
+
+        public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
+            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
+            Safe.ArgumentNotNull(classes, "classes");
+            var reverseMap = CreateReverseMap();
+
+            int[] translationMap = new int[Count];
+
+            foreach (var scl in classes) {
+                // skip if the supper class contains the unclassified element
+                if (scl.Contains(UNCLASSIFIED))
+                    continue;
+                var range = new List<T>();
+                foreach (var cl in scl) {
+                    if (cl < 0 || cl >= reverseMap.Length)
+                        throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
+                    range.AddRange(reverseMap[cl]);
+                }
+                var newClass = newAlphabet.DefineClass(range);
+                foreach (var cl in scl)
+                    translationMap[cl] = newClass;
+            }
+
+            return translationMap;
+        }
+
+        public virtual int Translate(T symbol) {
+            return m_map[GetSymbolIndex(symbol)];
+        }
+
+        public abstract int GetSymbolIndex(T symbol);
+
+        public abstract IEnumerable<T> InputSymbols { get; }
+
+        /// <summary>
+        /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
+        /// </summary>
+        /// <returns>The translation map.</returns>
+        public int[] GetTranslationMap() {
+            return m_map;
+        }
+    }
+}