diff Implab/Automaton/IndexedAlphabetBase.cs @ 176:0c3c69fe225b ref20160224

rewritten the text scanner
author cin
date Tue, 22 Mar 2016 18:58:40 +0300
parents 92d5278d1b10
children c32688129f14
line wrap: on
line diff
--- a/Implab/Automaton/IndexedAlphabetBase.cs	Mon Mar 21 18:41:45 2016 +0300
+++ b/Implab/Automaton/IndexedAlphabetBase.cs	Tue Mar 22 18:58:40 2016 +0300
@@ -13,82 +13,38 @@
     /// to the input alphabet of the automaton. It's assumed that the index to the symbol match
     /// is well known and documented.
     /// </remarks>
-    public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
-        int m_nextId = 1;
-        readonly int[] m_map;
-
-        protected IndexedAlphabetBase(int mapSize) {
-            m_map = new int[mapSize];
-        }
-
-        protected IndexedAlphabetBase(int[] map) {
-            Debug.Assert(map != null && map.Length > 0);
-            Debug.Assert(map.All(x => x >= 0));
-
-            m_map = map;
-            m_nextId = map.Max() + 1;
-        }
-
-        public int DefineSymbol(T symbol) {
-            var index = GetSymbolIndex(symbol);
-            if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
-                m_map[index] = m_nextId++;
-            return m_map[index];
-        }
-
-        public int DefineSymbol(T symbol, int cls) {
-            var index = GetSymbolIndex(symbol);
-            m_map[index] = cls;
-            m_nextId = Math.Max(cls + 1, m_nextId);
-            return cls;
-        }
+    public abstract class IndexedAlphabetBase<T> : MapAlphabet<T> {
 
-        public int DefineClass(IEnumerable<T> symbols) {
-            return DefineClass(symbols, m_nextId);
-        }
-
-        public int DefineClass(IEnumerable<T> symbols, int cls) {
-            Safe.ArgumentNotNull(symbols, "symbols");
-            symbols = symbols.Distinct();
-
-            foreach (var symbol in symbols)
-                m_map[GetSymbolIndex(symbol)] = cls;
-            
-            m_nextId = Math.Max(cls + 1, m_nextId);
-
-            return cls;
-        }
-
-        public virtual int Translate(T symbol) {
-            return m_map[GetSymbolIndex(symbol)];
-        }
-
-        public int Count {
-            get { return m_nextId; }
-        }
-
-        public bool Contains(T symbol) {
-            return true;
-        }
-
-        public IEnumerable<T> GetSymbols(int cls) {
-            for (var i = 0; i < m_map.Length; i++)
-                if (m_map[i] == cls)
-                    yield return GetSymbolByIndex(i);
+        protected IndexedAlphabetBase() :base(true, null) {
         }
 
         public abstract int GetSymbolIndex(T symbol);
 
-        public abstract T GetSymbolByIndex(int index);
-
-        public abstract IEnumerable<T> InputSymbols { get; }
-
         /// <summary>
         /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
         /// </summary>
+        /// <remarks>
+        /// The map is continuous and starts from the symbol with zero code. The last symbol
+        /// in the map is the last classified symbol in the alphabet, i.e. the map can be
+        /// shorter than the whole alphabet.
+        /// </remarks>
         /// <returns>The translation map.</returns>
         public int[] GetTranslationMap() {
-            return m_map;
+            Dictionary<int,int> map = new Dictionary<int, int>(); // symbol index -> symbol class
+
+            int max = 0; // must be initialized: it is read by Math.Max below (unassigned local is error CS0165)
+            foreach (var p in Mappings) {
+                var index = GetSymbolIndex(p.Key);
+                max = Math.Max(max, index); // track the highest classified symbol index
+                map[index] = p.Value;
+            }
+
+            var result = new int[max + 1]; // covers indices 0..max inclusive
+
+            for (int i = 0; i < result.Length; i++)
+                map.TryGetValue(i, out result[i]); // on a miss TryGetValue stores default(int), i.e. 0
+
+            return result;
         }
     }
 }