comparison Implab/Parsing/IndexedAlphabetBase.cs @ 158:130781364799 v2

refactoring, code cleanup
author cin
date Thu, 18 Feb 2016 14:34:02 +0300
parents
children
comparison
equal deleted inserted replaced
157:948c015a9011 158:130781364799
1 using Implab;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6 using System.Text;
7 using System.Threading.Tasks;
8
namespace Implab.Parsing {
    /// <summary>
    /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
    /// </summary>
    /// <remarks>
    /// The index of every symbol is mapped to the identifier of a class. The identifier
    /// <see cref="UNCLASSIFIED"/> marks the symbols which are not assigned to any class yet,
    /// identifiers of the defined classes are allocated sequentially starting from 1.
    /// </remarks>
    public abstract class IndexedAlphabetBase<T> : IAlphabet<T> {
        /// <summary>
        /// The identifier of the class for the symbols which aren't classified yet.
        /// </summary>
        public const int UNCLASSIFIED = 0;

        // the identifier which will be assigned to the next defined symbol or class
        int m_nextId = 1;

        // maps the index of the symbol to the identifier of its class
        readonly int[] m_map;

        /// <summary>
        /// Gets the number of class identifiers in use, the <see cref="UNCLASSIFIED"/> one is included.
        /// </summary>
        public int Count {
            get { return m_nextId; }
        }

        /// <summary>
        /// Creates an alphabet where all symbols are unclassified.
        /// </summary>
        /// <param name="mapSize">The size of the map, i.e. the number of the distinct symbol indexes.</param>
        protected IndexedAlphabetBase(int mapSize) {
            m_map = new int[mapSize];
        }

        /// <summary>
        /// Creates an alphabet over the existing translation map. The specified array isn't copied,
        /// it is used as the storage of the alphabet.
        /// </summary>
        /// <param name="map">The map from the index of the symbol to the identifier of its class.</param>
        protected IndexedAlphabetBase(int[] map) {
            Safe.ArgumentNotNull(map, "map");

            m_map = map;

            // The next identifier follows the greatest one used in the map. The manual scan
            // handles the empty map (Enumerable.Max throws for it) and never lets the next
            // identifier to collide with the UNCLASSIFIED one.
            var max = UNCLASSIFIED;
            foreach (var cl in map)
                if (cl > max)
                    max = cl;
            m_nextId = max + 1;
        }

        /// <summary>
        /// Assigns the new class to the symbol if it isn't classified yet.
        /// </summary>
        /// <param name="symbol">The symbol to define.</param>
        /// <returns>The identifier of the class of the symbol, either the new or the existing one.</returns>
        public int DefineSymbol(T symbol) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] == UNCLASSIFIED)
                m_map[index] = m_nextId++;
            return m_map[index];
        }

        /// <summary>
        /// Defines the new class which consists of the specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols of the class, duplicates are ignored.</param>
        /// <returns>The identifier of the new class.</returns>
        /// <exception cref="InvalidOperationException">
        /// One of the symbols is already assigned to a class, the alphabet is left unchanged in this case.
        /// </exception>
        public int DefineClass(IEnumerable<T> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // Validate all symbols before touching the map, otherwise the failure in the middle
            // of the sequence would leave a part of symbols assigned to the identifier which
            // isn't reserved and which will be reused by the next definition.
            var indexes = new HashSet<int>();
            foreach (var symbol in symbols.Distinct()) {
                var index = GetSymbolIndex(symbol);
                // the index met twice means that a distinct symbol with the same index is already taken
                if (m_map[index] != UNCLASSIFIED || !indexes.Add(index))
                    throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
            }

            foreach (var index in indexes)
                m_map[index] = m_nextId;

            return m_nextId++;
        }

        /// <summary>
        /// Creates the map from the identifier of the class to the list of its symbols.
        /// </summary>
        /// <returns>
        /// The array of <see cref="Count"/> lists, the list at <see cref="UNCLASSIFIED"/> is always empty.
        /// The symbols are listed in the order of <see cref="InputSymbols"/>.
        /// </returns>
        public List<T>[] CreateReverseMap() {
            var reverseMap = new List<T>[Count];
            for (var i = 0; i < reverseMap.Length; i++)
                reverseMap[i] = new List<T>();

            // the single pass over the symbols instead of the scan of all symbols for each class
            foreach (var symbol in InputSymbols) {
                var cl = m_map[GetSymbolIndex(symbol)];
                // unclassified symbols and identifiers out of the range of defined classes are skipped
                if (cl > UNCLASSIFIED && cl < reverseMap.Length)
                    reverseMap[cl].Add(symbol);
            }

            return reverseMap;
        }

        /// <summary>
        /// Defines in the <paramref name="newAlphabet"/> one class per each specified super class,
        /// the new class consists of the symbols of all classes of the super class.
        /// </summary>
        /// <param name="newAlphabet">The alphabet where the new classes are defined.</param>
        /// <param name="classes">The super classes, each one is a collection of class identifiers of this alphabet.</param>
        /// <returns>
        /// The array of <see cref="Count"/> elements which maps the class of this alphabet to the class
        /// of the new alphabet, classes which aren't translated are mapped to zero.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">A super class contains an identifier which isn't defined in this alphabet.</exception>
        public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
            Safe.ArgumentNotNull(classes, "classes");
            var reverseMap = CreateReverseMap();

            int[] translationMap = new int[Count];

            foreach (var scl in classes) {
                // skip if the super class contains the unclassified element
                if (scl.Contains(UNCLASSIFIED))
                    continue;
                var range = new List<T>();
                foreach (var cl in scl) {
                    if (cl < 0 || cl >= reverseMap.Length)
                        // the two-argument overload is required, the single string is treated as the parameter name
                        throw new ArgumentOutOfRangeException("classes", String.Format("Class {0} is not valid for the current alphabet", cl));
                    range.AddRange(reverseMap[cl]);
                }
                var newClass = newAlphabet.DefineClass(range);
                foreach (var cl in scl)
                    translationMap[cl] = newClass;
            }

            return translationMap;
        }

        /// <summary>
        /// Translates the symbol to the identifier of its class.
        /// </summary>
        /// <param name="symbol">The symbol to translate.</param>
        /// <returns>The identifier of the class or <see cref="UNCLASSIFIED"/> if the symbol isn't classified.</returns>
        public virtual int Translate(T symbol) {
            return m_map[GetSymbolIndex(symbol)];
        }

        /// <summary>
        /// Gets the zero-based index of the symbol, the index is used as the position in the translation map.
        /// </summary>
        /// <param name="symbol">The symbol.</param>
        /// <returns>The index of the symbol.</returns>
        public abstract int GetSymbolIndex(T symbol);

        /// <summary>
        /// Gets the symbols of the alphabet, this sequence is used to build the reverse map.
        /// </summary>
        public abstract IEnumerable<T> InputSymbols { get; }

        /// <summary>
        /// Gets the translation map from the index of the symbol to its class, this is useful for the optimized input symbols translation.
        /// </summary>
        /// <remarks>
        /// The internal array is returned without copying, changes made to it affect the alphabet.
        /// </remarks>
        /// <returns>The translation map.</returns>
        public int[] GetTranslationMap() {
            return m_map;
        }
    }
}