162
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Diagnostics;
|
|
5 using System.Linq;
|
|
6
|
|
7 namespace Implab.Automaton {
|
|
8 /// <summary>
|
|
9 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
|
|
10 /// </summary>
|
167
|
11 /// <remarks>
|
|
12 /// Indexed alphabets are usefull in bulting efficient translations from source alphabet
|
|
13 /// to the input alphabet of the automaton. It's assumed that the index to the symbol match
|
|
14 /// is well known and documented.
|
|
15 /// </remarks>
|
162
|
16 public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
|
|
17 int m_nextId = 1;
|
|
18 readonly int[] m_map;
|
|
19
|
|
20 public int Count {
|
|
21 get { return m_nextId; }
|
|
22 }
|
|
23
|
|
24 protected IndexedAlphabetBase(int mapSize) {
|
|
25 m_map = new int[mapSize];
|
|
26 }
|
|
27
|
|
28 protected IndexedAlphabetBase(int[] map) {
|
|
29 Debug.Assert(map != null);
|
|
30
|
|
31 m_map = map;
|
|
32 m_nextId = map.Max() + 1;
|
|
33 }
|
|
34
|
|
35 public int DefineSymbol(T symbol) {
|
|
36 var index = GetSymbolIndex(symbol);
|
164
|
37 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
|
162
|
38 m_map[index] = m_nextId++;
|
|
39 return m_map[index];
|
|
40 }
|
|
41
|
|
42 public int DefineClass(IEnumerable<T> symbols) {
|
|
43 Safe.ArgumentNotNull(symbols, "symbols");
|
|
44 symbols = symbols.Distinct();
|
|
45
|
|
46 foreach (var symbol in symbols) {
|
|
47 var index = GetSymbolIndex(symbol);
|
164
|
48 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
|
162
|
49 m_map[GetSymbolIndex(symbol)] = m_nextId;
|
|
50 else
|
|
51 throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
|
|
52 }
|
|
53 return m_nextId++;
|
|
54 }
|
|
55
|
|
56 public List<T>[] CreateReverseMap() {
|
|
57 return
|
164
|
58 Enumerable.Range(0, Count)
|
162
|
59 .Select(
|
|
60 i => InputSymbols
|
164
|
61 .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i)
|
162
|
62 .ToList()
|
|
63 )
|
|
64 .ToArray();
|
|
65 }
|
|
66
|
|
67 public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) {
|
|
68 Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
|
|
69 Safe.ArgumentNotNull(classes, "classes");
|
|
70 var reverseMap = CreateReverseMap();
|
|
71
|
|
72 var translationMap = new int[Count];
|
|
73
|
|
74 foreach (var scl in classes) {
|
|
75 // skip if the supper class contains the unclassified element
|
164
|
76 if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT))
|
162
|
77 continue;
|
|
78 var range = new List<T>();
|
|
79 foreach (var cl in scl) {
|
|
80 if (cl < 0 || cl >= reverseMap.Length)
|
|
81 throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
|
|
82 range.AddRange(reverseMap[cl]);
|
|
83 }
|
|
84 var newClass = newAlphabet.DefineClass(range);
|
|
85 foreach (var cl in scl)
|
|
86 translationMap[cl] = newClass;
|
|
87 }
|
|
88
|
|
89 return translationMap;
|
|
90 }
|
|
91
|
|
92 public virtual int Translate(T symbol) {
|
|
93 return m_map[GetSymbolIndex(symbol)];
|
|
94 }
|
|
95
|
|
96 public abstract int GetSymbolIndex(T symbol);
|
|
97
|
|
98 public abstract IEnumerable<T> InputSymbols { get; }
|
|
99
|
|
100 /// <summary>
|
|
101 /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
|
|
102 /// </summary>
|
|
103 /// <returns>The translation map.</returns>
|
|
104 public int[] GetTranslationMap() {
|
|
105 return m_map;
|
|
106 }
|
|
107 }
|
|
108 }
|