Mercurial > pub > ImplabNet
comparison Implab/Parsing/IndexedAlphabetBase.cs @ 158:130781364799 v2
refactoring, code cleanup
author | cin |
---|---|
date | Thu, 18 Feb 2016 14:34:02 +0300 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
157:948c015a9011 | 158:130781364799 |
---|---|
1 using Implab; | |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 using System.Text; | |
7 using System.Threading.Tasks; | |
8 | |
namespace Implab.Parsing {
    /// <summary>
    /// Indexed alphabet is a finite set of symbols where each symbol has a zero-based unique index.
    /// </summary>
    /// <remarks>
    /// The alphabet keeps a map from the symbol index to the id of the class the symbol belongs to.
    /// The class <see cref="UNCLASSIFIED"/> marks symbols which are not assigned to any class yet,
    /// ids of the defined classes start from 1.
    /// </remarks>
    public abstract class IndexedAlphabetBase<T> : IAlphabet<T> {
        /// <summary>
        /// The id of the class which holds all symbols that were not explicitly classified.
        /// </summary>
        public const int UNCLASSIFIED = 0;

        // the id which will be assigned to the next defined class
        int m_nextId = 1;
        // symbol index -> class id
        readonly int[] m_map;

        /// <summary>
        /// Gets the number of classes in the alphabet, the <see cref="UNCLASSIFIED"/> class is included.
        /// </summary>
        public int Count {
            get { return m_nextId; }
        }

        /// <summary>
        /// Creates an empty alphabet where all symbols are unclassified.
        /// </summary>
        /// <param name="mapSize">The size of the map, i.e. the number of distinct symbol indexes.</param>
        protected IndexedAlphabetBase(int mapSize) {
            m_map = new int[mapSize];
        }

        /// <summary>
        /// Creates the alphabet over the existing map, the specified array is used as is (it isn't copied).
        /// </summary>
        /// <param name="map">The map from the symbol index to the class id.</param>
        protected IndexedAlphabetBase(int[] map) {
            Debug.Assert(map != null);

            m_map = map;
            // an empty map is a valid alphabet without symbols, Max() alone would throw on it
            m_nextId = map.DefaultIfEmpty(UNCLASSIFIED).Max() + 1;
        }

        /// <summary>
        /// Makes sure the symbol is classified: an unclassified symbol gets a new class of its own.
        /// </summary>
        /// <param name="symbol">The symbol to define.</param>
        /// <returns>The id of the class of the symbol.</returns>
        public int DefineSymbol(T symbol) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] == UNCLASSIFIED)
                m_map[index] = m_nextId++;
            return m_map[index];
        }

        /// <summary>
        /// Defines a new class which consists of the specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols of the new class, duplicates are ignored.</param>
        /// <returns>The id of the new class.</returns>
        /// <exception cref="InvalidOperationException">One of the symbols is already classified.</exception>
        public int DefineClass(IEnumerable<T> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // Validate all symbols before touching the map: when a symbol is rejected the
            // alphabet must stay unchanged, otherwise the symbols processed so far would
            // remain bound to an id which was never reserved.
            var indices = new HashSet<int>();
            foreach (var symbol in symbols.Distinct()) {
                var index = GetSymbolIndex(symbol);
                // a repeated index means that another symbol of this class already took the slot
                if (m_map[index] != UNCLASSIFIED || !indices.Add(index))
                    throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
            }

            foreach (var index in indices)
                m_map[index] = m_nextId;

            return m_nextId++;
        }

        /// <summary>
        /// Creates the map from the class id to the list of the symbols of this class.
        /// </summary>
        /// <returns>
        /// The array with <see cref="Count"/> lists, the list of the <see cref="UNCLASSIFIED"/>
        /// class is always empty.
        /// </returns>
        public List<T>[] CreateReverseMap() {
            var count = Count;
            var reverseMap = new List<T>[count];
            for (var i = 0; i < count; i++)
                reverseMap[i] = new List<T>();

            // single pass over the input symbols instead of a full scan per each class
            foreach (var symbol in InputSymbols) {
                var cls = m_map[GetSymbolIndex(symbol)];
                // unclassified symbols and ids outside of the known classes aren't reported
                if (cls > UNCLASSIFIED && cls < count)
                    reverseMap[cls].Add(symbol);
            }

            return reverseMap;
        }

        /// <summary>
        /// Merges the classes of this alphabet into super classes and defines them in the new alphabet.
        /// </summary>
        /// <param name="newAlphabet">The alphabet where the super classes will be defined.</param>
        /// <param name="classes">The super classes, each one is a collection of class ids of this alphabet.</param>
        /// <returns>
        /// The map from the class id of this alphabet to the class id in the new alphabet, classes
        /// which weren't reclassified are mapped to 0.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">A super class contains an id which is not a class of this alphabet.</exception>
        public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
            Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
            Safe.ArgumentNotNull(classes, "classes");
            var reverseMap = CreateReverseMap();

            int[] translationMap = new int[Count];

            foreach (var scl in classes) {
                // skip if the super class contains the unclassified element
                if (scl.Contains(UNCLASSIFIED))
                    continue;
                var range = new List<T>();
                foreach (var cl in scl) {
                    if (cl < 0 || cl >= reverseMap.Length)
                        // the two-argument overload is required, a single string would be treated as the parameter name
                        throw new ArgumentOutOfRangeException("classes", String.Format("Class {0} is not valid for the current alphabet", cl));
                    range.AddRange(reverseMap[cl]);
                }
                var newClass = newAlphabet.DefineClass(range);
                foreach (var cl in scl)
                    translationMap[cl] = newClass;
            }

            return translationMap;
        }

        /// <summary>
        /// Translates the symbol to the id of its class.
        /// </summary>
        /// <param name="symbol">The symbol to translate.</param>
        /// <returns>The id of the class or <see cref="UNCLASSIFIED"/> if the symbol isn't classified.</returns>
        public virtual int Translate(T symbol) {
            return m_map[GetSymbolIndex(symbol)];
        }

        /// <summary>
        /// Gets the index of the symbol in the map.
        /// </summary>
        /// <param name="symbol">The symbol.</param>
        /// <returns>The index of the symbol, it is used to address the map directly.</returns>
        public abstract int GetSymbolIndex(T symbol);

        /// <summary>
        /// Gets the symbols which are examined when the reverse map is created.
        /// </summary>
        public abstract IEnumerable<T> InputSymbols { get; }

        /// <summary>
        /// Gets the translation map from the index of the symbol to its class, this is useful for the optimized translation of input symbols.
        /// </summary>
        /// <remarks>The internal array is returned, it isn't copied.</remarks>
        /// <returns>The translation map.</returns>
        public int[] GetTranslationMap() {
            return m_map;
        }
    }
}