annotate Implab/Parsing/IndexedAlphabetBase.cs @ 158:130781364799 v2

refactoring, code cleanup
author cin
date Thu, 18 Feb 2016 14:34:02 +0300
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
158
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
1 using Implab;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
2 using System;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
3 using System.Collections.Generic;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
4 using System.Diagnostics;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
5 using System.Linq;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
6 using System.Text;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
7 using System.Threading.Tasks;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
8
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
9 namespace Implab.Parsing {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
10 /// <summary>
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
11 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
12 /// </summary>
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
13 public abstract class IndexedAlphabetBase<T> : IAlphabet<T> {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
14 public const int UNCLASSIFIED = 0;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
15
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
16 int m_nextId = 1;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
17 readonly int[] m_map;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
18
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
19 public int Count {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
20 get { return m_nextId; }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
21 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
22
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
23 protected IndexedAlphabetBase(int mapSize) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
24 m_map = new int[mapSize];
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
25 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
26
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
27 protected IndexedAlphabetBase(int[] map) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
28 Debug.Assert(map != null);
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
29
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
30 m_map = map;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
31 m_nextId = map.Max() + 1;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
32 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
33
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
34 public int DefineSymbol(T symbol) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
35 var index = GetSymbolIndex(symbol);
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
36 if (m_map[index] == UNCLASSIFIED)
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
37 m_map[index] = m_nextId++;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
38 return m_map[index];
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
39 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
40
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
41 public int DefineClass(IEnumerable<T> symbols) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
42 Safe.ArgumentNotNull(symbols, "symbols");
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
43 symbols = symbols.Distinct();
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
44
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
45 foreach (var symbol in symbols) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
46 var index = GetSymbolIndex(symbol);
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
47 if (m_map[index] == UNCLASSIFIED)
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
48 m_map[GetSymbolIndex(symbol)] = m_nextId;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
49 else
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
50 throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
51 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
52 return m_nextId++;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
53 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
54
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
55 public List<T>[] CreateReverseMap() {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
56 return
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
57 Enumerable.Range(UNCLASSIFIED, Count)
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
58 .Select(
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
59 i => InputSymbols
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
60 .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i)
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
61 .ToList()
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
62 )
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
63 .ToArray();
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
64 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
65
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
66 public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
67 Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
68 Safe.ArgumentNotNull(classes, "classes");
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
69 var reverseMap = CreateReverseMap();
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
70
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
71 int[] translationMap = new int[Count];
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
72
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
73 foreach (var scl in classes) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
74 // skip if the supper class contains the unclassified element
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
75 if (scl.Contains(UNCLASSIFIED))
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
76 continue;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
77 var range = new List<T>();
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
78 foreach (var cl in scl) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
79 if (cl < 0 || cl >= reverseMap.Length)
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
80 throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
81 range.AddRange(reverseMap[cl]);
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
82 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
83 var newClass = newAlphabet.DefineClass(range);
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
84 foreach (var cl in scl)
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
85 translationMap[cl] = newClass;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
86 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
87
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
88 return translationMap;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
89 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
90
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
91 public virtual int Translate(T symbol) {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
92 return m_map[GetSymbolIndex(symbol)];
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
93 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
94
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
95 public abstract int GetSymbolIndex(T symbol);
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
96
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
97 public abstract IEnumerable<T> InputSymbols { get; }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
98
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
99 /// <summary>
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
100 /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
101 /// </summary>
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
102 /// <returns>The translation map.</returns>
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
103 public int[] GetTranslationMap() {
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
104 return m_map;
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
105 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
106 }
130781364799 refactoring, code cleanup
cin
parents:
diff changeset
107 }