annotate Implab/Parsing/AlphabetBase.cs @ 89:ce0171cacec4 v2

improved performance of a chained map operation
author cin
date Wed, 08 Oct 2014 02:19:45 +0400
parents 21611344d366
children 97fbbf816844
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
55
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
1 using Implab;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
2 using System;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
3 using System.Collections.Generic;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
4 using System.Diagnostics;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
5 using System.Linq;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
6 using System.Text;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
7 using System.Threading.Tasks;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
8
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
9 namespace Implab.Parsing {
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
/// <summary>
/// Base implementation of an alphabet: a table that assigns an integer class id
/// to every input symbol. The table is indexed by <see cref="GetSymbolIndex"/>,
/// which derived classes supply together with <see cref="InputSymbols"/>.
/// </summary>
/// <typeparam name="T">The type of the input symbols.</typeparam>
public abstract class AlphabetBase<T> : IAlphabet<T> {
    /// <summary>
    /// Class id stored in the map for symbols that have not been assigned to any class.
    /// </summary>
    public const int UNCLASSIFIED = 0;

    // The id that will be given to the next defined symbol or class.
    int m_nextId = 1;

    // Maps a symbol index (see GetSymbolIndex) to its class id.
    readonly int[] m_map;

    /// <summary>
    /// The exclusive upper bound of the class ids handed out so far; since ids start
    /// right after <see cref="UNCLASSIFIED"/> this is also the number of ids,
    /// <see cref="UNCLASSIFIED"/> included.
    /// </summary>
    public int Count {
        get { return m_nextId; }
    }

    /// <summary>
    /// Creates an alphabet with an empty map in which every symbol is unclassified.
    /// </summary>
    /// <param name="mapSize">The number of entries in the symbol map.</param>
    protected AlphabetBase(int mapSize) {
        m_map = new int[mapSize];
    }

    /// <summary>
    /// Creates an alphabet on top of an existing map. The array is used as is, it is not copied.
    /// </summary>
    /// <param name="map">The map from symbol indices to class ids; must not be null.</param>
    protected AlphabetBase(int[] map) {
        // Same guard as the public methods use; Debug.Assert is compiled out of release builds.
        Safe.ArgumentNotNull(map, "map");

        m_map = map;
        // Max() throws on an empty sequence; an empty map just has no classes defined yet.
        m_nextId = map.Length > 0 ? map.Max() + 1 : UNCLASSIFIED + 1;
    }

    /// <summary>
    /// Returns the class of the specified symbol, allocating a new class for it
    /// when the symbol is still unclassified.
    /// </summary>
    /// <param name="symbol">The symbol to define.</param>
    /// <returns>The class id assigned to the symbol.</returns>
    public int DefineSymbol(T symbol) {
        var index = GetSymbolIndex(symbol);
        if (m_map[index] == UNCLASSIFIED)
            m_map[index] = m_nextId++;
        return m_map[index];
    }

    /// <summary>
    /// Allocates a new class and assigns all specified symbols to it. Duplicate
    /// symbols are ignored. A new class id is consumed even when the sequence is empty.
    /// </summary>
    /// <param name="symbols">The symbols which form the class; must not be null.</param>
    /// <returns>The id of the new class.</returns>
    /// <exception cref="InvalidOperationException">
    /// One of the symbols already belongs to a class, or two different symbols share
    /// the same map entry. The map is left unchanged in this case.
    /// </exception>
    public int DefineClass(IEnumerable<T> symbols) {
        Safe.ArgumentNotNull(symbols, "symbols");

        // Validate everything before touching the map: a failure half way through
        // must not leave entries pointing to a class id which was never consumed,
        // otherwise the next definition would silently reuse that id.
        var indices = new HashSet<int>();
        foreach (var symbol in symbols.Distinct()) {
            var index = GetSymbolIndex(symbol);
            if (m_map[index] != UNCLASSIFIED || !indices.Add(index))
                throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
        }

        foreach (var index in indices)
            m_map[index] = m_nextId;

        return m_nextId++;
    }

    /// <summary>
    /// Builds the reverse of the symbol map: for every class id the list of the
    /// input symbols which belong to that class, in the order of <see cref="InputSymbols"/>.
    /// </summary>
    /// <returns>
    /// An array of <see cref="Count"/> lists indexed by class id. The list for
    /// <see cref="UNCLASSIFIED"/> is always empty.
    /// </returns>
    public List<T>[] CreateReverseMap() {
        var reverseMap = new List<T>[Count];
        for (int i = 0; i < reverseMap.Length; i++)
            reverseMap[i] = new List<T>();

        // A single pass over the input symbols instead of one pass per class.
        foreach (var symbol in InputSymbols) {
            var cls = m_map[GetSymbolIndex(symbol)];
            // Unclassified symbols are not listed; ids outside of the array are ignored.
            if (cls > UNCLASSIFIED && cls < reverseMap.Length)
                reverseMap[cls].Add(symbol);
        }

        return reverseMap;
    }

    /// <summary>
    /// Merges classes of this alphabet into classes of a new alphabet.
    /// For every group in <paramref name="classes"/> a single class is defined in
    /// <paramref name="newAlphabet"/> from all symbols of the grouped classes.
    /// </summary>
    /// <param name="newAlphabet">The alphabet which receives the merged classes; must not be null.</param>
    /// <param name="classes">The groups of class ids of this alphabet; must not be null.</param>
    /// <returns>
    /// An array of <see cref="Count"/> elements which maps a class id of this alphabet
    /// to the class id in the new alphabet. Classes which are not part of a processed
    /// group are mapped to 0.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// A group contains an id which is not a class of this alphabet.
    /// </exception>
    public int[] Reclassify(IAlphabet<T> newAlphabet, IEnumerable<ICollection<int>> classes) {
        Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
        Safe.ArgumentNotNull(classes, "classes");
        var reverseMap = CreateReverseMap();

        int[] translationMap = new int[Count];

        foreach (var scl in classes) {
            // skip if the super class contains the unclassified element
            if (scl.Contains(UNCLASSIFIED))
                continue;

            var range = new List<T>();
            foreach (var cl in scl) {
                if (cl < 0 || cl >= reverseMap.Length)
                    // The single string constructor takes the parameter name, not the message.
                    throw new ArgumentOutOfRangeException("classes", String.Format("Class {0} is not valid for the current alphabet", cl));
                range.AddRange(reverseMap[cl]);
            }

            var newClass = newAlphabet.DefineClass(range);
            foreach (var cl in scl)
                translationMap[cl] = newClass;
        }

        return translationMap;
    }

    /// <summary>
    /// Returns the class id stored in the map for the specified symbol.
    /// </summary>
    /// <param name="symbol">The symbol to translate.</param>
    /// <returns>The class id, <see cref="UNCLASSIFIED"/> when the symbol has no class.</returns>
    public int Translate(T symbol) {
        return m_map[GetSymbolIndex(symbol)];
    }

    /// <summary>
    /// Returns the position of the symbol in the symbol map. The result is used
    /// directly as an array index, so it has to be within the bounds of the map.
    /// </summary>
    /// <param name="symbol">The symbol to look up.</param>
    /// <returns>The index of the map entry which belongs to the symbol.</returns>
    public abstract int GetSymbolIndex(T symbol);

    /// <summary>
    /// The symbols which are enumerated to build the reverse map,
    /// see <see cref="CreateReverseMap"/>.
    /// </summary>
    public abstract IEnumerable<T> InputSymbols { get; }

    /// <summary>
    /// Returns the symbol map. This is the internal array itself, not a copy.
    /// </summary>
    /// <returns>The array which maps symbol indices to class ids.</returns>
    public int[] GetTranslationMap() {
        return m_map;
    }
}
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
100 }