annotate Implab/Automaton/IndexedAlphabetBase.cs @ 170:181119ef3b39 ref20160224

DFA refactoring, rx based dfa.
author cin
date Fri, 04 Mar 2016 01:56:31 +0300
parents 96681e9d0cea
children 0f70905b4652
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
1 using Implab;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
2 using System;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
3 using System.Collections.Generic;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
4 using System.Diagnostics;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
5 using System.Linq;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
6
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
7 namespace Implab.Automaton {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
8 /// <summary>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
9 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
10 /// </summary>
167
cin
parents: 164
diff changeset
11 /// <remarks>
cin
parents: 164
diff changeset
12 /// Indexed alphabets are usefull in bulting efficient translations from source alphabet
cin
parents: 164
diff changeset
13 /// to the input alphabet of the automaton. It's assumed that the index to the symbol match
cin
parents: 164
diff changeset
14 /// is well known and documented.
cin
parents: 164
diff changeset
15 /// </remarks>
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
16 public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
17 int m_nextId = 1;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
18 readonly int[] m_map;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
19
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
20 public int Count {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
21 get { return m_nextId; }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
22 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
23
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
24 protected IndexedAlphabetBase(int mapSize) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
25 m_map = new int[mapSize];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
26 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
27
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
28 protected IndexedAlphabetBase(int[] map) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
29 Debug.Assert(map != null);
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
30
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
31 m_map = map;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
32 m_nextId = map.Max() + 1;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
33 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
34
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
35 public int DefineSymbol(T symbol) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
36 var index = GetSymbolIndex(symbol);
164
ec35731ae299 Almost complete DFA refactoring
cin
parents: 162
diff changeset
37 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
38 m_map[index] = m_nextId++;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
39 return m_map[index];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
40 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
41
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
42 public int DefineClass(IEnumerable<T> symbols) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
43 Safe.ArgumentNotNull(symbols, "symbols");
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
44 symbols = symbols.Distinct();
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
45
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
46 foreach (var symbol in symbols) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
47 var index = GetSymbolIndex(symbol);
164
ec35731ae299 Almost complete DFA refactoring
cin
parents: 162
diff changeset
48 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
49 m_map[GetSymbolIndex(symbol)] = m_nextId;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
50 else
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
51 throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
52 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
53 return m_nextId++;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
54 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
55
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
56 public List<T>[] CreateReverseMap() {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
57 return
164
ec35731ae299 Almost complete DFA refactoring
cin
parents: 162
diff changeset
58 Enumerable.Range(0, Count)
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
59 .Select(
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
60 i => InputSymbols
164
ec35731ae299 Almost complete DFA refactoring
cin
parents: 162
diff changeset
61 .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i)
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
62 .ToList()
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
63 )
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
64 .ToArray();
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
65 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
66
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
67 public int[] Reclassify(IAlphabetBuilder<T> newAlphabet, IEnumerable<IEnumerable<int>> classes) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
68 Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
69 Safe.ArgumentNotNull(classes, "classes");
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
70 var reverseMap = CreateReverseMap();
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
71
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
72 var translationMap = new int[Count];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
73
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
74 foreach (var scl in classes) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
75 // skip if the supper class contains the unclassified element
164
ec35731ae299 Almost complete DFA refactoring
cin
parents: 162
diff changeset
76 if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT))
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
77 continue;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
78 var range = new List<T>();
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
79 foreach (var cl in scl) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
80 if (cl < 0 || cl >= reverseMap.Length)
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
81 throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
82 range.AddRange(reverseMap[cl]);
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
83 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
84 var newClass = newAlphabet.DefineClass(range);
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
85 foreach (var cl in scl)
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
86 translationMap[cl] = newClass;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
87 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
88
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
89 return translationMap;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
90 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
91
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
92 public virtual int Translate(T symbol) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
93 return m_map[GetSymbolIndex(symbol)];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
94 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
95
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
96 public abstract int GetSymbolIndex(T symbol);
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
97
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
98 public abstract IEnumerable<T> InputSymbols { get; }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
99
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
100 /// <summary>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
101 /// Gets the translation map from the index of the symbol to it's class this is usefull for the optimized input symbols transtaion.
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
102 /// </summary>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
103 /// <returns>The translation map.</returns>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
104 public int[] GetTranslationMap() {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
105 return m_map;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
106 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
107 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
108 }