annotate Implab/Automaton/IndexedAlphabetBase.cs @ 172:92d5278d1b10 ref20160224

Working on text scanner
author cin
date Mon, 14 Mar 2016 01:19:38 +0300
parents 0f70905b4652
children 0c3c69fe225b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
1 using Implab;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
2 using System;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
3 using System.Collections.Generic;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
4 using System.Diagnostics;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
5 using System.Linq;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
6
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
7 namespace Implab.Automaton {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
8 /// <summary>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
9 /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index.
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
10 /// </summary>
167
cin
parents: 164
diff changeset
11 /// <remarks>
cin
parents: 164
diff changeset
12 /// Indexed alphabets are useful in building efficient translations from the source alphabet
cin
parents: 164
diff changeset
13 /// to the input alphabet of the automaton. It's assumed that the index-to-symbol mapping
cin
parents: 164
diff changeset
14 /// is well known and documented.
cin
parents: 164
diff changeset
15 /// </remarks>
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
16 public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
17 int m_nextId = 1;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
18 readonly int[] m_map;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
19
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
20 protected IndexedAlphabetBase(int mapSize) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
21 m_map = new int[mapSize];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
22 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
23
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
24 protected IndexedAlphabetBase(int[] map) {
171
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
25 Debug.Assert(map != null && map.Length > 0);
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
26 Debug.Assert(map.All(x => x >= 0));
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
27
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
28 m_map = map;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
29 m_nextId = map.Max() + 1;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
30 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
31
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
32 public int DefineSymbol(T symbol) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
33 var index = GetSymbolIndex(symbol);
164
ec35731ae299 Almost complete DFA refactoring
cin
parents: 162
diff changeset
34 if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT)
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
35 m_map[index] = m_nextId++;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
36 return m_map[index];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
37 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
38
171
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
39 public int DefineSymbol(T symbol, int cls) {
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
40 var index = GetSymbolIndex(symbol);
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
41 m_map[index] = cls;
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
42 m_nextId = Math.Max(cls + 1, m_nextId);
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
43 return cls;
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
44 }
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
45
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
46 public int DefineClass(IEnumerable<T> symbols) {
171
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
47 return DefineClass(symbols, m_nextId);
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
48 }
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
49
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
50 public int DefineClass(IEnumerable<T> symbols, int cls) {
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
51 Safe.ArgumentNotNull(symbols, "symbols");
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
52 symbols = symbols.Distinct();
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
53
171
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
54 foreach (var symbol in symbols)
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
55 m_map[GetSymbolIndex(symbol)] = cls;
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
56
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
57 m_nextId = Math.Max(cls + 1, m_nextId);
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
58
171
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
59 return cls;
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
60 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
61
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
62 public virtual int Translate(T symbol) {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
63 return m_map[GetSymbolIndex(symbol)];
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
64 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
65
171
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
66 public int Count {
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
67 get { return m_nextId; }
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
68 }
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
69
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
70 public bool Contains(T symbol) {
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
71 return true;
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
72 }
0f70905b4652 Working on regular DFA
cin
parents: 167
diff changeset
73
172
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
74 public IEnumerable<T> GetSymbols(int cls) {
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
75 for (var i = 0; i < m_map.Length; i++)
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
76 if (m_map[i] == cls)
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
77 yield return GetSymbolByIndex(i);
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
78 }
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
79
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
80 public abstract int GetSymbolIndex(T symbol);
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
81
172
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
82 public abstract T GetSymbolByIndex(int index);
92d5278d1b10 Working on text scanner
cin
parents: 171
diff changeset
83
162
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
84 public abstract IEnumerable<T> InputSymbols { get; }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
85
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
86 /// <summary>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
87 /// Gets the translation map from the index of the symbol to its class; this is useful for optimized translation of input symbols.
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
88 /// </summary>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
89 /// <returns>The translation map.</returns>
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
90 public int[] GetTranslationMap() {
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
91 return m_map;
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
92 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
93 }
0526412bbb26 DFA refactoring
cin
parents:
diff changeset
94 }