237
|
1 using Implab.Automaton;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Diagnostics;
|
|
5 using System.Linq;
|
|
6 using System.Runtime.CompilerServices;
|
|
7 using System.Text;
|
|
8 using System.Threading.Tasks;
|
|
9
|
|
10 namespace Implab.Formats {
|
|
11
|
|
12 /// <summary>
|
|
13 /// Fast input scanner for max 255 states and first 255 input chacters.
|
|
14 /// </summary>
|
|
15 /// <typeparam name="TTag"></typeparam>
|
|
16 /// <remarks>
|
|
17 /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache.
|
|
18 ///
|
|
19 /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state.
|
|
20 ///
|
|
21 /// Each input symbol below 255 is treated as 255.
|
|
22 /// </remarks>
|
|
23 public class FastInputScanner<TTag> {
|
|
24 const int StateShift = 8;
|
|
25 const int StateMask = ~((1 << StateShift) - 1);
|
|
26 const int AlphabetSize = 1 << StateShift;
|
|
27 const int UnclassifiedInput = (1 << StateShift) - 1;
|
|
28 const byte UnreachableState = byte.MaxValue;
|
|
29
|
|
30 readonly TTag[] m_tags;
|
|
31 readonly bool[] m_final;
|
|
32
|
|
33 readonly byte m_initialState;
|
|
34 readonly byte[] m_dfa;
|
|
35
|
|
36 int m_position;
|
|
37 byte m_state;
|
|
38
|
|
39 protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
|
|
40 m_dfa = table;
|
|
41 m_final = final;
|
|
42 m_tags = tags;
|
|
43 m_initialState = initial;
|
|
44 }
|
|
45
|
|
46 public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
|
|
47 var states = dfaTable.GetLength(0);
|
|
48 Debug.Assert(states < byte.MaxValue);
|
|
49
|
|
50 m_dfa = new byte[states << StateShift];
|
|
51 m_initialState = (byte)initialState;
|
|
52
|
|
53 m_tags = tags;
|
|
54 m_final = finalStates;
|
|
55
|
|
56 // iterate over states
|
|
57 for(int si = 0; si < states; si++) {
|
|
58 // state offset for the new table
|
|
59 var offset = si << StateShift;
|
|
60
|
|
61 // iterate over alphabet
|
|
62 for(int a = 0; a < AlphabetSize; a++) {
|
|
63 var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
|
|
64 if (next == AutomatonConst.UnreachableState)
|
|
65 next = UnreachableState;
|
|
66
|
|
67 m_dfa[offset | a] = (byte)next;
|
|
68 }
|
|
69 }
|
|
70 }
|
|
71
|
|
72 public TTag Tag {
|
|
73 [MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
74 get {
|
|
75 return m_tags[m_state];
|
|
76 }
|
|
77 }
|
|
78
|
|
79 public int Position {
|
|
80 [MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
81 get {
|
|
82 return m_position;
|
|
83 }
|
|
84 }
|
|
85
|
|
86 public bool IsFinal {
|
|
87 [MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
88 get {
|
|
89 return m_final[m_state];
|
|
90 }
|
|
91 }
|
|
92
|
|
93 [MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
94 public void ResetState() {
|
|
95 m_state = m_initialState;
|
|
96 }
|
|
97
|
|
98 public FastInputScanner<TTag> Clone() {
|
|
99 var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
|
|
100 clone.m_state = m_state;
|
|
101 clone.m_position = m_position;
|
|
102 return clone;
|
|
103 }
|
|
104
|
|
105 public bool Scan(char[] data, int offset, int max) {
|
|
106 var next = m_state;
|
|
107
|
|
108 m_position = offset;
|
|
109 while (m_position < max) {
|
|
110 var ch = data[m_position];
|
|
111
|
|
112 next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
|
|
113
|
|
114 if (next == UnreachableState)
|
|
115 // scanner stops at the next position after the last recognized symbol
|
|
116 return false;
|
|
117
|
|
118 m_state = next;
|
|
119 m_position++;
|
|
120 }
|
|
121
|
|
122 return true;
|
|
123 }
|
|
124
|
|
125
|
|
126 }
|
|
127 }
|