Mercurial > pub > ImplabNet
comparison Implab/Formats/FastInpurScanner.cs @ 237:f2150c16b476 v2
missing files
author | cin |
---|---|
date | Wed, 22 Nov 2017 16:54:58 +0300 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
236:302ca905c19e | 237:f2150c16b476 |
---|---|
1 using Implab.Automaton; | |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Diagnostics; | |
5 using System.Linq; | |
6 using System.Runtime.CompilerServices; | |
7 using System.Text; | |
8 using System.Threading.Tasks; | |
9 | |
10 namespace Implab.Formats { | |
11 | |
12 /// <summary> | |
13 /// Fast input scanner for max 255 states and first 255 input chacters. | |
14 /// </summary> | |
15 /// <typeparam name="TTag"></typeparam> | |
16 /// <remarks> | |
17 /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache. | |
18 /// | |
19 /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. | |
20 /// | |
21 /// Each input symbol below 255 is treated as 255. | |
22 /// </remarks> | |
23 public class FastInputScanner<TTag> { | |
24 const int StateShift = 8; | |
25 const int StateMask = ~((1 << StateShift) - 1); | |
26 const int AlphabetSize = 1 << StateShift; | |
27 const int UnclassifiedInput = (1 << StateShift) - 1; | |
28 const byte UnreachableState = byte.MaxValue; | |
29 | |
30 readonly TTag[] m_tags; | |
31 readonly bool[] m_final; | |
32 | |
33 readonly byte m_initialState; | |
34 readonly byte[] m_dfa; | |
35 | |
36 int m_position; | |
37 byte m_state; | |
38 | |
39 protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) { | |
40 m_dfa = table; | |
41 m_final = final; | |
42 m_tags = tags; | |
43 m_initialState = initial; | |
44 } | |
45 | |
46 public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) { | |
47 var states = dfaTable.GetLength(0); | |
48 Debug.Assert(states < byte.MaxValue); | |
49 | |
50 m_dfa = new byte[states << StateShift]; | |
51 m_initialState = (byte)initialState; | |
52 | |
53 m_tags = tags; | |
54 m_final = finalStates; | |
55 | |
56 // iterate over states | |
57 for(int si = 0; si < states; si++) { | |
58 // state offset for the new table | |
59 var offset = si << StateShift; | |
60 | |
61 // iterate over alphabet | |
62 for(int a = 0; a < AlphabetSize; a++) { | |
63 var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput]; | |
64 if (next == AutomatonConst.UnreachableState) | |
65 next = UnreachableState; | |
66 | |
67 m_dfa[offset | a] = (byte)next; | |
68 } | |
69 } | |
70 } | |
71 | |
72 public TTag Tag { | |
73 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
74 get { | |
75 return m_tags[m_state]; | |
76 } | |
77 } | |
78 | |
79 public int Position { | |
80 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
81 get { | |
82 return m_position; | |
83 } | |
84 } | |
85 | |
86 public bool IsFinal { | |
87 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
88 get { | |
89 return m_final[m_state]; | |
90 } | |
91 } | |
92 | |
93 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
94 public void ResetState() { | |
95 m_state = m_initialState; | |
96 } | |
97 | |
98 public FastInputScanner<TTag> Clone() { | |
99 var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState); | |
100 clone.m_state = m_state; | |
101 clone.m_position = m_position; | |
102 return clone; | |
103 } | |
104 | |
105 public bool Scan(char[] data, int offset, int max) { | |
106 var next = m_state; | |
107 | |
108 m_position = offset; | |
109 while (m_position < max) { | |
110 var ch = data[m_position]; | |
111 | |
112 next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)]; | |
113 | |
114 if (next == UnreachableState) | |
115 // scanner stops at the next position after the last recognized symbol | |
116 return false; | |
117 | |
118 m_state = next; | |
119 m_position++; | |
120 } | |
121 | |
122 return true; | |
123 } | |
124 | |
125 | |
126 } | |
127 } |