comparison Implab/Formats/FastInpurScanner.cs @ 237:f2150c16b476 v2

missing files
author cin
date Wed, 22 Nov 2017 16:54:58 +0300
parents
children
comparison
equal deleted inserted replaced
236:302ca905c19e 237:f2150c16b476
1 using Implab.Automaton;
2 using System;
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Linq;
6 using System.Runtime.CompilerServices;
7 using System.Text;
8 using System.Threading.Tasks;
9
10 namespace Implab.Formats {
11
12 /// <summary>
13 /// Fast input scanner for max 255 states and first 255 input chacters.
14 /// </summary>
15 /// <typeparam name="TTag"></typeparam>
16 /// <remarks>
17 /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache.
18 ///
19 /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state.
20 ///
21 /// Each input symbol below 255 is treated as 255.
22 /// </remarks>
23 public class FastInputScanner<TTag> {
24 const int StateShift = 8;
25 const int StateMask = ~((1 << StateShift) - 1);
26 const int AlphabetSize = 1 << StateShift;
27 const int UnclassifiedInput = (1 << StateShift) - 1;
28 const byte UnreachableState = byte.MaxValue;
29
30 readonly TTag[] m_tags;
31 readonly bool[] m_final;
32
33 readonly byte m_initialState;
34 readonly byte[] m_dfa;
35
36 int m_position;
37 byte m_state;
38
39 protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
40 m_dfa = table;
41 m_final = final;
42 m_tags = tags;
43 m_initialState = initial;
44 }
45
46 public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
47 var states = dfaTable.GetLength(0);
48 Debug.Assert(states < byte.MaxValue);
49
50 m_dfa = new byte[states << StateShift];
51 m_initialState = (byte)initialState;
52
53 m_tags = tags;
54 m_final = finalStates;
55
56 // iterate over states
57 for(int si = 0; si < states; si++) {
58 // state offset for the new table
59 var offset = si << StateShift;
60
61 // iterate over alphabet
62 for(int a = 0; a < AlphabetSize; a++) {
63 var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
64 if (next == AutomatonConst.UnreachableState)
65 next = UnreachableState;
66
67 m_dfa[offset | a] = (byte)next;
68 }
69 }
70 }
71
72 public TTag Tag {
73 [MethodImpl(MethodImplOptions.AggressiveInlining)]
74 get {
75 return m_tags[m_state];
76 }
77 }
78
79 public int Position {
80 [MethodImpl(MethodImplOptions.AggressiveInlining)]
81 get {
82 return m_position;
83 }
84 }
85
86 public bool IsFinal {
87 [MethodImpl(MethodImplOptions.AggressiveInlining)]
88 get {
89 return m_final[m_state];
90 }
91 }
92
93 [MethodImpl(MethodImplOptions.AggressiveInlining)]
94 public void ResetState() {
95 m_state = m_initialState;
96 }
97
98 public FastInputScanner<TTag> Clone() {
99 var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
100 clone.m_state = m_state;
101 clone.m_position = m_position;
102 return clone;
103 }
104
105 public bool Scan(char[] data, int offset, int max) {
106 var next = m_state;
107
108 m_position = offset;
109 while (m_position < max) {
110 var ch = data[m_position];
111
112 next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
113
114 if (next == UnreachableState)
115 // scanner stops at the next position after the last recognized symbol
116 return false;
117
118 m_state = next;
119 m_position++;
120 }
121
122 return true;
123 }
124
125
126 }
127 }