237
+ − 1 using Implab.Automaton;
+ − 2 using System;
+ − 3 using System.Collections.Generic;
+ − 4 using System.Diagnostics;
+ − 5 using System.Linq;
+ − 6 using System.Runtime.CompilerServices;
+ − 7 using System.Text;
+ − 8 using System.Threading.Tasks;
+ − 9
+ − 10 namespace Implab.Formats {
+ − 11
+ − 12 /// <summary>
+ − 13 /// Fast input scanner for max 255 states and first 255 input chacters.
+ − 14 /// </summary>
+ − 15 /// <typeparam name="TTag"></typeparam>
+ − 16 /// <remarks>
+ − 17 /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache.
+ − 18 ///
+ − 19 /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state.
+ − 20 ///
+ − 21 /// Each input symbol below 255 is treated as 255.
+ − 22 /// </remarks>
+ − 23 public class FastInputScanner<TTag> {
+ − 24 const int StateShift = 8;
+ − 25 const int StateMask = ~((1 << StateShift) - 1);
+ − 26 const int AlphabetSize = 1 << StateShift;
+ − 27 const int UnclassifiedInput = (1 << StateShift) - 1;
+ − 28 const byte UnreachableState = byte.MaxValue;
+ − 29
+ − 30 readonly TTag[] m_tags;
+ − 31 readonly bool[] m_final;
+ − 32
+ − 33 readonly byte m_initialState;
+ − 34 readonly byte[] m_dfa;
+ − 35
+ − 36 int m_position;
+ − 37 byte m_state;
+ − 38
+ − 39 protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
+ − 40 m_dfa = table;
+ − 41 m_final = final;
+ − 42 m_tags = tags;
+ − 43 m_initialState = initial;
+ − 44 }
+ − 45
+ − 46 public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
+ − 47 var states = dfaTable.GetLength(0);
+ − 48 Debug.Assert(states < byte.MaxValue);
+ − 49
+ − 50 m_dfa = new byte[states << StateShift];
+ − 51 m_initialState = (byte)initialState;
+ − 52
+ − 53 m_tags = tags;
+ − 54 m_final = finalStates;
+ − 55
+ − 56 // iterate over states
+ − 57 for(int si = 0; si < states; si++) {
+ − 58 // state offset for the new table
+ − 59 var offset = si << StateShift;
+ − 60
+ − 61 // iterate over alphabet
+ − 62 for(int a = 0; a < AlphabetSize; a++) {
+ − 63 var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
+ − 64 if (next == AutomatonConst.UnreachableState)
+ − 65 next = UnreachableState;
+ − 66
+ − 67 m_dfa[offset | a] = (byte)next;
+ − 68 }
+ − 69 }
+ − 70 }
+ − 71
+ − 72 public TTag Tag {
+ − 73 [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ − 74 get {
+ − 75 return m_tags[m_state];
+ − 76 }
+ − 77 }
+ − 78
+ − 79 public int Position {
+ − 80 [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ − 81 get {
+ − 82 return m_position;
+ − 83 }
+ − 84 }
+ − 85
+ − 86 public bool IsFinal {
+ − 87 [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ − 88 get {
+ − 89 return m_final[m_state];
+ − 90 }
+ − 91 }
+ − 92
+ − 93 [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ − 94 public void ResetState() {
+ − 95 m_state = m_initialState;
+ − 96 }
+ − 97
+ − 98 public FastInputScanner<TTag> Clone() {
+ − 99 var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
+ − 100 clone.m_state = m_state;
+ − 101 clone.m_position = m_position;
+ − 102 return clone;
+ − 103 }
+ − 104
+ − 105 public bool Scan(char[] data, int offset, int max) {
+ − 106 var next = m_state;
+ − 107
+ − 108 m_position = offset;
+ − 109 while (m_position < max) {
+ − 110 var ch = data[m_position];
+ − 111
+ − 112 next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
+ − 113
+ − 114 if (next == UnreachableState)
+ − 115 // scanner stops at the next position after the last recognized symbol
+ − 116 return false;
+ − 117
+ − 118 m_state = next;
+ − 119 m_position++;
+ − 120 }
+ − 121
+ − 122 return true;
+ − 123 }
+ − 124
+ − 125
+ − 126 }
+ − 127 }