Mercurial > pub > ImplabNet
comparison Implab/Formats/FastInpurScanner.cs @ 237:f2150c16b476 v2
missing files
| author | cin |
|---|---|
| date | Wed, 22 Nov 2017 16:54:58 +0300 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 236:302ca905c19e | 237:f2150c16b476 |
|---|---|
| 1 using Implab.Automaton; | |
| 2 using System; | |
| 3 using System.Collections.Generic; | |
| 4 using System.Diagnostics; | |
| 5 using System.Linq; | |
| 6 using System.Runtime.CompilerServices; | |
| 7 using System.Text; | |
| 8 using System.Threading.Tasks; | |
| 9 | |
| 10 namespace Implab.Formats { | |
| 11 | |
| 12 /// <summary> | |
| 13 /// Fast input scanner for max 255 states and first 255 input chacters. | |
| 14 /// </summary> | |
| 15 /// <typeparam name="TTag"></typeparam> | |
| 16 /// <remarks> | |
| 17 /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache. | |
| 18 /// | |
| 19 /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. | |
| 20 /// | |
| 21 /// Each input symbol below 255 is treated as 255. | |
| 22 /// </remarks> | |
| 23 public class FastInputScanner<TTag> { | |
| 24 const int StateShift = 8; | |
| 25 const int StateMask = ~((1 << StateShift) - 1); | |
| 26 const int AlphabetSize = 1 << StateShift; | |
| 27 const int UnclassifiedInput = (1 << StateShift) - 1; | |
| 28 const byte UnreachableState = byte.MaxValue; | |
| 29 | |
| 30 readonly TTag[] m_tags; | |
| 31 readonly bool[] m_final; | |
| 32 | |
| 33 readonly byte m_initialState; | |
| 34 readonly byte[] m_dfa; | |
| 35 | |
| 36 int m_position; | |
| 37 byte m_state; | |
| 38 | |
| 39 protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) { | |
| 40 m_dfa = table; | |
| 41 m_final = final; | |
| 42 m_tags = tags; | |
| 43 m_initialState = initial; | |
| 44 } | |
| 45 | |
| 46 public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) { | |
| 47 var states = dfaTable.GetLength(0); | |
| 48 Debug.Assert(states < byte.MaxValue); | |
| 49 | |
| 50 m_dfa = new byte[states << StateShift]; | |
| 51 m_initialState = (byte)initialState; | |
| 52 | |
| 53 m_tags = tags; | |
| 54 m_final = finalStates; | |
| 55 | |
| 56 // iterate over states | |
| 57 for(int si = 0; si < states; si++) { | |
| 58 // state offset for the new table | |
| 59 var offset = si << StateShift; | |
| 60 | |
| 61 // iterate over alphabet | |
| 62 for(int a = 0; a < AlphabetSize; a++) { | |
| 63 var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput]; | |
| 64 if (next == AutomatonConst.UnreachableState) | |
| 65 next = UnreachableState; | |
| 66 | |
| 67 m_dfa[offset | a] = (byte)next; | |
| 68 } | |
| 69 } | |
| 70 } | |
| 71 | |
| 72 public TTag Tag { | |
| 73 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| 74 get { | |
| 75 return m_tags[m_state]; | |
| 76 } | |
| 77 } | |
| 78 | |
| 79 public int Position { | |
| 80 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| 81 get { | |
| 82 return m_position; | |
| 83 } | |
| 84 } | |
| 85 | |
| 86 public bool IsFinal { | |
| 87 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| 88 get { | |
| 89 return m_final[m_state]; | |
| 90 } | |
| 91 } | |
| 92 | |
| 93 [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| 94 public void ResetState() { | |
| 95 m_state = m_initialState; | |
| 96 } | |
| 97 | |
| 98 public FastInputScanner<TTag> Clone() { | |
| 99 var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState); | |
| 100 clone.m_state = m_state; | |
| 101 clone.m_position = m_position; | |
| 102 return clone; | |
| 103 } | |
| 104 | |
| 105 public bool Scan(char[] data, int offset, int max) { | |
| 106 var next = m_state; | |
| 107 | |
| 108 m_position = offset; | |
| 109 while (m_position < max) { | |
| 110 var ch = data[m_position]; | |
| 111 | |
| 112 next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)]; | |
| 113 | |
| 114 if (next == UnreachableState) | |
| 115 // scanner stops at the next position after the last recognized symbol | |
| 116 return false; | |
| 117 | |
| 118 m_state = next; | |
| 119 m_position++; | |
| 120 } | |
| 121 | |
| 122 return true; | |
| 123 } | |
| 124 | |
| 125 | |
| 126 } | |
| 127 } |
