Mercurial > pub > ImplabNet
view Implab/Formats/FastInpurScanner.cs @ 265:74e048cbaac8 v3 v3.0.10
Restored lost JsonXmlCaseTransform from version 2.1
author | cin |
---|---|
date | Mon, 16 Apr 2018 19:43:18 +0300 |
parents | f2150c16b476 |
children |
line wrap: on
line source
using Implab.Automaton; using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Runtime.CompilerServices; using System.Text; using System.Threading.Tasks; namespace Implab.Formats { /// <summary> /// Fast input scanner for max 255 states and first 255 input chacters. /// </summary> /// <typeparam name="TTag"></typeparam> /// <remarks> /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache. /// /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. /// /// Each input symbol below 255 is treated as 255. /// </remarks> public class FastInputScanner<TTag> { const int StateShift = 8; const int StateMask = ~((1 << StateShift) - 1); const int AlphabetSize = 1 << StateShift; const int UnclassifiedInput = (1 << StateShift) - 1; const byte UnreachableState = byte.MaxValue; readonly TTag[] m_tags; readonly bool[] m_final; readonly byte m_initialState; readonly byte[] m_dfa; int m_position; byte m_state; protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) { m_dfa = table; m_final = final; m_tags = tags; m_initialState = initial; } public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) { var states = dfaTable.GetLength(0); Debug.Assert(states < byte.MaxValue); m_dfa = new byte[states << StateShift]; m_initialState = (byte)initialState; m_tags = tags; m_final = finalStates; // iterate over states for(int si = 0; si < states; si++) { // state offset for the new table var offset = si << StateShift; // iterate over alphabet for(int a = 0; a < AlphabetSize; a++) { var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput]; if (next == AutomatonConst.UnreachableState) next = UnreachableState; m_dfa[offset | a] = (byte)next; } } } public TTag Tag { [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return m_tags[m_state]; } } public int Position { [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return m_position; } } public bool IsFinal { [MethodImpl(MethodImplOptions.AggressiveInlining)] get { return m_final[m_state]; } } [MethodImpl(MethodImplOptions.AggressiveInlining)] public void ResetState() { m_state = m_initialState; } public FastInputScanner<TTag> Clone() { var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState); clone.m_state = m_state; clone.m_position = m_position; return clone; } public bool Scan(char[] data, int offset, int max) { var next = m_state; m_position = offset; while (m_position < max) { var ch = data[m_position]; next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)]; if (next == UnreachableState) // scanner stops at the next position after the last recognized symbol return false; m_state = next; m_position++; } return true; } } }