Mercurial > pub > ImplabNet
diff Implab/Formats/FastInpurScanner.cs @ 237:f2150c16b476 v2
missing files
author | cin |
---|---|
date | Wed, 22 Nov 2017 16:54:58 +0300 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Formats/FastInpurScanner.cs Wed Nov 22 16:54:58 2017 +0300 @@ -0,0 +1,127 @@ +using Implab.Automaton; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Formats { + + /// <summary> + /// Fast input scanner for max 255 states and first 255 input chacters. + /// </summary> + /// <typeparam name="TTag"></typeparam> + /// <remarks> + /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache. + /// + /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state. + /// + /// Each input symbol below 255 is treated as 255. + /// </remarks> + public class FastInputScanner<TTag> { + const int StateShift = 8; + const int StateMask = ~((1 << StateShift) - 1); + const int AlphabetSize = 1 << StateShift; + const int UnclassifiedInput = (1 << StateShift) - 1; + const byte UnreachableState = byte.MaxValue; + + readonly TTag[] m_tags; + readonly bool[] m_final; + + readonly byte m_initialState; + readonly byte[] m_dfa; + + int m_position; + byte m_state; + + protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) { + m_dfa = table; + m_final = final; + m_tags = tags; + m_initialState = initial; + } + + public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) { + var states = dfaTable.GetLength(0); + Debug.Assert(states < byte.MaxValue); + + m_dfa = new byte[states << StateShift]; + m_initialState = (byte)initialState; + + m_tags = tags; + m_final = finalStates; + + // iterate over states + for(int si = 0; si < states; si++) { + // state offset for the new table + var offset = si << StateShift; + + // iterate over alphabet + for(int a = 0; a < AlphabetSize; a++) { + var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput]; + if (next == AutomatonConst.UnreachableState) + next = UnreachableState; + + m_dfa[offset | a] = (byte)next; + } + } + } + + public TTag Tag { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get { + return m_tags[m_state]; + } + } + + public int Position { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get { + return m_position; + } + } + + public bool IsFinal { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get { + return m_final[m_state]; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void ResetState() { + m_state = m_initialState; + } + + public FastInputScanner<TTag> Clone() { + var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState); + clone.m_state = m_state; + clone.m_position = m_position; + return clone; + } + + public bool Scan(char[] data, int offset, int max) { + var next = m_state; + + m_position = offset; + while (m_position < max) { + var ch = data[m_position]; + + next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)]; + + if (next == UnreachableState) + // scanner stops at the next position after the last recognized symbol + return false; + + m_state = next; + m_position++; + } + + return true; + } + + + } +}