view Implab/Formats/FastInpurScanner.cs @ 281:e0916ddc9950 v3 tip

code cleanup and refactoring
author cin
date Fri, 01 Jun 2018 21:35:24 +0300 (2018-06-01)
parents f2150c16b476
children
line wrap: on
line source
using Implab.Automaton;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;

namespace Implab.Formats {

    /// <summary>
    /// Fast input scanner for max 255 states and first 255 input chacters.
    /// </summary>
    /// <typeparam name="TTag"></typeparam>
    /// <remarks>
    /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache.
    /// 
    /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state.
    /// 
    /// Each input symbol below 255 is treated as 255.
    /// </remarks>
    public class FastInputScanner<TTag> {
        const int StateShift = 8;
        const int StateMask = ~((1 << StateShift) - 1);
        const int AlphabetSize = 1 << StateShift;
        const int UnclassifiedInput = (1 << StateShift) - 1;
        const byte UnreachableState = byte.MaxValue;

        readonly TTag[] m_tags;
        readonly bool[] m_final;

        readonly byte m_initialState;
        readonly byte[] m_dfa;

        int m_position;
        byte m_state;

        protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
            m_dfa = table;
            m_final = final;
            m_tags = tags;
            m_initialState = initial;
        }

        public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
            var states = dfaTable.GetLength(0);
            Debug.Assert(states < byte.MaxValue);

            m_dfa = new byte[states << StateShift];
            m_initialState = (byte)initialState;

            m_tags = tags;
            m_final = finalStates;

            // iterate over states
            for(int si = 0; si < states; si++) {
                // state offset for the new table
                var offset = si << StateShift;
                
                // iterate over alphabet
                for(int a = 0; a < AlphabetSize; a++) {
                    var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
                    if (next == AutomatonConst.UnreachableState)
                        next = UnreachableState;

                    m_dfa[offset | a] = (byte)next;
                }
            }
        }

        public TTag Tag {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get {
                return m_tags[m_state];
            }
        }

        public int Position {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get {
                return m_position;
            }
        }

        public bool IsFinal {
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            get {
                return m_final[m_state];
            }
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void ResetState() {
            m_state = m_initialState;
        }

        public FastInputScanner<TTag> Clone() {
            var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
            clone.m_state = m_state;
            clone.m_position = m_position;
            return clone;
        }

        public bool Scan(char[] data, int offset, int max) {
            var next = m_state;

            m_position = offset;
            while (m_position < max) {
                var ch = data[m_position];

                next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];

                if (next == UnreachableState)
                    // scanner stops at the next position after the last recognized symbol
                    return false;

                m_state = next;
                m_position++;
            }
            
            return true;
        }


    }
}