diff Implab/Formats/FastInpurScanner.cs @ 237:f2150c16b476 v2

missing files
author cin
date Wed, 22 Nov 2017 16:54:58 +0300
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/FastInpurScanner.cs	Wed Nov 22 16:54:58 2017 +0300
@@ -0,0 +1,127 @@
+using Implab.Automaton;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Formats {
+
+    /// <summary>
+    /// Fast input scanner for max 255 states and first 255 input chacters.
+    /// </summary>
+    /// <typeparam name="TTag"></typeparam>
+    /// <remarks>
+    /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache.
+    /// 
+    /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state.
+    /// 
+    /// Each input symbol below 255 is treated as 255.
+    /// </remarks>
+    public class FastInputScanner<TTag> {
+        const int StateShift = 8;
+        const int StateMask = ~((1 << StateShift) - 1);
+        const int AlphabetSize = 1 << StateShift;
+        const int UnclassifiedInput = (1 << StateShift) - 1;
+        const byte UnreachableState = byte.MaxValue;
+
+        readonly TTag[] m_tags;
+        readonly bool[] m_final;
+
+        readonly byte m_initialState;
+        readonly byte[] m_dfa;
+
+        int m_position;
+        byte m_state;
+
+        protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
+            m_dfa = table;
+            m_final = final;
+            m_tags = tags;
+            m_initialState = initial;
+        }
+
+        public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
+            var states = dfaTable.GetLength(0);
+            Debug.Assert(states < byte.MaxValue);
+
+            m_dfa = new byte[states << StateShift];
+            m_initialState = (byte)initialState;
+
+            m_tags = tags;
+            m_final = finalStates;
+
+            // iterate over states
+            for(int si = 0; si < states; si++) {
+                // state offset for the new table
+                var offset = si << StateShift;
+                
+                // iterate over alphabet
+                for(int a = 0; a < AlphabetSize; a++) {
+                    var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
+                    if (next == AutomatonConst.UnreachableState)
+                        next = UnreachableState;
+
+                    m_dfa[offset | a] = (byte)next;
+                }
+            }
+        }
+
+        public TTag Tag {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get {
+                return m_tags[m_state];
+            }
+        }
+
+        public int Position {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get {
+                return m_position;
+            }
+        }
+
+        public bool IsFinal {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get {
+                return m_final[m_state];
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public void ResetState() {
+            m_state = m_initialState;
+        }
+
+        public FastInputScanner<TTag> Clone() {
+            var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
+            clone.m_state = m_state;
+            clone.m_position = m_position;
+            return clone;
+        }
+
+        public bool Scan(char[] data, int offset, int max) {
+            var next = m_state;
+
+            m_position = offset;
+            while (m_position < max) {
+                var ch = data[m_position];
+
+                next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
+
+                if (next == UnreachableState)
+                    // scanner stops at the next position after the last recognized symbol
+                    return false;
+
+                m_state = next;
+                m_position++;
+            }
+            
+            return true;
+        }
+
+
+    }
+}