annotate Implab/Formats/FastInpurScanner.cs @ 265:74e048cbaac8 v3 v3.0.10

Restored lost JsonXmlCaseTransform from version 2.1
author cin
date Mon, 16 Apr 2018 19:43:18 +0300
parents f2150c16b476
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
237
f2150c16b476 missing files
cin
parents:
diff changeset
1 using Implab.Automaton;
f2150c16b476 missing files
cin
parents:
diff changeset
2 using System;
f2150c16b476 missing files
cin
parents:
diff changeset
3 using System.Collections.Generic;
f2150c16b476 missing files
cin
parents:
diff changeset
4 using System.Diagnostics;
f2150c16b476 missing files
cin
parents:
diff changeset
5 using System.Linq;
f2150c16b476 missing files
cin
parents:
diff changeset
6 using System.Runtime.CompilerServices;
f2150c16b476 missing files
cin
parents:
diff changeset
7 using System.Text;
f2150c16b476 missing files
cin
parents:
diff changeset
8 using System.Threading.Tasks;
f2150c16b476 missing files
cin
parents:
diff changeset
9
f2150c16b476 missing files
cin
parents:
diff changeset
10 namespace Implab.Formats {
f2150c16b476 missing files
cin
parents:
diff changeset
11
f2150c16b476 missing files
cin
parents:
diff changeset
12 /// <summary>
f2150c16b476 missing files
cin
parents:
diff changeset
13 /// Fast input scanner for max 255 states and first 255 input chacters.
f2150c16b476 missing files
cin
parents:
diff changeset
14 /// </summary>
f2150c16b476 missing files
cin
parents:
diff changeset
15 /// <typeparam name="TTag"></typeparam>
f2150c16b476 missing files
cin
parents:
diff changeset
16 /// <remarks>
f2150c16b476 missing files
cin
parents:
diff changeset
17 /// Creates a one rank array to store automa transition table, each entry in this table is byte, to make this table fit into L1 cache.
f2150c16b476 missing files
cin
parents:
diff changeset
18 ///
f2150c16b476 missing files
cin
parents:
diff changeset
19 /// Each entry is addressed as <c>(state << 8) | input</c> which make this quite fast to get the next state.
f2150c16b476 missing files
cin
parents:
diff changeset
20 ///
f2150c16b476 missing files
cin
parents:
diff changeset
21 /// Each input symbol below 255 is treated as 255.
f2150c16b476 missing files
cin
parents:
diff changeset
22 /// </remarks>
f2150c16b476 missing files
cin
parents:
diff changeset
23 public class FastInputScanner<TTag> {
f2150c16b476 missing files
cin
parents:
diff changeset
24 const int StateShift = 8;
f2150c16b476 missing files
cin
parents:
diff changeset
25 const int StateMask = ~((1 << StateShift) - 1);
f2150c16b476 missing files
cin
parents:
diff changeset
26 const int AlphabetSize = 1 << StateShift;
f2150c16b476 missing files
cin
parents:
diff changeset
27 const int UnclassifiedInput = (1 << StateShift) - 1;
f2150c16b476 missing files
cin
parents:
diff changeset
28 const byte UnreachableState = byte.MaxValue;
f2150c16b476 missing files
cin
parents:
diff changeset
29
f2150c16b476 missing files
cin
parents:
diff changeset
30 readonly TTag[] m_tags;
f2150c16b476 missing files
cin
parents:
diff changeset
31 readonly bool[] m_final;
f2150c16b476 missing files
cin
parents:
diff changeset
32
f2150c16b476 missing files
cin
parents:
diff changeset
33 readonly byte m_initialState;
f2150c16b476 missing files
cin
parents:
diff changeset
34 readonly byte[] m_dfa;
f2150c16b476 missing files
cin
parents:
diff changeset
35
f2150c16b476 missing files
cin
parents:
diff changeset
36 int m_position;
f2150c16b476 missing files
cin
parents:
diff changeset
37 byte m_state;
f2150c16b476 missing files
cin
parents:
diff changeset
38
f2150c16b476 missing files
cin
parents:
diff changeset
39 protected FastInputScanner(byte[] table, bool[] final, TTag[] tags, byte initial) {
f2150c16b476 missing files
cin
parents:
diff changeset
40 m_dfa = table;
f2150c16b476 missing files
cin
parents:
diff changeset
41 m_final = final;
f2150c16b476 missing files
cin
parents:
diff changeset
42 m_tags = tags;
f2150c16b476 missing files
cin
parents:
diff changeset
43 m_initialState = initial;
f2150c16b476 missing files
cin
parents:
diff changeset
44 }
f2150c16b476 missing files
cin
parents:
diff changeset
45
f2150c16b476 missing files
cin
parents:
diff changeset
46 public FastInputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, int[] inputMap) {
f2150c16b476 missing files
cin
parents:
diff changeset
47 var states = dfaTable.GetLength(0);
f2150c16b476 missing files
cin
parents:
diff changeset
48 Debug.Assert(states < byte.MaxValue);
f2150c16b476 missing files
cin
parents:
diff changeset
49
f2150c16b476 missing files
cin
parents:
diff changeset
50 m_dfa = new byte[states << StateShift];
f2150c16b476 missing files
cin
parents:
diff changeset
51 m_initialState = (byte)initialState;
f2150c16b476 missing files
cin
parents:
diff changeset
52
f2150c16b476 missing files
cin
parents:
diff changeset
53 m_tags = tags;
f2150c16b476 missing files
cin
parents:
diff changeset
54 m_final = finalStates;
f2150c16b476 missing files
cin
parents:
diff changeset
55
f2150c16b476 missing files
cin
parents:
diff changeset
56 // iterate over states
f2150c16b476 missing files
cin
parents:
diff changeset
57 for(int si = 0; si < states; si++) {
f2150c16b476 missing files
cin
parents:
diff changeset
58 // state offset for the new table
f2150c16b476 missing files
cin
parents:
diff changeset
59 var offset = si << StateShift;
f2150c16b476 missing files
cin
parents:
diff changeset
60
f2150c16b476 missing files
cin
parents:
diff changeset
61 // iterate over alphabet
f2150c16b476 missing files
cin
parents:
diff changeset
62 for(int a = 0; a < AlphabetSize; a++) {
f2150c16b476 missing files
cin
parents:
diff changeset
63 var next = dfaTable[si, a < inputMap.Length ? inputMap[a] : AutomatonConst.UnclassifiedInput];
f2150c16b476 missing files
cin
parents:
diff changeset
64 if (next == AutomatonConst.UnreachableState)
f2150c16b476 missing files
cin
parents:
diff changeset
65 next = UnreachableState;
f2150c16b476 missing files
cin
parents:
diff changeset
66
f2150c16b476 missing files
cin
parents:
diff changeset
67 m_dfa[offset | a] = (byte)next;
f2150c16b476 missing files
cin
parents:
diff changeset
68 }
f2150c16b476 missing files
cin
parents:
diff changeset
69 }
f2150c16b476 missing files
cin
parents:
diff changeset
70 }
f2150c16b476 missing files
cin
parents:
diff changeset
71
f2150c16b476 missing files
cin
parents:
diff changeset
72 public TTag Tag {
f2150c16b476 missing files
cin
parents:
diff changeset
73 [MethodImpl(MethodImplOptions.AggressiveInlining)]
f2150c16b476 missing files
cin
parents:
diff changeset
74 get {
f2150c16b476 missing files
cin
parents:
diff changeset
75 return m_tags[m_state];
f2150c16b476 missing files
cin
parents:
diff changeset
76 }
f2150c16b476 missing files
cin
parents:
diff changeset
77 }
f2150c16b476 missing files
cin
parents:
diff changeset
78
f2150c16b476 missing files
cin
parents:
diff changeset
79 public int Position {
f2150c16b476 missing files
cin
parents:
diff changeset
80 [MethodImpl(MethodImplOptions.AggressiveInlining)]
f2150c16b476 missing files
cin
parents:
diff changeset
81 get {
f2150c16b476 missing files
cin
parents:
diff changeset
82 return m_position;
f2150c16b476 missing files
cin
parents:
diff changeset
83 }
f2150c16b476 missing files
cin
parents:
diff changeset
84 }
f2150c16b476 missing files
cin
parents:
diff changeset
85
f2150c16b476 missing files
cin
parents:
diff changeset
86 public bool IsFinal {
f2150c16b476 missing files
cin
parents:
diff changeset
87 [MethodImpl(MethodImplOptions.AggressiveInlining)]
f2150c16b476 missing files
cin
parents:
diff changeset
88 get {
f2150c16b476 missing files
cin
parents:
diff changeset
89 return m_final[m_state];
f2150c16b476 missing files
cin
parents:
diff changeset
90 }
f2150c16b476 missing files
cin
parents:
diff changeset
91 }
f2150c16b476 missing files
cin
parents:
diff changeset
92
f2150c16b476 missing files
cin
parents:
diff changeset
93 [MethodImpl(MethodImplOptions.AggressiveInlining)]
f2150c16b476 missing files
cin
parents:
diff changeset
94 public void ResetState() {
f2150c16b476 missing files
cin
parents:
diff changeset
95 m_state = m_initialState;
f2150c16b476 missing files
cin
parents:
diff changeset
96 }
f2150c16b476 missing files
cin
parents:
diff changeset
97
f2150c16b476 missing files
cin
parents:
diff changeset
98 public FastInputScanner<TTag> Clone() {
f2150c16b476 missing files
cin
parents:
diff changeset
99 var clone = new FastInputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState);
f2150c16b476 missing files
cin
parents:
diff changeset
100 clone.m_state = m_state;
f2150c16b476 missing files
cin
parents:
diff changeset
101 clone.m_position = m_position;
f2150c16b476 missing files
cin
parents:
diff changeset
102 return clone;
f2150c16b476 missing files
cin
parents:
diff changeset
103 }
f2150c16b476 missing files
cin
parents:
diff changeset
104
f2150c16b476 missing files
cin
parents:
diff changeset
105 public bool Scan(char[] data, int offset, int max) {
f2150c16b476 missing files
cin
parents:
diff changeset
106 var next = m_state;
f2150c16b476 missing files
cin
parents:
diff changeset
107
f2150c16b476 missing files
cin
parents:
diff changeset
108 m_position = offset;
f2150c16b476 missing files
cin
parents:
diff changeset
109 while (m_position < max) {
f2150c16b476 missing files
cin
parents:
diff changeset
110 var ch = data[m_position];
f2150c16b476 missing files
cin
parents:
diff changeset
111
f2150c16b476 missing files
cin
parents:
diff changeset
112 next = m_dfa[(ch >= AlphabetSize ? (next << StateShift) | UnclassifiedInput : (next << StateShift) | ch)];
f2150c16b476 missing files
cin
parents:
diff changeset
113
f2150c16b476 missing files
cin
parents:
diff changeset
114 if (next == UnreachableState)
f2150c16b476 missing files
cin
parents:
diff changeset
115 // scanner stops at the next position after the last recognized symbol
f2150c16b476 missing files
cin
parents:
diff changeset
116 return false;
f2150c16b476 missing files
cin
parents:
diff changeset
117
f2150c16b476 missing files
cin
parents:
diff changeset
118 m_state = next;
f2150c16b476 missing files
cin
parents:
diff changeset
119 m_position++;
f2150c16b476 missing files
cin
parents:
diff changeset
120 }
f2150c16b476 missing files
cin
parents:
diff changeset
121
f2150c16b476 missing files
cin
parents:
diff changeset
122 return true;
f2150c16b476 missing files
cin
parents:
diff changeset
123 }
f2150c16b476 missing files
cin
parents:
diff changeset
124
f2150c16b476 missing files
cin
parents:
diff changeset
125
f2150c16b476 missing files
cin
parents:
diff changeset
126 }
f2150c16b476 missing files
cin
parents:
diff changeset
127 }