annotate Implab/Formats/TextScanner.cs @ 209:a867536c68fc v2

Bound promise to CancellationToken. Added new states to ExecutionState enum. Added Safe.Guard() method to handle cleanup of the result of the promise.
author cin
date Wed, 16 Nov 2016 03:06:08 +0300
parents 76e8f2ba12b8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
173
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
1 using System;
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
2 using Implab.Components;
175
cin
parents: 174
diff changeset
3 using System.Diagnostics;
cin
parents: 174
diff changeset
4 using Implab.Automaton;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 175
diff changeset
5 using System.Text;
173
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
6
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
7 namespace Implab.Formats {
176
0c3c69fe225b rewritten the text scanner
cin
parents: 175
diff changeset
/// <summary>
/// Base class for scanners which split a character stream into tokens using
/// a table-driven DFA. The characters are either supplied as a ready buffer
/// or pulled in chunks from the <see cref="Read"/> method implemented by
/// descendants.
/// </summary>
public abstract class TextScanner : Disposable {
    // the upper limit for the size of the internal buffer, in chars
    readonly int m_bufferMax;
    // the number of chars requested from Read() at once, a non-positive
    // value disables reading (see Feed())
    readonly int m_chunkSize;

    char[] m_buffer;
    // the position of the first char which isn't consumed yet
    int m_bufferOffset;
    // the position right after the last valid char in the buffer
    int m_bufferSize;
    // the location of the last token read by ReadToken()
    int m_tokenOffset;
    int m_tokenLength;

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// which reads its input in chunks through <see cref="Read"/>.
    /// </summary>
    /// <param name="bufferMax">The maximum size of the internal buffer, in chars.</param>
    /// <param name="chunkSize">The number of chars to request at once, should not exceed <paramref name="bufferMax"/>.</param>
    protected TextScanner(int bufferMax, int chunkSize) {
        // check the arguments, the fields aren't assigned at this point yet
        Debug.Assert(chunkSize <= bufferMax);

        m_bufferMax = bufferMax;
        m_chunkSize = chunkSize;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// over the specified buffer. The chunk size is left zero, therefore
    /// <see cref="Feed()"/> will never read additional data.
    /// </summary>
    /// <param name="buffer">The chars to scan, <c>null</c> leaves the scanner empty.</param>
    protected TextScanner(char[] buffer) {
        if (buffer != null) {
            m_buffer = buffer;
            m_bufferSize = buffer.Length;
        }
    }

    /// <summary>
    /// (hungry) Reads the next token.
    /// </summary>
    /// <returns><c>true</c> if the token was read, <c>false</c> if there are no more tokens in the stream.</returns>
    /// <param name="dfa">The transition map for the automaton</param>
    /// <param name="final">Final states of the automaton.</param>
    /// <param name="tags">Tags for the states of the automaton, indexed by the state.</param>
    /// <param name="state">The initial state for the automaton.</param>
    /// <param name="alphabet">Maps a char code to the input symbol of the automaton, chars
    /// beyond the end of this map are treated as <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
    /// <param name="tag">Receives the tags of the state in which the automaton has stopped,
    /// <c>null</c> if the token wasn't read.</param>
    /// <exception cref="ParserException">The automaton has stopped in a non-final state
    /// after some input was consumed or at an unexpected char.</exception>
    internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
        m_tokenLength = 0;
        tag = null;

        var maxSymbol = alphabet.Length - 1;
        int next;
        do {
            // after the next chunk is read the offset in the buffer may change
            int pos = m_bufferOffset + m_tokenLength;
            next = state;
            while (pos < m_bufferSize) {
                var ch = m_buffer[pos];

                next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];

                // no transition for this char, the token ends right before it
                if (next == AutomatonConst.UNREACHABLE_STATE)
                    break;

                state = next;
                pos++;
            }
            m_tokenLength = pos - m_bufferOffset;
            // the buffer is exhausted but the automaton can continue, try to get more data
        } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

        // remember the token and move past it
        m_tokenOffset = m_bufferOffset;
        m_bufferOffset += m_tokenLength;

        if (final[state]) {
            tag = tags[state];
            return true;
        }

        if (m_bufferOffset == m_bufferSize) {
            if (m_tokenLength == 0) //EOF
                return false;

            // the input has ended in the middle of the token
            throw new ParserException();
        }

        throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));

    }

    /// <summary>
    /// Replaces the current buffer with the specified range of chars.
    /// </summary>
    /// <param name="buffer">The new buffer.</param>
    /// <param name="offset">The position of the first char to scan.</param>
    /// <param name="length">The number of chars to scan.</param>
    protected void Feed(char[] buffer, int offset, int length) {
        m_buffer = buffer;
        m_bufferOffset = offset;
        m_bufferSize = offset + length;
    }

    /// <summary>
    /// Reads the next chunk of data using <see cref="Read"/> and appends it
    /// to the chars which aren't consumed yet.
    /// </summary>
    /// <returns><c>true</c> if some data was read, <c>false</c> if reading is
    /// disabled (the chunk size isn't positive) or <see cref="Read"/> has
    /// returned no data.</returns>
    /// <exception cref="ParserException">The buffer needs to grow beyond the limit.</exception>
    protected bool Feed() {
        if (m_chunkSize <= 0)
            return false;

        if (m_buffer != null) {
            var free = m_buffer.Length - m_bufferSize;

            if (free < m_chunkSize) {
                // not enough room at the tail of the buffer, move the
                // unconsumed chars to the start of a new buffer
                free += m_chunkSize;
                var used = m_bufferSize - m_bufferOffset;
                var size = used + free;

                if (size > m_bufferMax)
                    throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                var temp = new char[size];

                var read = Read(temp, used, m_chunkSize);
                if (read == 0)
                    return false;

                Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

                m_bufferOffset = 0;
                m_bufferSize = used + read;
                m_buffer = temp;
            } else {
                var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                if (read == 0)
                    return false;
                // advance by the number of chars actually read, it can be
                // less than the requested chunk size
                m_bufferSize += read;
            }
            return true;
        } else {
            Debug.Assert(m_bufferOffset == 0);
            m_buffer = new char[m_chunkSize];
            m_bufferSize = Read(m_buffer, 0, m_chunkSize);
            return (m_bufferSize != 0);
        }
    }

    /// <summary>
    /// Reads the next portion of the input into the specified buffer.
    /// </summary>
    /// <returns>The number of chars read, zero is treated as the end of the input.</returns>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">The position in the buffer to start writing at.</param>
    /// <param name="size">The maximum number of chars to read.</param>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Creates a string from the chars of the last token read.
    /// </summary>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLength);
    }

    /// <summary>
    /// Copies the chars of the last token read to the specified array.
    /// </summary>
    /// <param name="buffer">The destination array.</param>
    /// <param name="offset">The position in the destination array to start writing at.</param>
    public void CopyTokenTo(char[] buffer, int offset) {
        Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
    }

    /// <summary>
    /// Appends the chars of the last token read to the specified string builder.
    /// </summary>
    /// <param name="sb">The string builder to append to.</param>
    public void CopyTokenTo(StringBuilder sb) {
        sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
    }

}
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
156 }
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
157