comparison Implab/Formats/TextScanner.cs @ 192:f1da3afc3521 release v2.1

Слияние с v2
author cin
date Fri, 22 Apr 2016 13:10:34 +0300
parents 76e8f2ba12b8
children
comparison
equal deleted inserted replaced
71:1714fd8678ef 192:f1da3afc3521
1 using System;
2 using Implab.Components;
3 using System.Diagnostics;
4 using Implab.Automaton;
5 using System.Text;
6
7 namespace Implab.Formats {
8 public abstract class TextScanner : Disposable {
9 readonly int m_bufferMax;
10 readonly int m_chunkSize;
11
12 char[] m_buffer;
13 int m_bufferOffset;
14 int m_bufferSize;
15 int m_tokenOffset;
16 int m_tokenLength;
17
18 /// <summary>
19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
20 /// </summary>
21 /// <param name="bufferMax">Buffer max.</param>
22 /// <param name="chunkSize">Chunk size.</param>
23 protected TextScanner(int bufferMax, int chunkSize) {
24 Debug.Assert(m_chunkSize <= m_bufferMax);
25
26 m_bufferMax = bufferMax;
27 m_chunkSize = chunkSize;
28 }
29
30 /// <summary>
31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
32 /// </summary>
33 /// <param name="buffer">Buffer.</param>
34 protected TextScanner(char[] buffer) {
35 if (buffer != null) {
36 m_buffer = buffer;
37 m_bufferSize = buffer.Length;
38 }
39 }
40
41 /// <summary>
42 /// (hungry) Reads the next token.
43 /// </summary>
44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
45 /// <param name="dfa">The transition map for the automaton</param>
46 /// <param name="final">Final states of the automaton.</param>
47 /// <param name="tags">Tags.</param>
48 /// <param name="state">The initial state for the automaton.</param>
49 /// <param name="alphabet"></param>
50 /// <param name = "tag"></param>
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 m_tokenLength = 0;
53 tag = null;
54
55 var maxSymbol = alphabet.Length - 1;
56 int next;
57 do {
58 // after the next chunk is read the offset in the buffer may change
59 int pos = m_bufferOffset + m_tokenLength;
60 next = state;
61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos];
63
64 next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65
66 if (next == AutomatonConst.UNREACHABLE_STATE)
67 break;
68
69 state = next;
70 pos++;
71 }
72 m_tokenLength = pos - m_bufferOffset;
73 } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
74
75 m_tokenOffset = m_bufferOffset;
76 m_bufferOffset += m_tokenLength;
77
78 if (final[state]) {
79 tag = tags[state];
80 return true;
81 }
82
83 if (m_bufferOffset == m_bufferSize) {
84 if (m_tokenLength == 0) //EOF
85 return false;
86
87 throw new ParserException();
88 }
89
90 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
91
92 }
93
94 protected void Feed(char[] buffer, int offset, int length) {
95 m_buffer = buffer;
96 m_bufferOffset = offset;
97 m_bufferSize = offset + length;
98 }
99
100 protected bool Feed() {
101 if (m_chunkSize <= 0)
102 return false;
103
104 if (m_buffer != null) {
105 var free = m_buffer.Length - m_bufferSize;
106
107 if (free < m_chunkSize) {
108 free += m_chunkSize;
109 var used = m_bufferSize - m_bufferOffset;
110 var size = used + free;
111
112 if (size > m_bufferMax)
113 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
114
115 var temp = new char[size];
116
117 var read = Read(temp, used, m_chunkSize);
118 if (read == 0)
119 return false;
120
121 Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
122
123 m_bufferOffset = 0;
124 m_bufferSize = used + read;
125 m_buffer = temp;
126 } else {
127 var read = Read(m_buffer, m_bufferSize, m_chunkSize);
128 if (read == 0)
129 return false;
130 m_bufferSize += m_chunkSize;
131 }
132 return true;
133 } else {
134 Debug.Assert(m_bufferOffset == 0);
135 m_buffer = new char[m_chunkSize];
136 m_bufferSize = Read(m_buffer, 0, m_chunkSize);
137 return (m_bufferSize != 0);
138 }
139 }
140
141 protected abstract int Read(char[] buffer, int offset, int size);
142
143 public string GetTokenValue() {
144 return new String(m_buffer, m_tokenOffset, m_tokenLength);
145 }
146
147 public void CopyTokenTo(char[] buffer, int offset) {
148 Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength);
149 }
150
151 public void CopyTokenTo(StringBuilder sb) {
152 sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
153 }
154
155 }
156 }
157