Mercurial > pub > ImplabNet
comparison Implab/Formats/TextScanner.cs @ 190:1c2a16d071a7 v2
Слияние с ref20160224
author | cin |
---|---|
date | Fri, 22 Apr 2016 13:08:08 +0300 |
parents | 76e8f2ba12b8 |
children |
comparison
equal
deleted
inserted
replaced
161:2a8466f0cb8a | 190:1c2a16d071a7 |
---|---|
1 using System; | |
2 using Implab.Components; | |
3 using System.Diagnostics; | |
4 using Implab.Automaton; | |
5 using System.Text; | |
6 | |
namespace Implab.Formats {
    /// <summary>
    /// Base class for scanners which split a stream of characters into tokens
    /// using a table driven deterministic finite automaton.
    /// </summary>
    /// <remarks>
    /// The scanner works over an internal character buffer. The buffer is either
    /// supplied as a whole (see <see cref="TextScanner(char[])"/> and
    /// <see cref="Feed(char[],int,int)"/>) or is filled chunk by chunk through
    /// the <see cref="Read"/> method implemented by descendants.
    /// </remarks>
    public abstract class TextScanner : Disposable {
        // the upper limit for the size of the buffer allocated by Feed()
        readonly int m_bufferMax;
        // the number of characters requested from Read() at once,
        // zero or less means that the scanner can't read additional data
        readonly int m_chunkSize;

        // the buffer with the data being scanned
        char[] m_buffer;
        // the index of the first character which isn't consumed yet
        int m_bufferOffset;
        // the index right after the last valid character in the buffer
        int m_bufferSize;
        // the index of the first character of the last token
        int m_tokenOffset;
        // the length of the last token
        int m_tokenLength;

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which reads the data chunk by chunk using the <see cref="Read"/> method.
        /// </summary>
        /// <param name="bufferMax">The maximum size of the internal buffer, in characters.</param>
        /// <param name="chunkSize">The number of characters to request on each read,
        /// should not exceed <paramref name="bufferMax"/>.</param>
        protected TextScanner(int bufferMax, int chunkSize) {
            // check the arguments, the fields aren't assigned at this point
            Debug.Assert(chunkSize <= bufferMax);

            m_bufferMax = bufferMax;
            m_chunkSize = chunkSize;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which scans the specified buffer. The chunk size is left zero, therefore
        /// <see cref="Feed()"/> will never try to read additional data.
        /// </summary>
        /// <param name="buffer">The buffer with the data to scan, can be <c>null</c>
        /// in which case the scanner starts without any data.</param>
        protected TextScanner(char[] buffer) {
            if (buffer != null) {
                m_buffer = buffer;
                m_bufferSize = buffer.Length;
            }
        }

        /// <summary>
        /// (hungry) Reads the next token. The automaton is run while it is able
        /// to make a transition, additional data is requested with <see cref="Feed()"/>
        /// when the buffer is exhausted.
        /// </summary>
        /// <returns><c>true</c> if the token was read, <c>false</c> if there are no more tokens in the stream.</returns>
        /// <param name="dfa">The transition map for the automaton, indexed by the state and the input symbol.</param>
        /// <param name="final">Final states of the automaton, indexed by the state.</param>
        /// <param name="tags">Tags of the states, indexed by the state.</param>
        /// <param name="state">The initial state for the automaton.</param>
        /// <param name="alphabet">The map from a character code to the input symbol of the automaton,
        /// characters outside of this map are translated to <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
        /// <param name = "tag">Receives the tags of the final state in which the automaton has stopped,
        /// <c>null</c> if no token was read.</param>
        /// <exception cref="ParserException">The automaton has stopped in a state which isn't final
        /// and the input contains either an unexpected symbol or an incomplete token.</exception>
        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
            m_tokenLength = 0;
            tag = null;

            var maxSymbol = alphabet.Length - 1;
            int next;
            do {
                // after the next chunk is read the offset in the buffer may change
                int pos = m_bufferOffset + m_tokenLength;
                next = state;
                while (pos < m_bufferSize) {
                    var ch = m_buffer[pos];

                    next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];

                    if (next == AutomatonConst.UNREACHABLE_STATE)
                        break;

                    state = next;
                    pos++;
                }
                m_tokenLength = pos - m_bufferOffset;
                // continue only if the automaton isn't stuck and there is more data
            } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

            // remember the token and consume it from the buffer
            m_tokenOffset = m_bufferOffset;
            m_bufferOffset += m_tokenLength;

            if (final[state]) {
                tag = tags[state];
                return true;
            }

            if (m_bufferOffset == m_bufferSize) {
                if (m_tokenLength == 0) //EOF
                    return false;

                // the data has ended in the middle of the token
                throw new ParserException();
            }

            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));

        }

        /// <summary>
        /// Replaces the current buffer with the specified one.
        /// </summary>
        /// <param name="buffer">The buffer with the data to scan.</param>
        /// <param name="offset">The index of the first character to scan.</param>
        /// <param name="length">The number of characters to scan.</param>
        protected void Feed(char[] buffer, int offset, int length) {
            m_buffer = buffer;
            m_bufferOffset = offset;
            m_bufferSize = offset + length;
        }

        /// <summary>
        /// Reads the next chunk of data to the buffer using the <see cref="Read"/> method.
        /// When the buffer doesn't have enough free space for the chunk a new buffer
        /// is allocated and the data which isn't consumed yet is moved to its beginning.
        /// </summary>
        /// <returns><c>true</c> if some data was read, <c>false</c> if the scanner has no
        /// chunk size set or <see cref="Read"/> has returned no data.</returns>
        /// <exception cref="ParserException">The required buffer size exceeds the limit
        /// specified in the constructor.</exception>
        protected bool Feed() {
            if (m_chunkSize <= 0)
                return false;

            if (m_buffer != null) {
                var free = m_buffer.Length - m_bufferSize;

                if (free < m_chunkSize) {
                    free += m_chunkSize;
                    var used = m_bufferSize - m_bufferOffset;
                    var size = used + free;

                    if (size > m_bufferMax)
                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                    var temp = new char[size];

                    // read first, the current buffer is left intact when there is no more data
                    var read = Read(temp, used, m_chunkSize);
                    if (read == 0)
                        return false;

                    Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

                    m_bufferOffset = 0;
                    m_bufferSize = used + read;
                    m_buffer = temp;
                } else {
                    var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                    if (read == 0)
                        return false;
                    // advance by the actual number of characters, it can be less than requested
                    m_bufferSize += read;
                }
                return true;
            } else {
                Debug.Assert(m_bufferOffset == 0);
                m_buffer = new char[m_chunkSize];
                m_bufferSize = Read(m_buffer, 0, m_chunkSize);
                return (m_bufferSize != 0);
            }
        }

        /// <summary>
        /// Reads the next portion of data to the specified buffer.
        /// </summary>
        /// <returns>The number of characters actually read, zero is treated as the end of the data.</returns>
        /// <param name="buffer">The buffer to store the characters.</param>
        /// <param name="offset">The index in the buffer at which to start writing.</param>
        /// <param name="size">The maximum number of characters to read.</param>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Gets the text of the last token.
        /// </summary>
        /// <returns>The new string with the characters of the last token.</returns>
        public string GetTokenValue() {
            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// Copies the characters of the last token to the specified array.
        /// </summary>
        /// <param name="buffer">The destination array.</param>
        /// <param name="offset">The index in the destination array at which to start writing.</param>
        public void CopyTokenTo(char[] buffer, int offset) {
            Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
        }

        /// <summary>
        /// Appends the characters of the last token to the specified string builder.
        /// </summary>
        /// <param name="sb">The string builder to append the token to.</param>
        public void CopyTokenTo(StringBuilder sb) {
            sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
        }

    }
}
157 |