annotate Implab/Parsing/Scanner.cs @ 58:1710dcda34bb

Added JSONXmlReader
author cin
date Tue, 17 Jun 2014 19:40:43 +0400
parents 7759c80cad95
children 21611344d366
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
55
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
1 using Implab;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
2 using System;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
3 using System.Collections.Generic;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
4 using System.Linq;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
5 using System.Text;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
6 using System.Threading.Tasks;
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
7
c0bf853aa04f Added initial JSON support
cin
parents:
diff changeset
namespace Implab.Parsing {
    /// <summary>
    /// Base class for splitting a stream of input characters into tokens.
    /// </summary>
    /// <remarks>
    /// The scanner holds a buffer with the characters of the input text. Two markers move
    /// over that buffer: the start of the current token and the current read position.
    /// While advancing, a DFA is used to detect the end of the token and to decide whether
    /// the current character is acceptable.
    /// </remarks>
    public class Scanner {
        /// <summary>
        /// Snapshot of a DFA configuration saved by <see cref="Switch"/> and
        /// re-applied by <see cref="Restore"/>.
        /// </summary>
        struct ScannerConfig {
            public DFAStateDescriptior[] states;
            public int[] alphabetMap;
        }

        // Previously active DFA configurations, most recent on top.
        readonly Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>();

        // State table and character-to-symbol-class map of the active DFA.
        DFAStateDescriptior[] m_states;
        int[] m_alphabetMap;

        protected DFAStateDescriptior m_currentState;

        // Symbol class (as mapped by m_alphabetMap) of the character at m_pointer.
        int m_previewCode;

        protected int m_tokenLen = 0;
        protected int m_tokenOffset;

        protected char[] m_buffer;
        protected int m_bufferSize;
        protected int m_pointer;

        /// <summary>
        /// Creates a scanner for the specified DFA and feeds it with the given text.
        /// </summary>
        /// <param name="definition">DFA definition supplying the state table and the alphabet.</param>
        /// <param name="text">Text to scan; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
        public Scanner(CDFADefinition definition, string text) {
            Safe.ArgumentNotNull(definition, "definition");
            Safe.ArgumentNotEmpty(text, "text");

            m_states = definition.States;
            m_alphabetMap = definition.Alphabet.GetTranslationMap();

            Feed(text.ToCharArray());
        }

        /// <summary>
        /// Creates a scanner for the specified DFA with an empty input buffer.
        /// </summary>
        /// <param name="definition">DFA definition supplying the state table and the alphabet.</param>
        /// <remarks>
        /// Feeding the empty buffer immediately reaches the end of data, therefore the virtual
        /// <see cref="ReadNextChunk"/> is invoked from within this constructor, before the
        /// constructor of a derived class has run.
        /// </remarks>
        public Scanner(CDFADefinition definition) {
            Safe.ArgumentNotNull(definition, "definition");

            m_states = definition.States;
            m_alphabetMap = definition.Alphabet.GetTranslationMap();

            Feed(new char[0]);
        }

        /// <summary>
        /// Fills the input buffer with the specified data.
        /// </summary>
        /// <param name="data">Data to process.</param>
        /// <remarks>The data is not copied; the passed array itself is used as the
        /// input buffer.</remarks>
        public void Feed(char[] data) {
            Safe.ArgumentNotNull(data, "data");

            Feed(data, data.Length);
        }

        /// <summary>
        /// Fills the read buffer with the specified data.
        /// </summary>
        /// <param name="data">Data to process.</param>
        /// <param name="length">Length of the data to process.</param>
        /// <remarks>The data is not copied; the passed array itself is used as the
        /// input buffer. The read position is reset to the first character of
        /// <paramref name="data"/>.</remarks>
        public void Feed(char[] data, int length) {
            Safe.ArgumentNotNull(data, "data");
            Safe.ArgumentInRange(length, 0, data.Length, "length");

            // Start one step before the data so that Shift() lands on the first character
            // and computes its symbol class.
            m_pointer = -1;
            m_buffer = data;
            m_bufferSize = length;
            Shift();
        }

        /// <summary>
        /// Gets the current token as a string.
        /// </summary>
        /// <returns>A new string built from <see cref="m_tokenLen"/> characters of
        /// <see cref="m_buffer"/> starting at <see cref="m_tokenOffset"/>.</returns>
        public string GetTokenValue() {
            return new String(m_buffer, m_tokenOffset, m_tokenLen);
        }

        /// <summary>
        /// Tags of the current token that were assigned in the regular expression.
        /// </summary>
        public int[] TokenTags {
            get {
                return m_currentState.tag;
            }
        }

        /// <summary>
        /// Indicates that the end of the data has been reached.
        /// </summary>
        public bool EOF {
            get {
                return m_pointer >= m_bufferSize;
            }
        }

        /// <summary>
        /// Reads the next token. Afterwards <see cref="m_tokenOffset"/> points to the start of
        /// the token, <see cref="m_tokenLen"/> holds its length and <see cref="m_buffer"/> is
        /// the character array that contains the token.
        /// </summary>
        /// <returns><c>false</c> - the end of data has been reached, no token was read.</returns>
        /// <exception cref="ParserException">The current character is not accepted by the DFA
        /// while it is in a non-final state, or the data ended while the DFA was in a
        /// non-final state.</exception>
        protected bool ReadTokenInternal() {
            if (EOF)
                return false;

            m_currentState = m_states[CDFADefinition.INITIAL_STATE];
            m_tokenLen = 0;
            m_tokenOffset = m_pointer;

            do {
                int nextState = m_currentState.transitions[m_previewCode];
                if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
                    // No transition for the current character: the token ends here if the
                    // DFA is in a final state, otherwise the input is invalid.
                    if (m_currentState.final)
                        return true;

                    throw new ParserException(
                        String.Format(
                            "Unexpected symbol '{0}', at pos {1}",
                            m_buffer[m_pointer],
                            Position
                        )
                    );
                }

                m_currentState = m_states[nextState];
                m_tokenLen++;
            } while (Shift());

            // END OF DATA
            if (!m_currentState.final)
                throw new ParserException("Unexpected end of data");

            return true;
        }

        /// <summary>
        /// Advances the read position by one character and refreshes the preview symbol class.
        /// </summary>
        /// <returns><c>true</c> if a character is available at the new position; otherwise the
        /// result of <see cref="ReadNextChunk"/>.</returns>
        bool Shift() {
            m_pointer++;

            if (m_pointer >= m_bufferSize) {
                // The preview symbol is not refreshed here; an override that returns true
                // presumably supplies the new data through Feed, which does refresh it.
                return ReadNextChunk();
            }

            m_previewCode = m_alphabetMap[m_buffer[m_pointer]];

            return true;
        }

        /// <summary>
        /// Recomputes the preview symbol class for the character at the read position using
        /// the active alphabet map.
        /// </summary>
        /// <remarks>
        /// Does nothing when the read position is outside of the buffered data (for example
        /// after the last token has been read); in that case there is no character to classify
        /// and the next <see cref="Feed(char[], int)"/> recomputes the value anyway.
        /// </remarks>
        void UpdatePreviewCode() {
            if (m_pointer >= 0 && m_pointer < m_bufferSize)
                m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
        }

        /// <summary>
        /// Called when the end of the input buffer is reached in order to obtain new data.
        /// </summary>
        /// <returns><c>true</c> - new data has been obtained and processing can continue.
        /// The base implementation always returns <c>false</c>.</returns>
        protected virtual bool ReadNextChunk() {
            return false;
        }

        /// <summary>
        /// Position of the scanner in the input buffer (the zero-based read index plus one).
        /// </summary>
        public int Position {
            get {
                return m_pointer + 1;
            }
        }

        /// <summary>
        /// Switches the internal DFA to the specified one; this allows implementing something
        /// similar to a capturing group. The previous DFA is saved and can be re-activated
        /// with <see cref="Restore"/>.
        /// </summary>
        /// <param name="states">State table of the new DFA.</param>
        /// <param name="alphabet">Input symbol table of the new DFA.</param>
        protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
            // Validate both arguments before touching any state so that a failed call
            // leaves the scanner and the saved-configuration stack unchanged.
            Safe.ArgumentNotNull(states, "states");
            Safe.ArgumentNotNull(alphabet, "alphabet");

            m_defs.Push(new ScannerConfig {
                states = m_states,
                alphabetMap = m_alphabetMap
            });

            m_states = states;
            m_alphabetMap = alphabet;

            // The current character must be re-classified with the new alphabet; this is
            // skipped at the end of data where there is no current character.
            UpdatePreviewCode();
        }

        /// <summary>
        /// Restores the previous DFA of the scanner.
        /// </summary>
        /// <exception cref="InvalidOperationException">There is no saved DFA, i.e.
        /// <see cref="Switch"/> has not been called or every switch has already been
        /// restored.</exception>
        protected void Restore() {
            if (m_defs.Count == 0)
                throw new InvalidOperationException("There is no saved DFA configuration to restore");

            var prev = m_defs.Pop();
            m_states = prev.states;
            m_alphabetMap = prev.alphabetMap;

            // Re-classify the current character with the restored alphabet; skipped at the
            // end of data where there is no current character.
            UpdatePreviewCode();
        }
    }
}