165
|
1 using System;
|
163
|
2 using System.Globalization;
|
165
|
3 using Implab.Automaton;
|
176
|
4 using System.Text;
|
|
5 using Implab.Components;
|
|
6 using System.IO;
|
163
|
7
|
228
|
8 namespace Implab.Formats.Json {
|
163
|
/// <summary>
/// Scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
/// <remarks>
/// The scanner works on a character buffer which is refilled through
/// <see cref="Read"/> every time the buffered data is exhausted.
/// </remarks>
public abstract class JsonScanner : Disposable {
    // Scanner used by ReadToken to recognize tokens outside of string literals.
    readonly FastInputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();

    // Scanner used by ReadString to recognize the parts of a string literal.
    readonly FastInputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // Scratch space for the four hex digits of a unicode escape sequence.
    readonly char[] m_unescapeBuf = new char[4];

    // Input buffer; it is overwritten from index 0 every time more data is read.
    readonly char[] m_buffer;

    // Index just past the last valid character in m_buffer (an end position, not a count).
    int m_length;

    // Index of the next character in m_buffer to be scanned.
    int m_pos;

    // Accumulates the text of the token which is currently being read.
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner with an input buffer and the range of data already present in it.
    /// </summary>
    /// <param name="buffer">The buffer holding the input; it is reused when more data is read.</param>
    /// <param name="pos">The index in <paramref name="buffer"/> of the first character to scan.</param>
    /// <param name="length">The index just past the last valid character in <paramref name="buffer"/>
    /// (an end position, not a number of characters).</param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    protected JsonScanner(char[] buffer, int pos, int length) {
        // fail early instead of with a NullReferenceException during the first scan
        if (buffer == null) {
            throw new ArgumentNullException(nameof(buffer));
        }

        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    /// <summary>
    /// Runs the specified scanner over the input until it recognizes a token or the input ends,
    /// refilling the buffer as needed.
    /// </summary>
    /// <param name="scanner">The scanner to run; its state is reset first.</param>
    /// <param name="captureAll"><c>true</c> to append the text of every recognized token to the token
    /// builder (used while reading a string), <c>false</c> to append only numbers and literals.</param>
    /// <param name="tokenType">Receives the tag of the recognized token, or
    /// <see cref="JsonGrammar.TokenType.None"/> when the end of the input is reached.</param>
    /// <returns><c>true</c> if a token has been recognized, <c>false</c> if there is no more input.</returns>
    /// <exception cref="ParserException">The input ends in the middle of a token or contains
    /// a character which can't continue the current token.</exception>
    bool ReadChunk(FastInputScanner<JsonGrammar.TokenType> scanner, bool captureAll, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        while (scanner.Scan(m_buffer, m_pos, m_length)) {
            // the scanner requests new data

            if (m_pos != m_length) {
                // the buffer is about to be overwritten, keep what has been scanned so far
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
            }

            // read the next portion of data
            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // no data has been read
                if (scanner.Position == m_pos) {
                    // the scanner hasn't moved, that's the end
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                if (!scanner.IsFinal) {
                    throw new ParserException("Unexpected EOF");
                }

                // the input has ended exactly at the end of a complete token
                m_pos = 0;
                tokenType = scanner.Tag;
                return true;
            }

            m_pos = 0;
        }

        var scannerPos = scanner.Position;

        // The scanner has stopped at scannerPos. Scanning resumes from this index (see the
        // assignment to m_pos below), i.e. it is the first character which hasn't been consumed,
        // so it is the one to report when the scanner isn't in a final state.
        if (!scanner.IsFinal) {
            throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");
        }

        tokenType = scanner.Tag;

        var capture = captureAll
            || tokenType == JsonGrammar.TokenType.Number
            || tokenType == JsonGrammar.TokenType.Literal;

        if (scannerPos != m_pos && capture) {
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
        }

        m_pos = scannerPos;
        return true;
    }

    /// <summary>
    /// Reads the next portion of the input into the specified buffer.
    /// </summary>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">The index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="size">The maximum number of characters to read.</param>
    /// <returns>The number of characters read; <c>0</c> is treated by the scanner as the end of the input.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Reads the next token from the input.
    /// </summary>
    /// <param name="tokenValue">Receives the value of the token: the unescaped contents of a string,
    /// the source text of a number or a literal, <c>null</c> for all other tokens.</param>
    /// <param name="tokenType">Receives the type of the token.</param>
    /// <returns><c>true</c> if a token has been read, <c>false</c> if the end of the input has been reached.</returns>
    /// <remarks>An exception is thrown if a token can't be recognized. Whitespace is skipped.
    /// Escape sequences in strings are translated; numbers and literals are returned as text and
    /// aren't converted.</remarks>
    public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (ReadChunk(m_jsonContext, false, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // an opening quote, the rest of the string is read in the string context
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Literal;
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    // drop whatever has been captured while refilling the buffer and read on
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    // the remaining tags are converted to the public token type by a direct cast
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }

        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    /// <summary>
    /// Reads the contents of a string literal up to the closing quote; the opening quote
    /// must have been consumed already.
    /// </summary>
    /// <returns>The contents of the string with escape sequences translated.</returns>
    /// <exception cref="ParserException">The input ends before the closing quote.</exception>
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        // every chunk is appended to the token builder as it is read, escape sequences
        // are then replaced in place at the end of the builder
        while (ReadChunk(m_stringContext, true, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // remove the closing quote
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    // plain text is already in the builder
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // take the four hex digits, then drop all six characters of the sequence
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // take the character after the backslash, then drop both characters
                    var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
|
|
190 }
|