comparison Implab/Formats/Json/JsonScanner.cs @ 235:b49969a7043c v2

Слияние
author cin
date Thu, 05 Oct 2017 09:24:49 +0300
parents 3e26338eb977
children 302ca905c19e
comparison
equal deleted inserted replaced
234:8dd666e6b6bf 235:b49969a7043c
1 using System;
2 using System.Globalization;
3 using Implab.Automaton;
4 using System.Text;
5 using Implab.Components;
6 using System.IO;
7
8 namespace Implab.Formats.Json {
/// <summary>
/// Scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
/// <remarks>
/// Characters are pulled through <see cref="Read"/> into a fixed buffer and matched by two
/// automatons: one for the JSON grammar itself and one for the contents of string literals.
/// </remarks>
public abstract class JsonScanner : Disposable {
    readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
    readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // Scratch space for the four hex digits of a unicode escape sequence.
    readonly char[] m_unescapeBuf = new char[4];
    readonly char[] m_buffer;
    // Exclusive end index of the valid data in m_buffer.
    int m_length;
    // Index of the next character in m_buffer that has not been consumed yet.
    int m_pos;
    // Accumulates the text of the current token, including parts carried over between buffer refills.
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner with a working buffer that may already contain data.
    /// </summary>
    /// <param name="buffer">The buffer used for scanning; it is refilled through <see cref="Read"/>.</param>
    /// <param name="pos">Index of the first unread character in <paramref name="buffer"/>.</param>
    /// <param name="length">Exclusive end index of the valid data in <paramref name="buffer"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="pos"/> and <paramref name="length"/> do not satisfy
    /// <c>0 &lt;= pos &lt;= length &lt;= buffer.Length</c>.
    /// </exception>
    protected JsonScanner(char[] buffer, int pos, int length) {
        if (buffer == null)
            throw new ArgumentNullException(nameof(buffer));
        if (length < 0 || length > buffer.Length)
            throw new ArgumentOutOfRangeException(nameof(length));
        if (pos < 0 || pos > length)
            throw new ArgumentOutOfRangeException(nameof(pos));

        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    /// <summary>
    /// Runs <paramref name="scanner"/> over the input until it stops, refilling the buffer as needed.
    /// </summary>
    /// <param name="scanner">The automaton to run; its state is reset first.</param>
    /// <param name="captureAll">
    /// When <c>true</c> the text of every recognized chunk is appended to the token builder;
    /// when <c>false</c> only the text of numbers and literals is.
    /// </param>
    /// <param name="tokenType">The tag of the recognized chunk, or <c>None</c> at the end of input.</param>
    /// <returns><c>true</c> if a chunk was recognized; <c>false</c> if the input is exhausted.</returns>
    /// <exception cref="ParserException">
    /// The input ended, or an unacceptable character was met, while the scanner was not in a final state.
    /// </exception>
    bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, bool captureAll, out JsonGrammar.TokenType tokenType) {
        scanner.ResetState();

        // Scan returns true when it has used up the buffer and asks for more data.
        while (scanner.Scan(m_buffer, m_pos, m_length)) {
            // The buffer is about to be overwritten: keep the unconsumed tail for the current token.
            if (m_pos != m_length)
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

            m_length = Read(m_buffer, 0, m_buffer.Length);

            if (m_length == 0) {
                // No more data. If the scanner has not moved either, this is a clean end of input.
                if (scanner.Position == m_pos) {
                    m_pos = 0;
                    tokenType = JsonGrammar.TokenType.None;
                    return false;
                }

                // The input ended in the middle of something the automaton cannot accept.
                if (!scanner.IsFinal)
                    throw new ParserException("Unexpected EOF");

                m_pos = 0;
                tokenType = scanner.Tag;
                return true;
            }

            m_pos = 0;
        }

        var scannerPos = scanner.Position;

        // The scanner stopped inside the buffer; a non-final state means the input is malformed.
        if (!scanner.IsFinal)
            throw UnexpectedCharacter(scannerPos);

        tokenType = scanner.Tag;

        var wantsText = captureAll
            || tokenType == JsonGrammar.TokenType.Number
            || tokenType == JsonGrammar.TokenType.Literal;

        if (scannerPos != m_pos && wantsText)
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);

        m_pos = scannerPos;
        return true;
    }

    /// <summary>
    /// Creates the exception reported when the scanner stops on a character it cannot accept.
    /// </summary>
    /// <param name="scannerPos">The position at which the scanner stopped.</param>
    /// <returns>The exception to throw.</returns>
    ParserException UnexpectedCharacter(int scannerPos) {
        // The recognized text is [m_pos, scannerPos), so the character at scannerPos is taken to be the
        // one that stopped the scanner.
        // NOTE(review): InputScanner is not visible here - confirm Position points at the rejected character.
        if (scannerPos >= 0 && scannerPos < m_length)
            return new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");

        // Never index outside the valid data: that would mask the parse error with an index error.
        return new ParserException("Unexpected character");
    }

    /// <summary>
    /// Reads the next portion of input into the buffer.
    /// </summary>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">The index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="size">The maximum number of characters to read.</param>
    /// <returns>
    /// The number of characters read. The scanner treats <c>0</c> as the end of input and otherwise
    /// uses the value as the end of the valid data, always calling with <paramref name="offset"/> 0.
    /// </returns>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Reads the next lexical element from the input.
    /// </summary>
    /// <param name="tokenValue">
    /// Receives the token value: the unescaped content for strings, the source text for numbers
    /// and literals, and <c>null</c> for every other token.
    /// </param>
    /// <param name="tokenType">Receives the type of the token read.</param>
    /// <returns><c>true</c> if a token was read; <c>false</c> if the end of input was reached.</returns>
    /// <remarks>
    /// Whitespace is skipped. Escape sequences inside strings are translated. Numbers are returned
    /// as text and are not converted here.
    /// </remarks>
    /// <exception cref="ParserException">The input cannot be recognized as a JSON token.</exception>
    public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (ReadChunk(m_jsonContext, false, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // An opening quote: the rest of the string is read by the string automaton.
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenValue = m_tokenBuilder.ToString();
                    tokenType = JsonTokenType.Literal;
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    // Drop whitespace carried over a buffer refill and read the next chunk.
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    // NOTE(review): relies on JsonTokenType and JsonGrammar.TokenType sharing numeric
                    // values for the remaining tokens - confirm against both enums.
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }

        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    /// <summary>
    /// Reads the content of a string literal whose opening quote has already been consumed.
    /// </summary>
    /// <returns>The string content with escape sequences translated.</returns>
    /// <exception cref="ParserException">The input ended before the closing quote.</exception>
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        // Every chunk is appended to the token builder as is; escapes are rewritten in place below.
        while (ReadChunk(m_stringContext, true, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Remove the closing quote that was captured together with the chunk.
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // Take the four hex digits, drop the six-character escape, append the result.
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // Replace the backslash and the character after it with the translated character.
                    var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
190 }