Mercurial > pub > ImplabNet
comparison Implab/Formats/Json/JsonScanner.cs @ 232:133ba4444acc v2
Слияние
author | cin |
---|---|
date | Thu, 21 Sep 2017 01:14:27 +0300 |
parents | 3e26338eb977 |
children | 302ca905c19e |
comparison
equal
deleted
inserted
replaced
231:3eaa9372c754 | 232:133ba4444acc |
---|---|
1 using System; | |
2 using System.Globalization; | |
3 using Implab.Automaton; | |
4 using System.Text; | |
5 using Implab.Components; | |
6 using System.IO; | |
7 | |
8 namespace Implab.Formats.Json { | |
9 /// <summary> | |
10 /// Scanner (lexer) that splits a character stream into JSON tokens. | |
11 /// </summary> | |
12 public abstract class JsonScanner : Disposable { | |
// Scanners obtained from JsonGrammar: one for JSON expressions, one for the contents of string literals. | |
13 readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner(); | |
14 readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner(); | |
15 | |
// Scratch space for the last four characters of an EscapedUnicode chunk (filled in ReadString). | |
16 readonly char[] m_unescapeBuf = new char[4]; | |
// Input buffer that the scanners read from and that Read() refills. | |
17 readonly char[] m_buffer; | |
// End (exclusive) of the valid data in m_buffer; set from the value returned by Read(). | |
18 int m_length; | |
// Index in m_buffer at which the next scan starts. | |
19 int m_pos; | |
// Accumulates the text of the token currently being read. | |
20 readonly StringBuilder m_tokenBuilder = new StringBuilder(); | |
21 | |
/// <summary> | |
/// Initializes the scanner with an input buffer and the range of data that is already in it. | |
/// </summary> | |
/// <param name="buffer">Character buffer to scan; it is also passed to <see cref="Read"/> to be refilled.</param> | |
/// <param name="pos">Index in <paramref name="buffer"/> at which scanning starts.</param> | |
/// <param name="length">End (exclusive) of the data in <paramref name="buffer"/>; it is passed to the scanners as the upper bound.</param> | |
22 protected JsonScanner(char[] buffer, int pos, int length) { | |
23 m_buffer = buffer; | |
24 m_pos = pos; | |
25 m_length = length; | |
26 } | |
27 | |
/// <summary> | |
/// Reads the next chunk recognized by <paramref name="scanner"/>, refilling the buffer through | |
/// <see cref="Read"/> whenever the scanner has consumed all the data that is available. | |
/// </summary> | |
/// <param name="scanner">Scanner to run; its state is reset before scanning.</param> | |
/// <param name="tokenType">Tag of the recognized chunk, or <c>TokenType.None</c> when the input has ended.</param> | |
/// <returns><c>true</c> if a chunk was recognized; <c>false</c> if the end of the input was reached.</returns> | |
/// <remarks>Text left in the buffer before a refill is always saved to the token builder; the final | |
/// part of a chunk is saved only for <c>Number</c> and <c>Literal</c> chunks.</remarks> | |
/// <exception cref="ParserException">The input ends in the middle of a chunk or contains a character the scanner does not accept.</exception> | |
28 bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { | |
29 scanner.ResetState(); | |
30 | |
31 while(scanner.Scan(m_buffer, m_pos, m_length)) { | |
32 // the scanner has consumed everything up to m_length and requests new data | |
33 | |
34 if (m_pos != m_length) // keep the part scanned so far, the buffer is about to be overwritten | |
35 m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos); | |
36 | |
37 // read next data | |
38 m_length = Read(m_buffer, 0, m_buffer.Length); | |
39 | |
40 if (m_length == 0) { | |
41 // no data is read | |
42 if (scanner.Position == m_pos) { | |
43 // the scanner hasn't moved, that's the end | |
44 m_pos = 0; | |
45 tokenType = JsonGrammar.TokenType.None; | |
46 return false; | |
47 } | |
48 | |
49 if (scanner.IsFinal) { | |
50 m_pos = 0; | |
51 tokenType = scanner.Tag; | |
52 return true; | |
53 } else { | |
54 throw new ParserException("Unexpected EOF"); | |
55 } | |
56 } | |
57 | |
58 m_pos = 0; | |
59 } | |
60 var scannerPos = scanner.Position; | |
61 | |
62 // the scanner stopped at scannerPos: that character was not consumed, so it is the offending one | |
63 if (!scanner.IsFinal) | |
64 throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'"); | |
65 | |
66 tokenType = scanner.Tag; | |
// only numbers and literals carry text; an empty range has nothing to append | |
67 if (scannerPos != m_pos && (tokenType == JsonGrammar.TokenType.Number || tokenType == JsonGrammar.TokenType.Literal)) | |
68 m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos); | |
69 | |
70 m_pos = scannerPos; | |
71 return true; | |
72 } | |
73 | |
/// <summary> | |
/// Reads the next chunk of a string literal recognized by <paramref name="scanner"/>, refilling the | |
/// buffer through <see cref="Read"/> whenever the scanner has consumed all the data that is available. | |
/// </summary> | |
/// <param name="scanner">Scanner to run; its state is reset before scanning.</param> | |
/// <param name="tokenType">Tag of the recognized chunk, or <c>TokenType.None</c> when the input has ended.</param> | |
/// <returns><c>true</c> if a chunk was recognized; <c>false</c> if the end of the input was reached.</returns> | |
/// <remarks>The text of every recognized chunk is appended to the token builder, whatever its tag.</remarks> | |
/// <exception cref="ParserException">The input ends in the middle of a chunk or contains a character the scanner does not accept.</exception> | |
74 bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { | |
75 scanner.ResetState(); | |
76 | |
77 while (scanner.Scan(m_buffer, m_pos, m_length)) { | |
78 // the scanner has consumed everything up to m_length and requests new data | |
79 | |
80 if (m_pos != m_length) // keep the part scanned so far, the buffer is about to be overwritten | |
81 m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos); | |
82 | |
83 // read next data | |
84 m_length = Read(m_buffer, 0, m_buffer.Length); | |
85 | |
86 if (m_length == 0) { | |
87 // no data is read | |
88 if (scanner.Position == m_pos) { | |
89 // the scanner hasn't moved, that's the end | |
90 m_pos = 0; | |
91 tokenType = JsonGrammar.TokenType.None; | |
92 return false; | |
93 } | |
94 | |
95 if (scanner.IsFinal) { | |
96 m_pos = 0; | |
97 tokenType = scanner.Tag; | |
98 return true; | |
99 } else { | |
100 throw new ParserException("Unexpected EOF"); | |
101 } | |
102 } | |
103 | |
104 m_pos = 0; | |
105 } | |
106 var scannerPos = scanner.Position; | |
107 | |
108 // the scanner stopped at scannerPos: that character was not consumed, so it is the offending one | |
109 if (!scanner.IsFinal) | |
110 throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'"); | |
111 | |
112 if (scannerPos != m_pos) { | |
113 m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos); | |
114 m_pos = scannerPos; | |
115 } | |
116 tokenType = scanner.Tag; | |
117 return true; | |
118 } | |
119 | |
/// <summary> | |
/// Supplies more input characters; called by the scanner when the data in the buffer is exhausted. | |
/// </summary> | |
/// <param name="buffer">Buffer to fill; the scanner passes its own input buffer.</param> | |
/// <param name="offset">Index in <paramref name="buffer"/> to start writing at; the scanner always passes <c>0</c>.</param> | |
/// <param name="size">Space available; the scanner passes the full length of <paramref name="buffer"/>.</param> | |
/// <returns>The amount of data placed in the buffer; the scanner uses it as the new end of data and treats <c>0</c> as the end of the input.</returns> | |
120 protected abstract int Read(char[] buffer, int offset, int size); | |
121 | |
122 | |
123 /// <summary> | |
124 /// Reads the next lexical element from the input. | |
125 /// </summary> | |
126 /// <param name="tokenValue">Receives the value of the token that was read; <c>null</c> for tokens that carry no text.</param> | |
127 /// <param name="tokenType">Receives the type of the token that was read.</param> | |
128 /// <returns><c>true</c> - a token was read successfully. <c>false</c> - the end of the input was reached.</returns> | |
129 /// <remarks>An exception is thrown if the input cannot be recognized. Token values are processed, i.e. | |
130 /// escape sequences in strings are decoded; numbers and literals are returned as their source text. Whitespace is skipped.</remarks> | |
131 public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) { | |
132 JsonGrammar.TokenType tag; | |
133 m_tokenBuilder.Clear(); | |
134 while (ReadChunk(m_jsonContext, out tag)) { | |
135 switch (tag) { | |
136 case JsonGrammar.TokenType.StringBound: | |
// a string bound in the JSON context starts a string; its contents are read with the string scanner | |
137 tokenValue = ReadString(); | |
138 tokenType = JsonTokenType.String; | |
139 break; | |
140 case JsonGrammar.TokenType.Number: | |
141 tokenValue = m_tokenBuilder.ToString(); | |
142 tokenType = JsonTokenType.Number; | |
143 break; | |
144 case JsonGrammar.TokenType.Literal: | |
145 tokenType = JsonTokenType.Literal; | |
146 tokenValue = m_tokenBuilder.ToString(); | |
147 break; | |
148 case JsonGrammar.TokenType.Whitespace: | |
// drop any whitespace text captured during a buffer refill and read the next chunk | |
149 m_tokenBuilder.Clear(); | |
150 continue; | |
151 default: | |
// every other tag is converted to JsonTokenType by a direct cast and has no value | |
152 tokenType = (JsonTokenType)tag; | |
153 tokenValue = null; | |
154 break; | |
155 } | |
156 return true; | |
157 } | |
158 tokenValue = null; | |
159 tokenType = JsonTokenType.None; | |
160 return false; | |
161 } | |
162 | |
/// <summary> | |
/// Reads the rest of a string literal with the string scanner and returns its text with the | |
/// escape sequences decoded. | |
/// </summary> | |
/// <returns>The decoded contents of the string.</returns> | |
/// <exception cref="ParserException">The input ends before the closing string bound is found.</exception> | |
163 string ReadString() { | |
164 JsonGrammar.TokenType tag; | |
165 m_tokenBuilder.Clear(); | |
166 | |
167 while (ReadStringChunk(m_stringContext, out tag)) { | |
168 switch (tag) { | |
169 case JsonGrammar.TokenType.StringBound: | |
// the bound character itself (presumably the closing quote) was appended with the chunk; drop it | |
170 m_tokenBuilder.Length--; | |
171 return m_tokenBuilder.ToString(); | |
172 case JsonGrammar.TokenType.UnescapedChar: | |
// plain text is already in the builder, nothing to do | |
173 break; | |
174 case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence | |
// take the four hex digits, remove the whole six-character escape, append the decoded result | |
175 m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4); | |
176 m_tokenBuilder.Length -= 6; | |
177 m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0)); | |
178 break; | |
179 case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence | |
// replace the two-character escape with the result of translating its last character | |
180 var ch = m_tokenBuilder[m_tokenBuilder.Length-1]; | |
181 m_tokenBuilder.Length -= 2; | |
182 m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch)); | |
183 break; | |
184 } | |
185 } | |
186 | |
187 throw new ParserException("Unexpected end of data"); | |
188 } | |
189 } | |
190 } |