annotate Implab/Formats/JSON/JSONScanner.cs @ 187:dd4a3590f9c6 ref20160224

Reworked cancelation handling: if the cancel handler isn't specified, the OperationCanceledException will be handled by the error handler. Any unhandled OperationCanceledException will cause the promise cancelation.
author cin
date Tue, 19 Apr 2016 17:35:20 +0300
parents 4f82e0f161c3
children 7d07503621fe
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
1 using System;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
2 using System.Globalization;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
3 using Implab.Automaton;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
4 using System.Text;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
5 using Implab.Components;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
6 using System.IO;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
7
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
8 namespace Implab.Formats.JSON {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
9 /// <summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
10 /// Сканер (лексер), разбивающий поток символов на токены JSON.
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
11 /// </summary>
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
12 public class JSONScanner : Disposable {
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
13 readonly StringBuilder m_builder = new StringBuilder();
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
14
179
cin
parents: 177
diff changeset
15 readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression;
cin
parents: 177
diff changeset
16 readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
17
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
18
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
19 readonly TextScanner m_scanner;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
20
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
21 /// <summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
22 /// Создает новый экземпляр сканера
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
23 /// </summary>
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
24 public JSONScanner(string text) {
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
25 Safe.ArgumentNotEmpty(text, "text");
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
26
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
27 m_scanner = new StringScanner(text);
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
28 }
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
29
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
30 public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
31 Safe.ArgumentNotNull(reader, "reader");
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
32
177
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
33 m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
34 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
35
180
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 179
diff changeset
36 public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 179
diff changeset
37 }
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 179
diff changeset
38
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
39 /// <summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
40 /// Читает следующий лексический элемент из входных данных.
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
41 /// </summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
42 /// <param name="tokenValue">Возвращает значение прочитанного токена.</param>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
43 /// <param name="tokenType">Возвращает тип прочитанного токена.</param>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
44 /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
45 /// <remarks>В случае, если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е.
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
46 /// в строках обрабатываются экранированные символы, числа становятся типа double.</remarks>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
47 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
48 JSONGrammar.TokenType[] tag;
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 180
diff changeset
49 while (m_jsonContext.Execute(m_scanner, out tag)) {
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
50 switch (tag[0]) {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
51 case JSONGrammar.TokenType.StringBound:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
52 tokenValue = ReadString();
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
53 tokenType = JsonTokenType.String;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
54 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
55 case JSONGrammar.TokenType.Number:
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
56 tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
57 tokenType = JsonTokenType.Number;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
58 break;
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 180
diff changeset
59 case JSONGrammar.TokenType.Whitespace:
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 180
diff changeset
60 continue;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
61 default:
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
62 tokenType = (JsonTokenType)tag[0];
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
63 tokenValue = m_scanner.GetTokenValue();
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
64 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
65 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
66 return true;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
67 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
68 tokenValue = null;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
69 tokenType = JsonTokenType.None;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
70 return false;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
71 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
72
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
73 string ReadString() {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
74 int pos = 0;
177
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
75 var buf = new char[6]; // the buffer for unescaping chars
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
76
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
77 JSONGrammar.TokenType[] tag;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
78 m_builder.Clear();
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
79
177
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
80 while (m_stringContext.Execute(m_scanner, out tag)) {
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
81 switch (tag[0]) {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
82 case JSONGrammar.TokenType.StringBound:
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
83 return m_builder.ToString();
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
84 case JSONGrammar.TokenType.UnescapedChar:
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
85 m_scanner.CopyTokenTo(m_builder);
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
86 break;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
87 case JSONGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
88 m_scanner.CopyTokenTo(buf, 0);
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
89 m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
90 pos++;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
91 break;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
92 case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
93 m_scanner.CopyTokenTo(buf, 0);
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
94 m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
95 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
96 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
97
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
98 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
99
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
100 throw new ParserException("Unexpected end of data");
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
101 }
177
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
102
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
103 protected override void Dispose(bool disposing) {
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
104 if (disposing)
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
105 Safe.Dispose(m_scanner);
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
106 base.Dispose(disposing);
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
107 }
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
108 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
109 }