annotate Implab/Formats/JSON/JSONScanner.cs @ 183:4f82e0f161c3 ref20160224

fixed DFA optimization, JSON is fully functional
author cin
date Fri, 25 Mar 2016 02:49:02 +0300
parents c32688129f14
children 7d07503621fe
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
1 using System;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
2 using System.Globalization;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
3 using Implab.Automaton;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
4 using System.Text;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
5 using Implab.Components;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
6 using System.IO;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
7
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
namespace Implab.Formats.JSON {
    /// <summary>
    /// A scanner (lexer) that splits a stream of characters into JSON tokens.
    /// </summary>
    public class JSONScanner : Disposable {
        // Defaults passed to ReaderScanner when the caller supplies only a reader.
        const int DefaultBufferMax = 1024 * 1024;
        const int DefaultChunkSize = 1024;

        // Reused by every ReadString call to accumulate the decoded string value.
        readonly StringBuilder m_builder = new StringBuilder();

        // Scanner context used between tokens (top-level JSON grammar).
        readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression;

        // Scanner context used after an opening string bound has been read.
        readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression;

        // The underlying character source; released in Dispose.
        readonly TextScanner m_scanner;

        /// <summary>
        /// Creates a new scanner instance that reads from the specified text.
        /// </summary>
        /// <param name="text">The JSON text to scan; checked with <c>Safe.ArgumentNotEmpty</c>.</param>
        public JSONScanner(string text) {
            Safe.ArgumentNotEmpty(text, "text");

            m_scanner = new StringScanner(text);
        }

        /// <summary>
        /// Creates a new scanner instance that reads from the specified reader.
        /// </summary>
        /// <param name="reader">The source of characters; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <param name="bufferMax">Forwarded unchanged to <c>ReaderScanner</c>.</param>
        /// <param name="chunkSize">Forwarded unchanged to <c>ReaderScanner</c>.</param>
        public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
            Safe.ArgumentNotNull(reader, "reader");

            m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
        }

        /// <summary>
        /// Creates a new scanner instance that reads from the specified reader
        /// using the default buffer limit (1024*1024) and chunk size (1024).
        /// </summary>
        /// <param name="reader">The source of characters.</param>
        public JSONScanner(TextReader reader) : this(reader, DefaultBufferMax, DefaultChunkSize) {
        }

        /// <summary>
        /// Reads the next lexical element from the input data.
        /// </summary>
        /// <param name="tokenValue">Receives the value of the token that was read.</param>
        /// <param name="tokenType">Receives the type of the token that was read.</param>
        /// <returns><c>true</c> - a token was read successfully. <c>false</c> - the end of the input data was reached.</returns>
        /// <remarks>If a token cannot be recognized, an exception is raised. Token values are processed, i.e.
        /// escaped characters in strings are decoded and numbers are converted to <c>double</c>.</remarks>
        public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
            JSONGrammar.TokenType[] tag;
            while (m_jsonContext.Execute(m_scanner, out tag)) {
                switch (tag[0]) {
                    case JSONGrammar.TokenType.StringBound:
                        // An opening string bound: the string body is read in the string context.
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JSONGrammar.TokenType.Number:
                        // Invariant culture keeps '.' as the decimal separator regardless of the current locale.
                        tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
                        tokenType = JsonTokenType.Number;
                        break;
                    case JSONGrammar.TokenType.Whitespace:
                        // Whitespace is never reported to the caller; move on to the next token.
                        continue;
                    default:
                        // NOTE(review): the grammar tag is cast directly to JsonTokenType, which presumes
                        // the two enums share numeric values for the remaining tokens - confirm.
                        tokenType = (JsonTokenType)tag[0];
                        tokenValue = m_scanner.GetTokenValue();
                        break;
                }
                return true;
            }
            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads the body of a string up to the closing string bound and returns its decoded value.
        /// </summary>
        /// <returns>The string contents with escape sequences translated.</returns>
        /// <exception cref="ParserException">The input ended before the closing string bound was found.</exception>
        string ReadString() {
            // Scratch buffer for a single escape sequence; the longest one handled here
            // is a unicode escape: 2 prefix chars + 4 hex digits.
            var buf = new char[6];

            JSONGrammar.TokenType[] tag;
            m_builder.Clear();

            while (m_stringContext.Execute(m_scanner, out tag)) {
                switch (tag[0]) {
                    case JSONGrammar.TokenType.StringBound:
                        // Closing string bound: the string is complete.
                        return m_builder.ToString();
                    case JSONGrammar.TokenType.UnescapedChar:
                        m_scanner.CopyTokenTo(m_builder);
                        break;
                    case JSONGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        m_scanner.CopyTokenTo(buf, 0);
                        // The hex digits start at offset 2, right after the two-character prefix.
                        m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
                        break;
                    case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence
                        m_scanner.CopyTokenTo(buf, 0);
                        // buf[0] is the backslash, buf[1] is the character that selects the escape.
                        m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
                        break;
                }
            }

            throw new ParserException("Unexpected end of data");
        }

        /// <summary>
        /// Releases the underlying text scanner when disposing and then runs the base class cleanup.
        /// </summary>
        /// <param name="disposing"><c>true</c> when called from an explicit dispose; only then is the scanner released.</param>
        protected override void Dispose(bool disposing) {
            if (disposing) {
                Safe.Dispose(m_scanner);
            }
            base.Dispose(disposing);
        }
    }
}