annotate Implab/Formats/JSON/JsonScanner.cs @ 228:6fa235c5a760 v2

Rewritten JsonScanner, JsonParser, fixed naming style
author cin
date Tue, 12 Sep 2017 01:19:12 +0300
parents Implab/Formats/JSON/JSONScanner.cs@7d07503621fe
children 5f7a3e1d32b9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
1 using System;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
2 using System.Globalization;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
3 using Implab.Automaton;
176
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
4 using System.Text;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
5 using Implab.Components;
0c3c69fe225b rewritten the text scanner
cin
parents: 165
diff changeset
6 using System.IO;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
7
228
6fa235c5a760 Rewritten JsonScanner, JsonParser, fixed naming style
cin
parents: 208
diff changeset
8 namespace Implab.Formats.Json {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
/// <summary>
/// Scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
public abstract class JsonScanner : Disposable {
    // DFA-based scanner used for the top-level JSON grammar.
    readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
    // DFA-based scanner used while inside a string literal.
    readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // Scratch space for the four hex digits of a unicode escape sequence.
    readonly char[] m_unescapeBuf = new char[4];
    // Window of input characters; refilled through Read(char[], int, int).
    readonly char[] m_buffer;
    // End (exclusive) of the valid data in m_buffer; 0 after the input is exhausted.
    int m_length;
    // Index of the next unread character in m_buffer.
    int m_pos;
    // Accumulates the raw text of the token currently being read.
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner with a working buffer and the window of data it already contains.
    /// </summary>
    /// <param name="buffer">The buffer the scanner reads from and refills.</param>
    /// <param name="pos">Index of the first unread character in <paramref name="buffer"/>.</param>
    /// <param name="length">End (exclusive) of the valid data in <paramref name="buffer"/>;
    /// when equal to <paramref name="pos"/> the buffer is refilled on the first read.</param>
    protected JsonScanner(char[] buffer, int pos, int length) {
        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    /// <summary>
    /// Reads one token using the specified scanner and appends its raw text to the token builder.
    /// </summary>
    /// <param name="scanner">The scanner (JSON or string context) used to recognize the token.</param>
    /// <param name="tokenType">Receives the tag of the recognized token, or
    /// <see cref="JsonGrammar.TokenType.None"/> at the end of the input.</param>
    /// <returns><c>true</c> if a token was read; <c>false</c> if the end of the input was reached
    /// before any character of a new token.</returns>
    /// <exception cref="ParserException">The input ended or an unexpected character was met
    /// before the scanner reached a final state.</exception>
    bool Read(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.Reset();

        if (m_pos == m_length) {
            // the buffer is exhausted, refill it before scanning
            m_pos = 0;
            m_length = Read(m_buffer, 0, m_buffer.Length);
            if (m_length == 0) {
                tokenType = JsonGrammar.TokenType.None;
                return false; // EOF
            }
        }

        // While Scan returns true the whole remaining window is taken as a part of the token
        // and the next portion of the input is fetched.
        while (scanner.Scan(m_buffer, m_pos, m_length - m_pos)) {
            m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
            m_pos = 0;
            m_length = Read(m_buffer, 0, m_buffer.Length);
        }

        // Capture the characters accepted in the current window and move past them.
        var scannerPos = scanner.Position;
        if (scannerPos != m_pos) {
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
            m_pos = scannerPos;
        }

        if (!scanner.IsFinal) {
            if (m_length == 0) {
                // unexpected EOF
                throw new ParserException("Unexpected EOF");
            } else {
                // unexpected character: m_pos now points at the first character which wasn't
                // accepted (everything before it has been appended to the token above), so this
                // is the character to report. The previous index 'm_pos + 1' reported the wrong
                // character and could run past the valid data.
                throw new ParserException($"Unexpected character '{m_buffer[m_pos]}'");
            }
        }
        tokenType = scanner.Tag;
        return true;
    }

    /// <summary>
    /// Reads the next portion of the input into the specified buffer.
    /// </summary>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="size">Maximum number of characters to read.</param>
    /// <returns>The number of characters read; <c>0</c> is treated as the end of the input.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);


    /// <summary>
    /// Reads the next lexical element from the input.
    /// </summary>
    /// <param name="tokenValue">Receives the value of the token read.</param>
    /// <param name="tokenType">Receives the type of the token read.</param>
    /// <returns><c>true</c> - the token has been read successfully; <c>false</c> - the end of the input has been reached.</returns>
    /// <remarks>If a token can't be recognized an exception is thrown. Token values are processed, i.e.
    /// escape sequences in strings are translated and numbers are converted to <c>double</c>.</remarks>
    public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();
        while (Read(m_jsonContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // opening quote: the rest of the literal is read in the string context
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = Double.Parse(m_tokenBuilder.ToString(), CultureInfo.InvariantCulture);
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenType = JsonTokenType.Literal;
                    tokenValue = m_tokenBuilder.ToString();
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    // whitespace is skipped: drop its text and read the next token
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    // the remaining tags carry no value and are cast directly to JsonTokenType
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    /// <summary>
    /// Reads the body of a string literal up to the closing quote and translates escape sequences.
    /// </summary>
    /// <returns>The unescaped contents of the string without the closing quote.</returns>
    /// <exception cref="ParserException">The input ended before the closing quote.</exception>
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (Read(m_stringContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // closing quote: remove it from the accumulated text and return the result
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    // plain text is already in the builder
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // take the four trailing hex digits, drop all six characters of the sequence
                    // and append the translated character instead
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // replace the backslash and the escape code with the translated character
                    var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
134 }