comparison Implab/Formats/JSON/JsonScanner.cs @ 228:6fa235c5a760 v2

Rewritten JsonScanner, JsonParser, fixed naming style
author cin
date Tue, 12 Sep 2017 01:19:12 +0300
parents Implab/Formats/JSON/JSONScanner.cs@7d07503621fe
children 5f7a3e1d32b9
comparison
equal deleted inserted replaced
227:8d5de4eb9c2c 228:6fa235c5a760
1 using System;
2 using System.Globalization;
3 using Implab.Automaton;
4 using System.Text;
5 using Implab.Components;
6 using System.IO;
7
8 namespace Implab.Formats.Json {
/// <summary>
/// Scanner (lexer) that splits a stream of characters into JSON tokens.
/// </summary>
/// <remarks>
/// Characters are taken from an internal buffer which is refilled through
/// <see cref="Read(char[], int, int)"/> every time it has been consumed completely.
/// </remarks>
public abstract class JsonScanner : Disposable {
    // Scanner used by ReadToken to recognize tokens outside of string literals.
    readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
    // Scanner used by ReadString to recognize the parts of a string literal.
    readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

    // Scratch space for the four hex digits of a unicode escape sequence.
    readonly char[] m_unescapeBuf = new char[4];
    // Input buffer; unread characters occupy the range [m_pos, m_length).
    readonly char[] m_buffer;
    // Index one past the last valid character in m_buffer.
    int m_length;
    // Index of the next unread character in m_buffer.
    int m_pos;
    // Accumulates the text of the current token, possibly across several buffer refills.
    readonly StringBuilder m_tokenBuilder = new StringBuilder();

    /// <summary>
    /// Initializes the scanner with the buffer it will read from and refill.
    /// </summary>
    /// <param name="buffer">The character buffer used for the input data.</param>
    /// <param name="pos">Index of the first unread character in <paramref name="buffer"/>.</param>
    /// <param name="length">
    /// Index one past the last valid character in <paramref name="buffer"/>; when it equals
    /// <paramref name="pos"/> the buffer is considered empty and is filled on the first read.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    protected JsonScanner(char[] buffer, int pos, int length) {
        if (buffer == null)
            throw new ArgumentNullException(nameof(buffer));

        m_buffer = buffer;
        m_pos = pos;
        m_length = length;
    }

    /// <summary>
    /// Runs the specified scanner over the input and appends the recognized text
    /// to the token builder.
    /// </summary>
    /// <param name="scanner">The scanner which recognizes the next token.</param>
    /// <param name="tokenType">Receives the tag of the recognized token.</param>
    /// <returns><c>true</c> if a token was recognized; <c>false</c> if the input has ended.</returns>
    /// <exception cref="ParserException">
    /// The scanner stopped in a non-final state, either at the end of the input
    /// or at a character it does not accept.
    /// </exception>
    bool Read(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
        scanner.Reset();

        // The buffer is exhausted: refill it before scanning.
        if (m_pos == m_length) {
            m_pos = 0;
            m_length = Read(m_buffer, 0, m_buffer.Length);
            if (m_length == 0) {
                tokenType = JsonGrammar.TokenType.None;
                return false; // EOF
            }
        }

        // While the scanner asks for more data, keep the whole scanned chunk
        // as a part of the token and load the next portion of the input.
        while (scanner.Scan(m_buffer, m_pos, m_length - m_pos)) {
            m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
            m_pos = 0;
            m_length = Read(m_buffer, 0, m_buffer.Length);
        }

        // The scanner stopped inside the buffer: [m_pos, scannerPos) belongs to the token.
        var scannerPos = scanner.Position;
        if (scannerPos != m_pos) {
            m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
            m_pos = scannerPos;
        }

        if (!scanner.IsFinal) {
            // No more characters are available: the input ended in the middle of a token.
            if (m_pos >= m_length)
                throw new ParserException("Unexpected EOF");

            // m_pos now points at the first character which was not consumed,
            // i.e. the one the scanner has rejected.
            throw new ParserException($"Unexpected character '{m_buffer[m_pos]}'");
        }

        tokenType = scanner.Tag;
        return true;
    }

    /// <summary>
    /// Reads the next portion of the input data into the specified buffer.
    /// </summary>
    /// <param name="buffer">The buffer which receives the characters.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
    /// <param name="size">Maximum number of characters to read.</param>
    /// <returns>The number of characters read; <c>0</c> is treated as the end of the input.</returns>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Reads the next lexical element from the input data.
    /// </summary>
    /// <param name="tokenValue">Receives the value of the token which has been read.</param>
    /// <param name="tokenType">Receives the type of the token which has been read.</param>
    /// <returns><c>true</c> - a token has been read successfully; <c>false</c> - the end of the input data is reached.</returns>
    /// <remarks>If a token cannot be recognized an exception is thrown. Token values are processed, i.e.
    /// escape sequences in strings are translated and numbers are converted to <c>double</c>.</remarks>
    public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();
        while (Read(m_jsonContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Opening quote: the rest of the literal is read in the string context.
                    tokenValue = ReadString();
                    tokenType = JsonTokenType.String;
                    break;
                case JsonGrammar.TokenType.Number:
                    tokenValue = Double.Parse(m_tokenBuilder.ToString(), CultureInfo.InvariantCulture);
                    tokenType = JsonTokenType.Number;
                    break;
                case JsonGrammar.TokenType.Literal:
                    tokenType = JsonTokenType.Literal;
                    tokenValue = m_tokenBuilder.ToString();
                    break;
                case JsonGrammar.TokenType.Whitespace:
                    // Whitespace is skipped; drop its text and read the next token.
                    m_tokenBuilder.Clear();
                    continue;
                default:
                    // Remaining tags carry no value and are passed through as token types.
                    tokenType = (JsonTokenType)tag;
                    tokenValue = null;
                    break;
            }
            return true;
        }
        tokenValue = null;
        tokenType = JsonTokenType.None;
        return false;
    }

    /// <summary>
    /// Reads the contents of a string literal up to the closing quote,
    /// translating escape sequences on the way.
    /// </summary>
    /// <returns>The unescaped contents of the string literal.</returns>
    /// <exception cref="ParserException">The input has ended before the closing quote.</exception>
    string ReadString() {
        JsonGrammar.TokenType tag;
        m_tokenBuilder.Clear();

        while (Read(m_stringContext, out tag)) {
            switch (tag) {
                case JsonGrammar.TokenType.StringBound:
                    // Drop the closing quote which has been appended by Read.
                    m_tokenBuilder.Length--;
                    return m_tokenBuilder.ToString();
                case JsonGrammar.TokenType.UnescapedChar:
                    // Plain characters are already in the builder.
                    break;
                case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                    // Take the four hex digits, remove the six characters of the
                    // sequence and append the translated character instead.
                    m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                    m_tokenBuilder.Length -= 6;
                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                    break;
                case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                    // Replace the two characters of the sequence with the translated one.
                    var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                    m_tokenBuilder.Length -= 2;
                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                    break;
            }
        }

        throw new ParserException("Unexpected end of data");
    }
}
134 }