Mercurial > pub > ImplabNet
comparison Implab/Formats/Json/JsonScanner.cs @ 230:3e26338eb977 v2
slowly cutting off mono specific settings
| author | cin |
|---|---|
| date | Wed, 13 Sep 2017 16:55:13 +0300 |
| parents | Implab/Formats/JSON/JsonScanner.cs@5f7a3e1d32b9 |
| children | 302ca905c19e |
comparison
equal
deleted
inserted
replaced
| 229:5f7a3e1d32b9 | 230:3e26338eb977 |
|---|---|
| 1 using System; | |
| 2 using System.Globalization; | |
| 3 using Implab.Automaton; | |
| 4 using System.Text; | |
| 5 using Implab.Components; | |
| 6 using System.IO; | |
| 7 | |
namespace Implab.Formats.Json {
    /// <summary>
    /// Scanner (lexer) which splits a stream of characters into JSON tokens.
    /// </summary>
    /// <remarks>
    /// The scanner works over a caller supplied character buffer which is refilled
    /// through <see cref="Read(char[], int, int)"/> every time the underlying automaton
    /// runs out of data. Text of a token which crosses a buffer boundary is accumulated
    /// in an internal <see cref="StringBuilder"/>.
    /// </remarks>
    public abstract class JsonScanner : Disposable {
        // automaton used outside of string literals
        readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
        // automaton used between the opening and the closing string bounds
        readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

        // scratch space for the four hex digits of a unicode escape sequence
        readonly char[] m_unescapeBuf = new char[4];
        readonly char[] m_buffer;
        // exclusive end offset of the valid data in m_buffer (it is passed to Scan as the upper bound)
        int m_length;
        // offset of the first character in m_buffer which is not consumed yet
        int m_pos;
        readonly StringBuilder m_tokenBuilder = new StringBuilder();

        /// <summary>
        /// Creates the scanner over the specified buffer.
        /// </summary>
        /// <param name="buffer">The buffer used to hold the input data, it is also passed to
        /// <see cref="Read(char[], int, int)"/> when more data is required.</param>
        /// <param name="pos">The offset of the first unread character in the <paramref name="buffer"/>.</param>
        /// <param name="length">The offset in the <paramref name="buffer"/> at which the valid data ends.</param>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
        protected JsonScanner(char[] buffer, int pos, int length) {
            if (buffer == null)
                throw new ArgumentNullException(nameof(buffer));

            m_buffer = buffer;
            m_pos = pos;
            m_length = length;
        }

        /// <summary>
        /// Reads the next chunk in the JSON context. Only the text of numbers and literals
        /// is captured from the final part of the buffer.
        /// </summary>
        bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            return ScanChunk(scanner, false, out tokenType);
        }

        /// <summary>
        /// Reads the next chunk in the string context. The text of every chunk is captured.
        /// </summary>
        bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            return ScanChunk(scanner, true, out tokenType);
        }

        /// <summary>
        /// Runs the specified scanner from the current position until it stops,
        /// refilling the buffer as many times as the scanner asks for more data.
        /// </summary>
        /// <param name="scanner">The automaton to run, its state is reset first.</param>
        /// <param name="captureAll"><c>true</c> to append the text of any recognized chunk to the
        /// token builder, <c>false</c> to append it only for numbers and literals.</param>
        /// <param name="tokenType">The tag of the recognized chunk or
        /// <see cref="JsonGrammar.TokenType.None"/> at the end of data.</param>
        /// <returns><c>true</c> if a chunk is recognized, <c>false</c> if the end of data is reached
        /// before the scanner has moved.</returns>
        /// <exception cref="ParserException">The data ends in the middle of a chunk or the scanner
        /// stops in a non-final state.</exception>
        bool ScanChunk(InputScanner<JsonGrammar.TokenType> scanner, bool captureAll, out JsonGrammar.TokenType tokenType) {
            scanner.ResetState();

            while (scanner.Scan(m_buffer, m_pos, m_length)) {
                // the scanner requests new data

                if (m_pos != m_length) // keep the scanned part, the buffer is about to be overwritten
                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

                var read = Read(m_buffer, 0, m_buffer.Length);

                // NOTE(review): a negative result is treated as the end of data as well,
                // otherwise it would be used as the length of the valid data - confirm
                // that no implementation of Read relies on a different meaning.
                if (read <= 0) {
                    m_length = 0;

                    if (scanner.Position == m_pos) {
                        // the scanner hasn't moved, that's the end
                        m_pos = 0;
                        tokenType = JsonGrammar.TokenType.None;
                        return false;
                    }

                    if (!scanner.IsFinal)
                        throw new ParserException("Unexpected EOF");

                    // the data ends exactly at the end of a complete chunk
                    m_pos = 0;
                    tokenType = scanner.Tag;
                    return true;
                }

                m_length = read;
                m_pos = 0;
            }

            // the scanner has stopped at scannerPos, the characters in [m_pos, scannerPos) are consumed
            var scannerPos = scanner.Position;

            if (!scanner.IsFinal)
                throw UnexpectedCharacter(scannerPos);

            tokenType = scanner.Tag;

            var capture = captureAll
                || tokenType == JsonGrammar.TokenType.Number
                || tokenType == JsonGrammar.TokenType.Literal;

            if (capture && scannerPos != m_pos)
                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);

            m_pos = scannerPos;
            return true;
        }

        /// <summary>
        /// Creates the exception which reports the character the scanner has stopped at.
        /// </summary>
        /// <param name="position">The offset in the buffer at which the scanner has stopped.</param>
        ParserException UnexpectedCharacter(int position) {
            // The consumed text ends right before the position, so the first rejected
            // character is located exactly at it. The range is checked to never replace
            // the parser error with an IndexOutOfRangeException.
            if (position >= 0 && position < m_length && position < m_buffer.Length)
                return new ParserException($"Unexpected character '{m_buffer[position]}'");

            return new ParserException("Unexpected character");
        }

        /// <summary>
        /// Reads the next portion of the input data to the specified buffer.
        /// </summary>
        /// <param name="buffer">The buffer to fill.</param>
        /// <param name="offset">The offset in the buffer at which the data should be placed.</param>
        /// <param name="size">The maximum number of characters to read.</param>
        /// <returns>The number of characters read, <c>0</c> means the end of data.</returns>
        protected abstract int Read(char[] buffer, int offset, int size);


        /// <summary>
        /// Reads the next lexical element from the input data.
        /// </summary>
        /// <param name="tokenValue">Returns the value of the token read, <c>null</c> for the tokens
        /// which have no value.</param>
        /// <param name="tokenType">Returns the type of the token read.</param>
        /// <returns><c>true</c> - the token is read successfully, <c>false</c> - the end of the input
        /// data is reached.</returns>
        /// <remarks>If the token can't be recognized an exception is thrown. Escape sequences in
        /// strings are translated, numbers and literals are returned as their text.
        /// Whitespace is skipped.</remarks>
        public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();
            while (ReadChunk(m_jsonContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // the opening bound, the rest of the string is read in the string context
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JsonGrammar.TokenType.Number:
                        tokenValue = m_tokenBuilder.ToString();
                        tokenType = JsonTokenType.Number;
                        break;
                    case JsonGrammar.TokenType.Literal:
                        tokenType = JsonTokenType.Literal;
                        tokenValue = m_tokenBuilder.ToString();
                        break;
                    case JsonGrammar.TokenType.Whitespace:
                        // drop whatever was captured while the buffer was refilled
                        m_tokenBuilder.Clear();
                        continue;
                    default:
                        tokenType = (JsonTokenType)tag;
                        tokenValue = null;
                        break;
                }
                return true;
            }
            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads the contents of a string up to the closing bound and translates
        /// the escape sequences.
        /// </summary>
        /// <returns>The value of the string without the bounds.</returns>
        /// <exception cref="ParserException">The data ends before the closing bound.</exception>
        string ReadString() {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (ReadStringChunk(m_stringContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // remove the closing bound which has been captured with the chunk
                        m_tokenBuilder.Length--;
                        return m_tokenBuilder.ToString();
                    case JsonGrammar.TokenType.UnescapedChar:
                        break;
                    case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        // replace the six characters of the sequence with the translated one
                        m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                        m_tokenBuilder.Length -= 6;
                        m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                        break;
                    case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                        // replace the two characters of the sequence with the translated one
                        var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                        m_tokenBuilder.Length -= 2;
                        m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                        break;
                }
            }

            throw new ParserException("Unexpected end of data");
        }
    }
}
