Mercurial > pub > ImplabNet
view Implab/Formats/JSON/JsonScanner.cs @ 229:5f7a3e1d32b9 v2
JsonXmlReader performance tuning
JsonScanner now operates on strings and doesn't
  parse numbers and literals.
Added SerializationHelpers for common serialize/deserialize operations
| author | cin | 
|---|---|
| date | Tue, 12 Sep 2017 19:07:42 +0300 | 
| parents | 6fa235c5a760 | 
| children | 
line wrap: on
 line source
using System;
using System.Globalization;
using Implab.Automaton;
using System.Text;
using Implab.Components;
using System.IO;

namespace Implab.Formats.Json {
    /// <summary>
    /// Scanner (lexer) that splits a stream of characters into JSON tokens.
    /// </summary>
    /// <remarks>
    /// Characters are pulled through <see cref="Read(char[], int, int)"/> into a single
    /// working buffer. Text that belongs to the current token is accumulated in an
    /// internal <see cref="StringBuilder"/> so that a token may span several buffer refills.
    /// </remarks>
    public abstract class JsonScanner : Disposable {
        readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
        readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

        // scratch space for the four hex digits of a \uXXXX escape
        readonly char[] m_unescapeBuf = new char[4];

        // working buffer; [m_pos, m_length) is the part which is not consumed yet
        readonly char[] m_buffer;
        int m_length;
        int m_pos;

        // text of the token which is being read
        readonly StringBuilder m_tokenBuilder = new StringBuilder();

        /// <summary>
        /// Initializes the scanner with a working buffer.
        /// </summary>
        /// <param name="buffer">The buffer that holds the input and is refilled through
        /// <see cref="Read(char[], int, int)"/>.</param>
        /// <param name="pos">Index of the first unread character in <paramref name="buffer"/>.</param>
        /// <param name="length">Index one past the last valid character in
        /// <paramref name="buffer"/> (it is used as the end of the data, not as a count).</param>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
        protected JsonScanner(char[] buffer, int pos, int length) {
            // a null buffer can never work: it is dereferenced on the first refill
            if (buffer == null)
                throw new ArgumentNullException(nameof(buffer));

            m_buffer = buffer;
            m_pos = pos;
            m_length = length;
        }

        /// <summary>
        /// Reads one token of the JSON grammar. Only the text of numbers and literals
        /// is guaranteed to be captured in the token builder.
        /// </summary>
        bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            return ScanChunk(scanner, false, out tokenType);
        }

        /// <summary>
        /// Reads one token of the string grammar. The text of every token is captured
        /// in the token builder.
        /// </summary>
        bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            return ScanChunk(scanner, true, out tokenType);
        }

        /// <summary>
        /// Runs <paramref name="scanner"/> over the buffered input until it recognizes
        /// a token, refilling the buffer when the scanner runs out of data.
        /// </summary>
        /// <param name="scanner">The scanner to run; its state is reset first.</param>
        /// <param name="captureAll"><c>true</c> to append the text of any token to the
        /// token builder, <c>false</c> to append it only for numbers and literals.</param>
        /// <param name="tokenType">Receives the tag of the recognized token, or
        /// <see cref="JsonGrammar.TokenType.None"/> at the end of input.</param>
        /// <returns><c>true</c> if a token was read, <c>false</c> if the input ended
        /// before the scanner consumed anything.</returns>
        /// <exception cref="ParserException">The input ends in the middle of a token or
        /// contains a character which the scanner does not accept.</exception>
        bool ScanChunk(InputScanner<JsonGrammar.TokenType> scanner, bool captureAll, out JsonGrammar.TokenType tokenType) {
            scanner.ResetState();

            while (scanner.Scan(m_buffer, m_pos, m_length)) {
                // the scanner requests more data

                // the buffer is about to be overwritten, keep what has been scanned so far
                if (m_pos != m_length)
                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);

                m_length = Read(m_buffer, 0, m_buffer.Length);

                if (m_length == 0) {
                    // no more data
                    if (scanner.Position == m_pos) {
                        // the scanner hasn't moved, this is the end of input
                        m_pos = 0;
                        tokenType = JsonGrammar.TokenType.None;
                        return false;
                    }

                    if (scanner.IsFinal) {
                        // the input ended exactly at the end of a complete token
                        m_pos = 0;
                        tokenType = scanner.Tag;
                        return true;
                    }

                    throw new ParserException("Unexpected EOF");
                }

                m_pos = 0;
            }

            // the scanner stopped at scannerPos: [m_pos, scannerPos) was consumed
            var scannerPos = scanner.Position;

            if (!scanner.IsFinal)
                throw new ParserException(DescribeUnexpectedInput(scannerPos));

            tokenType = scanner.Tag;

            var capture = captureAll
                || tokenType == JsonGrammar.TokenType.Number
                || tokenType == JsonGrammar.TokenType.Literal;

            if (scannerPos != m_pos && capture)
                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);

            m_pos = scannerPos;
            return true;
        }

        /// <summary>
        /// Builds the error message for input rejected at <paramref name="position"/>.
        /// </summary>
        string DescribeUnexpectedInput(int position) {
            // The character at 'position' is the first one the scanner did not consume.
            // The bounds check keeps an IndexOutOfRangeException from masking the parser error.
            if (position >= 0 && position < m_length && position < m_buffer.Length)
                return $"Unexpected character '{m_buffer[position]}'";

            return "Unexpected character";
        }

        /// <summary>
        /// Reads the next portion of the input into the buffer.
        /// </summary>
        /// <param name="buffer">The buffer to fill.</param>
        /// <param name="offset">Index in <paramref name="buffer"/> at which to start writing.</param>
        /// <param name="size">Maximum number of characters to read.</param>
        /// <returns>The number of characters read; <c>0</c> is treated as the end of input.</returns>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Reads the next lexical element from the input.
        /// </summary>
        /// <param name="tokenValue">Receives the value of the token: the unescaped text
        /// for strings, the raw text for numbers and literals, <c>null</c> otherwise.</param>
        /// <param name="tokenType">Receives the type of the token.</param>
        /// <returns><c>true</c> if a token was read, <c>false</c> if the end of the input
        /// was reached.</returns>
        /// <remarks>An exception is thrown when a token cannot be recognized. Escape
        /// sequences in strings are translated; numbers and literals are returned as
        /// text and are not parsed here.</remarks>
        public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (ReadChunk(m_jsonContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JsonGrammar.TokenType.Number:
                        tokenValue = m_tokenBuilder.ToString();
                        tokenType = JsonTokenType.Number;
                        break;
                    case JsonGrammar.TokenType.Literal:
                        tokenType = JsonTokenType.Literal;
                        tokenValue = m_tokenBuilder.ToString();
                        break;
                    case JsonGrammar.TokenType.Whitespace:
                        // drop whitespace which may have been captured during a buffer refill
                        m_tokenBuilder.Clear();
                        continue;
                    default:
                        tokenType = (JsonTokenType)tag;
                        tokenValue = null;
                        break;
                }
                return true;
            }

            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads the body of a string after its opening bound has been consumed and
        /// translates escape sequences.
        /// </summary>
        /// <returns>The unescaped contents of the string.</returns>
        /// <exception cref="ParserException">The input ends before the closing bound.</exception>
        string ReadString() {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (ReadStringChunk(m_stringContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // the closing bound was captured as well, drop it
                        m_tokenBuilder.Length--;
                        return m_tokenBuilder.ToString();
                    case JsonGrammar.TokenType.UnescapedChar:
                        // plain text is already in the builder
                        break;
                    case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        // take the four hex digits, remove all six characters of the escape
                        m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                        m_tokenBuilder.Length -= 6;
                        m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                        break;
                    case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                        // replace the backslash and the escape character with the translated one
                        var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                        m_tokenBuilder.Length -= 2;
                        m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                        break;
                }
            }

            throw new ParserException("Unexpected end of data");
        }
    }
}
