Mercurial > pub > ImplabNet
diff Implab/Formats/Json/JsonScanner.cs @ 230:3e26338eb977 v2
slowly cutting off mono specific settings
author | cin |
---|---|
date | Wed, 13 Sep 2017 16:55:13 +0300 |
parents | Implab/Formats/JSON/JsonScanner.cs@5f7a3e1d32b9 |
children | 302ca905c19e |
line wrap: on
line diff
using System;
using System.Globalization;
using Implab.Automaton;
using System.Text;
using Implab.Components;
using System.IO;

namespace Implab.Formats.Json {
    /// <summary>
    /// Scanner (lexer) that splits a stream of characters into JSON tokens.
    /// </summary>
    /// <remarks>
    /// Input is consumed through a single character buffer which is refilled by the
    /// abstract <see cref="Read"/> method whenever the scanner runs out of data.
    /// </remarks>
    public abstract class JsonScanner : Disposable {
        readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
        readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

        // Scratch space for the four hex digits of a \uXXXX escape sequence.
        readonly char[] m_unescapeBuf = new char[4];
        readonly char[] m_buffer;
        // Index one past the last valid character in m_buffer (an end bound, not a count).
        int m_length;
        // Index of the first character in m_buffer which has not been consumed yet.
        int m_pos;
        // Accumulates the text of the token being read, including parts from previous buffer fills.
        readonly StringBuilder m_tokenBuilder = new StringBuilder();

        /// <summary>
        /// Creates the scanner over the specified buffer.
        /// </summary>
        /// <param name="buffer">The buffer which holds the input and is reused for subsequent reads.</param>
        /// <param name="pos">The index of the first unread character in <paramref name="buffer"/>.</param>
        /// <param name="length">The index one past the last valid character in <paramref name="buffer"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
        protected JsonScanner(char[] buffer, int pos, int length) {
            if (buffer == null) {
                throw new ArgumentNullException(nameof(buffer));
            }

            m_buffer = buffer;
            m_pos = pos;
            m_length = length;
        }

        /// <summary>
        /// Reads the next chunk recognized by <paramref name="scanner"/>. Only the text of
        /// numbers and literals is kept in the token builder.
        /// </summary>
        bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            return ScanNext(scanner, false, out tokenType);
        }

        /// <summary>
        /// Reads the next chunk recognized by <paramref name="scanner"/> and always keeps
        /// its text in the token builder. Used while reading the contents of a string.
        /// </summary>
        bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            return ScanNext(scanner, true, out tokenType);
        }

        /// <summary>
        /// Runs <paramref name="scanner"/> from the current position, refilling the buffer
        /// through <see cref="Read"/> for as long as the scanner asks for more data.
        /// </summary>
        /// <param name="scanner">The scanner to run; its state is reset first.</param>
        /// <param name="captureAll">
        /// <c>true</c> to append every recognized chunk to the token builder, <c>false</c>
        /// to append only numbers and literals.
        /// </param>
        /// <param name="tokenType">The tag of the recognized chunk, or <c>None</c> at the end of input.</param>
        /// <returns><c>true</c> if a chunk was recognized, <c>false</c> if the input has ended.</returns>
        /// <exception cref="ParserException">
        /// The input ended or an unexpected character was met before the scanner reached a final state.
        /// </exception>
        bool ScanNext(InputScanner<JsonGrammar.TokenType> scanner, bool captureAll, out JsonGrammar.TokenType tokenType) {
            scanner.ResetState();

            while (scanner.Scan(m_buffer, m_pos, m_length)) {
                // The scanner went through all available data and requests more.

                if (m_pos != m_length) {
                    // The buffer is about to be overwritten, keep what has been scanned so far.
                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
                }

                var moved = scanner.Position != m_pos;

                m_length = Read(m_buffer, 0, m_buffer.Length);
                // Reset the position on every path, including the failure below, so that
                // m_pos never exceeds m_length when the scanner is used again.
                m_pos = 0;

                if (m_length == 0) {
                    // No more data.
                    if (!moved) {
                        // The scanner hasn't consumed anything, this is a clean end of input.
                        tokenType = JsonGrammar.TokenType.None;
                        return false;
                    }

                    if (!scanner.IsFinal) {
                        throw new ParserException("Unexpected EOF");
                    }

                    tokenType = scanner.Tag;
                    return true;
                }
            }

            // The scanner stopped at scannerPos: characters in [m_pos, scannerPos) were
            // accepted and the character at scannerPos was not consumed.
            var scannerPos = scanner.Position;

            if (!scanner.IsFinal) {
                throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");
            }

            tokenType = scanner.Tag;

            if (scannerPos != m_pos) {
                var capture = captureAll
                    || tokenType == JsonGrammar.TokenType.Number
                    || tokenType == JsonGrammar.TokenType.Literal;

                if (capture) {
                    m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
                }

                m_pos = scannerPos;
            }

            return true;
        }

        /// <summary>
        /// Reads the next portion of the input into the buffer.
        /// </summary>
        /// <param name="buffer">The buffer to fill.</param>
        /// <param name="offset">The index in <paramref name="buffer"/> at which to start writing.</param>
        /// <param name="size">The maximum number of characters to read.</param>
        /// <returns>The number of characters read; zero is treated as the end of input.</returns>
        protected abstract int Read(char[] buffer, int offset, int size);


        /// <summary>
        /// Reads the next lexical element from the input.
        /// </summary>
        /// <param name="tokenValue">Receives the value of the token which has been read.</param>
        /// <param name="tokenType">Receives the type of the token which has been read.</param>
        /// <returns><c>true</c> - a token has been read, <c>false</c> - the end of input is reached.</returns>
        /// <remarks>
        /// An exception is thrown when a token can't be recognized. Escape sequences in
        /// strings are translated, numbers and literals are returned as their text, and
        /// for all other tokens <paramref name="tokenValue"/> is <c>null</c>.
        /// </remarks>
        public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();
            while (ReadChunk(m_jsonContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JsonGrammar.TokenType.Number:
                        tokenValue = m_tokenBuilder.ToString();
                        tokenType = JsonTokenType.Number;
                        break;
                    case JsonGrammar.TokenType.Literal:
                        tokenType = JsonTokenType.Literal;
                        tokenValue = m_tokenBuilder.ToString();
                        break;
                    case JsonGrammar.TokenType.Whitespace:
                        // Whitespace is skipped; drop whatever was captured across buffer refills.
                        m_tokenBuilder.Clear();
                        continue;
                    default:
                        tokenType = (JsonTokenType)tag;
                        tokenValue = null;
                        break;
                }
                return true;
            }
            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads the contents of a string up to the closing quote and translates escape sequences.
        /// </summary>
        /// <returns>The unescaped string without the surrounding quotes.</returns>
        /// <exception cref="ParserException">The input ended before the closing quote.</exception>
        string ReadString() {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (ReadStringChunk(m_stringContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // Drop the closing quote which has been captured with the chunk.
                        m_tokenBuilder.Length--;
                        return m_tokenBuilder.ToString();
                    case JsonGrammar.TokenType.UnescapedChar:
                        break;
                    case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        // Replace the six captured characters with the character they encode.
                        m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                        m_tokenBuilder.Length -= 6;
                        m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                        break;
                    case JsonGrammar.TokenType.EscapedChar: // \t - escape sequence
                        // Replace the backslash and the following character with the translated one.
                        var ch = m_tokenBuilder[m_tokenBuilder.Length - 1];
                        m_tokenBuilder.Length -= 2;
                        m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                        break;
                }
            }

            throw new ParserException("Unexpected end of data");
        }
    }
}