diff Implab/Formats/Json/JsonScanner.cs @ 232:133ba4444acc v2

Слияние
author cin
date Thu, 21 Sep 2017 01:14:27 +0300
parents 3e26338eb977
children 302ca905c19e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/Json/JsonScanner.cs	Thu Sep 21 01:14:27 2017 +0300
@@ -0,0 +1,190 @@
+using System;
+using System.Globalization;
+using Implab.Automaton;
+using System.Text;
+using Implab.Components;
+using System.IO;
+
+namespace Implab.Formats.Json {
+    /// <summary>
+    /// Scanner (lexer) that splits a stream of characters into JSON tokens.
+    /// </summary>
+    public abstract class JsonScanner : Disposable {
+        readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
+        readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();
+
+        readonly char[] m_unescapeBuf = new char[4];
+        readonly char[] m_buffer;
+        int m_length;
+        int m_pos;
+        readonly StringBuilder m_tokenBuilder = new StringBuilder();
+
+        /// <summary>
+        /// Initializes the scanner over an already allocated character buffer.
+        /// </summary>
+        /// <param name="buffer">Working buffer; it is scanned in place and refilled through <see cref="Read"/>.</param>
+        /// <param name="pos">Index in <paramref name="buffer"/> of the first character to scan.</param>
+        /// <param name="length">Index just past the last valid character in <paramref name="buffer"/>
+        /// (an exclusive end index, not a character count).</param>
+        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="pos"/> or <paramref name="length"/>
+        /// does not describe a range inside <paramref name="buffer"/>.</exception>
+        protected JsonScanner(char[] buffer, int pos, int length) {
+            // Fail early with a descriptive exception instead of a late failure inside the scan loop.
+            if (buffer == null)
+                throw new ArgumentNullException(nameof(buffer));
+            if (pos < 0 || pos > buffer.Length)
+                throw new ArgumentOutOfRangeException(nameof(pos));
+            if (length < pos || length > buffer.Length)
+                throw new ArgumentOutOfRangeException(nameof(length));
+
+            m_buffer = buffer;
+            m_pos = pos;
+            m_length = length;
+        }
+
+        /// <summary>
+        /// Runs <paramref name="scanner"/> from the current buffer position until it recognizes one token,
+        /// refilling the buffer through <see cref="Read"/> each time the scanner asks for more input.
+        /// </summary>
+        /// <param name="scanner">Scanner to run; its state is reset before scanning starts.</param>
+        /// <param name="tokenType">Tag reported by the scanner for the recognized token, or
+        /// <c>JsonGrammar.TokenType.None</c> when the input is exhausted.</param>
+        /// <returns><c>true</c> if a token was recognized; <c>false</c> if the input ended and the scanner
+        /// had not advanced.</returns>
+        /// <exception cref="ParserException">The input ended inside an unfinished token, or the scanner
+        /// stopped while not in a final state.</exception>
+        /// <remarks>
+        /// Input consumed before a buffer refill is always appended to the token builder. Within the last
+        /// scanned buffer only the text of <c>Number</c> and <c>Literal</c> tokens is appended.
+        /// </remarks>
+        bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
+            scanner.ResetState();
+
+            while (scanner.Scan(m_buffer, m_pos, m_length)) {
+                // The scanner requests more data. Save what it has consumed so far,
+                // because the buffer is about to be overwritten.
+                if (m_pos != m_length)
+                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
+
+                m_length = Read(m_buffer, 0, m_buffer.Length);
+
+                if (m_length == 0) {
+                    // No more data is available.
+                    if (scanner.Position == m_pos) {
+                        // The scanner has not moved: this is the end of the input.
+                        m_pos = 0;
+                        tokenType = JsonGrammar.TokenType.None;
+                        return false;
+                    }
+
+                    if (!scanner.IsFinal)
+                        throw new ParserException("Unexpected EOF");
+
+                    // The input collected so far forms a complete token.
+                    m_pos = 0;
+                    tokenType = scanner.Tag;
+                    return true;
+                }
+
+                m_pos = 0;
+            }
+
+            var scannerPos = scanner.Position;
+
+            if (!scanner.IsFinal) {
+                // The success path below treats [m_pos, scannerPos) as the token and resumes scanning
+                // at scannerPos, so scannerPos is the first character the scanner did not accept.
+                // Guard the index so a ParserException is raised even for an unexpected position.
+                if (scannerPos >= 0 && scannerPos < m_length)
+                    throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");
+                throw new ParserException("Unexpected character");
+            }
+
+            tokenType = scanner.Tag;
+
+            // Only numbers and literals carry a textual value that the caller needs.
+            var carriesText = tokenType == JsonGrammar.TokenType.Number || tokenType == JsonGrammar.TokenType.Literal;
+            if (carriesText && scannerPos != m_pos)
+                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
+
+            m_pos = scannerPos;
+            return true;
+        }
+
+        /// <summary>
+        /// Runs <paramref name="scanner"/> over the contents of a string from the current buffer position
+        /// until it recognizes one string element, refilling the buffer through <see cref="Read"/> each
+        /// time the scanner asks for more input.
+        /// </summary>
+        /// <param name="scanner">Scanner to run; its state is reset before scanning starts.</param>
+        /// <param name="tokenType">Tag reported by the scanner for the recognized element, or
+        /// <c>JsonGrammar.TokenType.None</c> when the input is exhausted.</param>
+        /// <returns><c>true</c> if an element was recognized; <c>false</c> if the input ended and the
+        /// scanner had not advanced.</returns>
+        /// <exception cref="ParserException">The input ended inside an unfinished element, or the scanner
+        /// stopped while not in a final state.</exception>
+        /// <remarks>
+        /// Every scanned character is appended to the token builder, whatever the tag is; the caller
+        /// post-processes the builder contents.
+        /// </remarks>
+        bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
+            scanner.ResetState();
+
+            while (scanner.Scan(m_buffer, m_pos, m_length)) {
+                // The scanner requests more data. Save what it has consumed so far,
+                // because the buffer is about to be overwritten.
+                if (m_pos != m_length)
+                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
+
+                m_length = Read(m_buffer, 0, m_buffer.Length);
+
+                if (m_length == 0) {
+                    // No more data is available.
+                    if (scanner.Position == m_pos) {
+                        // The scanner has not moved: this is the end of the input.
+                        m_pos = 0;
+                        tokenType = JsonGrammar.TokenType.None;
+                        return false;
+                    }
+
+                    if (!scanner.IsFinal)
+                        throw new ParserException("Unexpected EOF");
+
+                    // The input collected so far forms a complete element.
+                    m_pos = 0;
+                    tokenType = scanner.Tag;
+                    return true;
+                }
+
+                m_pos = 0;
+            }
+
+            var scannerPos = scanner.Position;
+
+            if (!scanner.IsFinal) {
+                // The success path below treats [m_pos, scannerPos) as the element and resumes scanning
+                // at scannerPos, so scannerPos is the first character the scanner did not accept.
+                // Guard the index so a ParserException is raised even for an unexpected position.
+                if (scannerPos >= 0 && scannerPos < m_length)
+                    throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'");
+                throw new ParserException("Unexpected character");
+            }
+
+            if (scannerPos != m_pos) {
+                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
+                m_pos = scannerPos;
+            }
+
+            tokenType = scanner.Tag;
+            return true;
+        }
+
+        /// <summary>
+        /// Supplies the scanner with the next portion of input characters.
+        /// </summary>
+        /// <param name="buffer">Array that receives the characters; the scanner passes its own working buffer.</param>
+        /// <param name="offset">Index in <paramref name="buffer"/> where the data should start; the scanner always passes <c>0</c>.</param>
+        /// <param name="size">Maximum number of characters to store; the scanner passes the full buffer length.</param>
+        /// <returns>The number of characters placed into <paramref name="buffer"/>. The scanner uses this value
+        /// as the new end of valid data and treats <c>0</c> as the end of the input.</returns>
+        protected abstract int Read(char[] buffer, int offset, int size);
+
+
+        /// <summary>
+        /// Reads the next lexical element from the input.
+        /// </summary>
+        /// <param name="tokenValue">Receives the value of the token: the unescaped contents for a string,
+        /// the source text for a number or a literal, and <c>null</c> for every other token.</param>
+        /// <param name="tokenType">Receives the type of the token, or <c>JsonTokenType.None</c> at the end
+        /// of the input.</param>
+        /// <returns><c>true</c> if a token was read; <c>false</c> if the end of the input was reached.</returns>
+        /// <remarks>Whitespace between tokens is skipped. An exception is thrown by the underlying
+        /// scanning routines when the input cannot be recognized as a token.</remarks>
+        public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
+            m_tokenBuilder.Clear();
+
+            JsonGrammar.TokenType grammarTag;
+            for (;;) {
+                if (!ReadChunk(m_jsonContext, out grammarTag)) {
+                    // nothing left to read
+                    tokenValue = null;
+                    tokenType = JsonTokenType.None;
+                    return false;
+                }
+
+                if (grammarTag == JsonGrammar.TokenType.Whitespace) {
+                    // whitespace is not reported, drop anything collected for it and go on
+                    m_tokenBuilder.Clear();
+                    continue;
+                }
+
+                if (grammarTag == JsonGrammar.TokenType.StringBound) {
+                    // opening bound of a string, the contents are read by a dedicated routine
+                    tokenValue = ReadString();
+                    tokenType = JsonTokenType.String;
+                } else if (grammarTag == JsonGrammar.TokenType.Number) {
+                    tokenValue = m_tokenBuilder.ToString();
+                    tokenType = JsonTokenType.Number;
+                } else if (grammarTag == JsonGrammar.TokenType.Literal) {
+                    tokenValue = m_tokenBuilder.ToString();
+                    tokenType = JsonTokenType.Literal;
+                } else {
+                    // the remaining grammar tags are converted to the public token type by a direct cast
+                    tokenValue = null;
+                    tokenType = (JsonTokenType)grammarTag;
+                }
+
+                return true;
+            }
+        }
+
+        /// <summary>
+        /// Reads the contents of a string up to its closing bound and returns it with the escape
+        /// sequences replaced by the characters they denote.
+        /// </summary>
+        /// <returns>The contents of the string without the closing bound.</returns>
+        /// <exception cref="ParserException">The input ended before the closing bound was found.</exception>
+        string ReadString() {
+            m_tokenBuilder.Clear();
+
+            JsonGrammar.TokenType elementTag;
+            while (ReadStringChunk(m_stringContext, out elementTag)) {
+                if (elementTag == JsonGrammar.TokenType.StringBound) {
+                    // the closing bound has been appended with the rest of the text, cut it off
+                    m_tokenBuilder.Length -= 1;
+                    return m_tokenBuilder.ToString();
+                }
+
+                if (elementTag == JsonGrammar.TokenType.EscapedUnicode) {
+                    // six-character unicode escape (presumably \uXXXX): the last four characters are
+                    // the hex digits, the whole sequence is replaced with its translation
+                    var builderEnd = m_tokenBuilder.Length;
+                    m_tokenBuilder.CopyTo(builderEnd - 4, m_unescapeBuf, 0, 4);
+                    m_tokenBuilder.Length = builderEnd - 6;
+                    m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
+                } else if (elementTag == JsonGrammar.TokenType.EscapedChar) {
+                    // two-character escape such as \t: the last character selects the replacement
+                    var lastIndex = m_tokenBuilder.Length - 1;
+                    var escapeCode = m_tokenBuilder[lastIndex];
+                    m_tokenBuilder.Length = lastIndex - 1;
+                    m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(escapeCode));
+                }
+                // any other element (e.g. UnescapedChar) stays in the builder as it was scanned
+            }
+
+            throw new ParserException("Unexpected end of data");
+        }
+    }
+}