diff Implab/Formats/JSON/JsonScanner.cs @ 229:5f7a3e1d32b9 v2

JsonXmlReader performance tuning. JsonScanner now operates on strings and doesn't parse numbers and literals. Added SerializationHelpers for common serialize/deserialize operations.
author cin
date Tue, 12 Sep 2017 19:07:42 +0300
parents 6fa235c5a760
children
line wrap: on
line diff
--- a/Implab/Formats/JSON/JsonScanner.cs	Tue Sep 12 01:19:12 2017 +0300
+++ b/Implab/Formats/JSON/JsonScanner.cs	Tue Sep 12 19:07:42 2017 +0300
@@ -25,38 +25,94 @@
             m_length = length;
         }
 
-        bool Read(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
-            scanner.Reset();
+        bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { // scans one token from m_buffer, refilling it via Read(...) on demand; returns false when the input has ended
+            scanner.ResetState();
+
+            while(scanner.Scan(m_buffer, m_pos, m_length)) {
+                // Scan returned true: the scanner requests new data
 
-            if (m_pos == m_length) {
-                m_pos = 0;
+                if (m_pos != m_length) // save the remaining buffer contents [m_pos, m_length) before the buffer is refilled
+                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
+                
+                // read the next portion of data into the start of the buffer
                 m_length = Read(m_buffer, 0, m_buffer.Length);
+
                 if (m_length == 0) {
-                    tokenType = JsonGrammar.TokenType.None;
-                    return false; // EOF
+                    // no data was read
+                    if (scanner.Position == m_pos) {
+                        // the scanner hasn't moved, that's the end of the input
+                        m_pos = 0;
+                        tokenType = JsonGrammar.TokenType.None;
+                        return false;
+                    }
+
+                    if (scanner.IsFinal) { // input ended after the scanner advanced: accept the token only if the scanner is in a final state
+                        m_pos = 0;
+                        tokenType = scanner.Tag;
+                        return true;
+                    } else {
+                        throw new ParserException("Unexpected EOF");
+                    }
                 }
-            }
-            
-            while(scanner.Scan(m_buffer, m_pos, m_length - m_pos)) {
-                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
+
                 m_pos = 0;
-                m_length = Read(m_buffer, 0, m_buffer.Length);
             }
             var scannerPos = scanner.Position;
+
+            // the scanner stopped at scannerPos
+            if (!scanner.IsFinal)
+                throw new ParserException($"Unexpected character '{m_buffer[scannerPos + 1]}'"); // NOTE(review): reports m_buffer[scannerPos + 1]; confirm this is the offending character and that the index stays inside the buffer
+
+            tokenType = scanner.Tag;
+            if (scannerPos != m_pos && tokenType == JsonGrammar.TokenType.Number || tokenType == JsonGrammar.TokenType.Literal) // NOTE(review): && binds tighter than ||, so this is (moved && Number) || Literal; confirm that is intended
+                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
+            
+            m_pos = scannerPos;
+            return true;
+        }
+
+        bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { // scans one token from m_buffer, refilling it via Read(...) on demand, and appends the scanned text to m_tokenBuilder; returns false when the input has ended
+            scanner.ResetState();
+
+            while (scanner.Scan(m_buffer, m_pos, m_length)) {
+                // Scan returned true: the scanner requests new data
+
+                if (m_pos != m_length) // save the remaining buffer contents [m_pos, m_length) before the buffer is refilled
+                    m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
+
+                // read the next portion of data into the start of the buffer
+                m_length = Read(m_buffer, 0, m_buffer.Length);
+
+                if (m_length == 0) {
+                    // no data was read
+                    if (scanner.Position == m_pos) {
+                        // the scanner hasn't moved, that's the end of the input
+                        m_pos = 0;
+                        tokenType = JsonGrammar.TokenType.None;
+                        return false;
+                    }
+
+                    if (scanner.IsFinal) { // input ended after the scanner advanced: accept the token only if the scanner is in a final state
+                        m_pos = 0;
+                        tokenType = scanner.Tag;
+                        return true;
+                    } else {
+                        throw new ParserException("Unexpected EOF");
+                    }
+                }
+
+                m_pos = 0;
+            }
+            var scannerPos = scanner.Position;
+
+            // the scanner stopped at scannerPos
+            if (!scanner.IsFinal)
+                throw new ParserException($"Unexpected character '{m_buffer[scannerPos + 1]}'"); // NOTE(review): reports m_buffer[scannerPos + 1]; confirm this is the offending character and that the index stays inside the buffer
+
             if (scannerPos != m_pos) { // capture the scanned text [m_pos, scannerPos) and advance past it
                 m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
                 m_pos = scannerPos;
             }
-
-            if (!scanner.IsFinal) {
-                if (m_length == 0) {
-                    // unexpected EOF
-                    throw new ParserException("Unexpected EOF");
-                } else {
-                    // unecpected character
-                    throw new ParserException($"Unexpected character '{m_buffer[m_pos + 1]}'");
-                }
-            }
             tokenType = scanner.Tag;
             return true;
         }
@@ -72,17 +128,17 @@
         /// <returns><c>true</c> - the token was read successfully. <c>false</c> - the end of the input data has been reached</returns>
         /// <remarks>If a token cannot be recognized, an exception is thrown. Token values are processed, i.e.
         /// escape sequences in strings are processed; numbers are returned as their source text (they are no longer converted to double).</remarks>
-        public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
+        public bool ReadToken(out string tokenValue, out JsonTokenType tokenType) {
             JsonGrammar.TokenType tag;
             m_tokenBuilder.Clear();
-            while (Read(m_jsonContext, out tag)) {
+            while (ReadChunk(m_jsonContext, out tag)) {
                 switch (tag) {
                     case JsonGrammar.TokenType.StringBound:
                         tokenValue = ReadString();
                         tokenType = JsonTokenType.String;
                         break;
                     case JsonGrammar.TokenType.Number:
-                        tokenValue = Double.Parse(m_tokenBuilder.ToString(), CultureInfo.InvariantCulture);
+                        tokenValue = m_tokenBuilder.ToString();
                         tokenType = JsonTokenType.Number;
                         break;
                     case JsonGrammar.TokenType.Literal:
@@ -108,7 +164,7 @@
             JsonGrammar.TokenType tag;
             m_tokenBuilder.Clear();
 
-            while (Read(m_stringContext, out tag)) {
+            while (ReadStringChunk(m_stringContext, out tag)) {
                 switch (tag) {
                     case JsonGrammar.TokenType.StringBound:
                         m_tokenBuilder.Length--;