diff Implab/Formats/JSON/JSONScanner.cs @ 176:0c3c69fe225b ref20160224

rewritten the text scanner
author cin
date Tue, 22 Mar 2016 18:58:40 +0300
parents e227e78d72e4
children a0ff6a0e9c44
line wrap: on
line diff
--- a/Implab/Formats/JSON/JSONScanner.cs	Mon Mar 21 18:41:45 2016 +0300
+++ b/Implab/Formats/JSON/JSONScanner.cs	Tue Mar 22 18:58:40 2016 +0300
@@ -1,25 +1,37 @@
 using System;
 using System.Globalization;
 using Implab.Automaton;
+using System.Text;
+using Implab.Components;
+using System.IO;
+using Implab.Automaton.RegularExpressions;
 
 namespace Implab.Formats.JSON {
     /// <summary>
     /// Сканнер (лексер), разбивающий поток символов на токены JSON.
     /// </summary>
-    public class JSONScanner : Scanner<object> {
-        char[] m_stringBuffer;
-        DFAStateDescriptior<>[] m_stringDFA;
-        int[] m_stringAlphabet;
+    public class JSONScanner : Disposable {
+        readonly StringBuilder m_builder = new StringBuilder();
+
+        readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA;
+        readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA;
+
+
+        readonly TextScanner m_scanner;
 
         /// <summary>
         /// Создает новый экземпляр сканнера
         /// </summary>
-        public JSONScanner()
-            : base(JSONGrammar.Instance.JsonDFA.GetTransitionTable(), JSONGrammar.Instance.JsonDFA.Alphabet.GetTranslationMap()) {
-            m_stringBuffer = new char[1024];
-            var dfa = JSONGrammar.Instance.JsonStringDFA;
-            m_stringAlphabet = dfa.Alphabet.GetTranslationMap();
-            m_stringDFA = dfa.States;
+        public JSONScanner(string text) {
+            Safe.ArgumentNotEmpty(text, "text");
+
+            m_scanner = new StringScanner(text);
+        }
+
+        public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
+            Safe.ArgumentNotNull(reader, "reader");
+
+            m_scanner = new ReaderScanner(reader);
         }
 
         /// <summary>
@@ -31,19 +43,20 @@
         /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е.
         /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
         public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
-            if (ReadTokenInternal()) {
-                switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
+            JSONGrammar.TokenType[] tag;
+            if (m_jsonScanner.Execute(m_scanner, out tag)) {
+                switch (tag[0]) {
                     case JSONGrammar.TokenType.StringBound:
                         tokenValue = ReadString();
                         tokenType = JsonTokenType.String;
                         break;
                     case JSONGrammar.TokenType.Number:
-                        tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture);
+                        tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture);
                         tokenType = JsonTokenType.Number;
                         break;
                     default:
-                        tokenType = (JsonTokenType)m_currentState.tag[0];
-                        tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen);
+                        tokenType = (JsonTokenType)tag[0];
+                        tokenValue = m_scanner.GetTokenValue();
                         break;
                 }
                 return true;
@@ -55,26 +68,26 @@
 
         string ReadString() {
             int pos = 0;
-            Switch(m_stringDFA, m_stringAlphabet);
-            while (ReadTokenInternal()) {
-                switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
+            char[] buf = new char[6]; // the buffer for unescaping chars
+
+            JSONGrammar.TokenType[] tag;
+            m_builder.Clear();
+
+            while (m_stringScanner.Execute(m_scanner, out tag)) {
+                switch (tag[0]) {
                     case JSONGrammar.TokenType.StringBound:
-                        Restore();
-                        return new String(m_stringBuffer, 0, pos);
+                        return m_builder.ToString();
                     case JSONGrammar.TokenType.UnescapedChar:
-                        EnsureStringBufferSize(pos + m_tokenLen);
-                        Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen);
-                        pos += m_tokenLen;
+                        m_scanner.CopyTokenTo(m_builder);
                         break;
-                    case JSONGrammar.TokenType.EscapedUnicode:
-                        EnsureStringBufferSize(pos + 1);
-                        m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2);
+                    case JSONGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
+                        m_scanner.CopyTokenTo(buf, 0); 
+                        m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2));
                         pos++;
                         break;
-                    case JSONGrammar.TokenType.EscapedChar:
-                        EnsureStringBufferSize(pos + 1);
-                        m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]);
-                        pos++;
+                    case JSONGrammar.TokenType.EscapedChar:  // \t - escape sequence
+                        m_scanner.CopyTokenTo(buf, 0);
+                        m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
                         break;
                     default:
                         break;
@@ -84,13 +97,5 @@
 
             throw new ParserException("Unexpected end of data");
         }
-
-        void EnsureStringBufferSize(int size) {
-            if (size > m_stringBuffer.Length) {
-                var newBuffer = new char[size];
-                m_stringBuffer.CopyTo(newBuffer, 0);
-                m_stringBuffer = newBuffer;
-            }
-        }
     }
 }