Mercurial > pub > ImplabNet
diff Implab/Formats/JSON/JSONScanner.cs @ 176:0c3c69fe225b ref20160224
rewritten the text scanner
author | cin |
---|---|
date | Tue, 22 Mar 2016 18:58:40 +0300 |
parents | e227e78d72e4 |
children | a0ff6a0e9c44 |
line wrap: on
line diff
--- a/Implab/Formats/JSON/JSONScanner.cs Mon Mar 21 18:41:45 2016 +0300 +++ b/Implab/Formats/JSON/JSONScanner.cs Tue Mar 22 18:58:40 2016 +0300 @@ -1,25 +1,37 @@ using System; using System.Globalization; using Implab.Automaton; +using System.Text; +using Implab.Components; +using System.IO; +using Implab.Automaton.RegularExpressions; namespace Implab.Formats.JSON { /// <summary> /// Сканнер (лексер), разбивающий поток символов на токены JSON. /// </summary> - public class JSONScanner : Scanner<object> { - char[] m_stringBuffer; - DFAStateDescriptior<>[] m_stringDFA; - int[] m_stringAlphabet; + public class JSONScanner : Disposable { + readonly StringBuilder m_builder = new StringBuilder(); + + readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA; + readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA; + + + readonly TextScanner m_scanner; /// <summary> /// Создает новый экземпляр сканнера /// </summary> - public JSONScanner() - : base(JSONGrammar.Instance.JsonDFA.GetTransitionTable(), JSONGrammar.Instance.JsonDFA.Alphabet.GetTranslationMap()) { - m_stringBuffer = new char[1024]; - var dfa = JSONGrammar.Instance.JsonStringDFA; - m_stringAlphabet = dfa.Alphabet.GetTranslationMap(); - m_stringDFA = dfa.States; + public JSONScanner(string text) { + Safe.ArgumentNotEmpty(text, "text"); + + m_scanner = new StringScanner(text); + } + + public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { + Safe.ArgumentNotNull(reader, "reader"); + + m_scanner = new ReaderScanner(reader); } /// <summary> @@ -31,19 +43,20 @@ /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { - if (ReadTokenInternal()) { - switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { + JSONGrammar.TokenType[] tag; + if (m_jsonScanner.Execute(m_scanner, out tag)) { + switch (tag[0]) { case JSONGrammar.TokenType.StringBound: tokenValue = ReadString(); tokenType = JsonTokenType.String; break; case JSONGrammar.TokenType.Number: - tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture); + tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture); tokenType = JsonTokenType.Number; break; default: - tokenType = (JsonTokenType)m_currentState.tag[0]; - tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen); + tokenType = (JsonTokenType)tag[0]; + tokenValue = m_scanner.GetTokenValue(); break; } return true; @@ -55,26 +68,26 @@ string ReadString() { int pos = 0; - Switch(m_stringDFA, m_stringAlphabet); - while (ReadTokenInternal()) { - switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { + char[] buf = new char[6]; // the buffer for unescaping chars + + JSONGrammar.TokenType[] tag; + m_builder.Clear(); + + while (m_stringScanner.Execute(m_scanner, out tag)) { + switch (tag[0]) { case JSONGrammar.TokenType.StringBound: - Restore(); - return new String(m_stringBuffer, 0, pos); + return m_builder.ToString(); case JSONGrammar.TokenType.UnescapedChar: - EnsureStringBufferSize(pos + m_tokenLen); - Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen); - pos += m_tokenLen; + m_scanner.CopyTokenTo(m_builder); break; - case JSONGrammar.TokenType.EscapedUnicode: - EnsureStringBufferSize(pos + 1); - m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2); + case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence + m_scanner.CopyTokenTo(buf, 0); + m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2)); pos++; break; - case JSONGrammar.TokenType.EscapedChar: - EnsureStringBufferSize(pos + 1); - m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]); - pos++; + case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence + m_scanner.CopyTokenTo(buf, 0); + m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); break; default: break; @@ -84,13 +97,5 @@ throw new ParserException("Unexpected end of data"); } - - void EnsureStringBufferSize(int size) { - if (size > m_stringBuffer.Length) { - var newBuffer = new char[size]; - m_stringBuffer.CopyTo(newBuffer, 0); - m_stringBuffer = newBuffer; - } - } } }