Mercurial > pub > ImplabNet
view Implab/Formats/TextScanner.cs @ 180:c32688129f14 ref20160224
refactoring complete, JSONParser rewritten
author | cin |
---|---|
date | Thu, 24 Mar 2016 02:30:46 +0300 |
parents | d5c5db0335ee |
children | b2b6a6640aa3 |
line wrap: on
line source
using System;
using Implab.Components;
using System.Diagnostics;
using Implab.Automaton;
using System.Text;

namespace Implab.Formats {
    /// <summary>
    /// Base class for buffered text scanners. The input is split into tokens by
    /// running a table-driven automaton over an internal character buffer which
    /// is either supplied by the caller or filled chunk by chunk through
    /// <see cref="Read(char[], int, int)"/>.
    /// </summary>
    public abstract class TextScanner : Disposable {
        // upper limit for the size of a reallocated buffer, see Feed()
        readonly int m_bufferMax;
        // number of characters requested from Read() at once; values <= 0 disable feeding
        readonly int m_chunkSize;

        char[] m_buffer;
        // index of the first character in m_buffer which isn't consumed yet
        int m_bufferOffset;
        // index right after the last valid character in m_buffer
        int m_bufferSize;
        // position and length of the last token inside m_buffer
        int m_tokenOffset;
        int m_tokenLength;

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which reads the input in chunks using <see cref="Read(char[], int, int)"/>.
        /// </summary>
        /// <param name="bufferMax">The maximum size of the internal buffer, in characters.</param>
        /// <param name="chunkSize">The number of characters requested from the input at once.</param>
        protected TextScanner(int bufferMax, int chunkSize) {
            // check the arguments, the fields aren't assigned at this point yet
            Debug.Assert(chunkSize <= bufferMax);

            m_bufferMax = bufferMax;
            m_chunkSize = chunkSize;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// which scans the specified buffer. The chunk size is left zero, therefore
        /// <see cref="Feed()"/> never reads additional data for such scanner.
        /// </summary>
        /// <param name="buffer">The buffer to scan, <c>null</c> is treated as an empty input.</param>
        protected TextScanner(char[] buffer) {
            if (buffer != null) {
                m_buffer = buffer;
                m_bufferSize = buffer.Length;
            }
        }

        /// <summary>
        /// (hungry) Reads the next token. The automaton is advanced while it has a
        /// transition for the next character, more input is requested with
        /// <see cref="Feed()"/> when the buffer is exhausted.
        /// </summary>
        /// <returns><c>true</c>, if the token was read, <c>false</c> if there are no more tokens in the stream.</returns>
        /// <param name="dfa">The transition map for the automaton, indexed by the state and the input class.</param>
        /// <param name="final">Final states of the automaton.</param>
        /// <param name="tags">Tags of the states, the tags of the reached final state are returned in <paramref name="tag"/>.</param>
        /// <param name="state">The initial state for the automaton.</param>
        /// <param name="alphabet">Maps a character code to the input class, characters outside of the map
        /// are translated to <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
        /// <param name="tag">Receives the tags of the matched token or <c>null</c>.</param>
        /// <exception cref="ParserException">The automaton stopped in a state which isn't final.</exception>
        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
            m_tokenLength = 0;
            tag = null;

            var maxSymbol = alphabet.Length - 1;

            // The candidate transition is kept apart from the current state: when
            // there is no transition for a character the last valid state must
            // survive, since it is used below to index 'final' and 'tags'.
            int next = state;

            do {
                // after the next chunk is read the offset in the buffer may change
                int pos = m_bufferOffset + m_tokenLength;

                while (pos < m_bufferSize) {
                    var ch = m_buffer[pos];

                    next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
                    if (next == AutomatonConst.UNREACHABLE_STATE)
                        break;

                    state = next;
                    pos++;
                }

                m_tokenLength = pos - m_bufferOffset;
            } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

            m_tokenOffset = m_bufferOffset;
            m_bufferOffset += m_tokenLength;

            if (final[state]) {
                tag = tags[state];
                return true;
            }

            if (m_bufferOffset == m_bufferSize) {
                if (m_tokenLength == 0) //EOF
                    return false;

                // the input ended in the middle of a token
                throw new ParserException();
            }

            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
        }

        /// <summary>
        /// Replaces the internal buffer with the specified range of characters.
        /// </summary>
        /// <param name="buffer">The new buffer.</param>
        /// <param name="offset">The index of the first character to scan.</param>
        /// <param name="length">The number of characters available starting from <paramref name="offset"/>.</param>
        protected void Feed(char[] buffer, int offset, int length) {
            m_buffer = buffer;
            m_bufferOffset = offset;
            m_bufferSize = offset + length;
        }

        /// <summary>
        /// Reads the next chunk of the input into the internal buffer. The buffer is
        /// reallocated when there is not enough free space for a whole chunk, the
        /// characters which aren't consumed yet are moved to its beginning.
        /// </summary>
        /// <returns><c>true</c> if some characters were read, <c>false</c> if nothing was read
        /// or the scanner has no chunk size.</returns>
        /// <exception cref="ParserException">The required buffer size exceeds the buffer limit.</exception>
        protected bool Feed() {
            if (m_chunkSize <= 0)
                return false;

            if (m_buffer != null) {
                var free = m_buffer.Length - m_bufferSize;

                if (free < m_chunkSize) {
                    free += m_chunkSize;
                    var used = m_bufferSize - m_bufferOffset;
                    var size = used + free;

                    if (size > m_bufferMax)
                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                    var temp = new char[size];

                    // read first, nothing is changed if there is no more data
                    var read = Read(temp, used, m_chunkSize);
                    if (read == 0)
                        return false;

                    Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

                    m_bufferOffset = 0;
                    m_bufferSize = used + read;
                    m_buffer = temp;
                } else {
                    var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                    if (read == 0)
                        return false;

                    // only the characters actually read are valid, the chunk may be incomplete
                    m_bufferSize += read;
                }
                return true;
            } else {
                Debug.Assert(m_bufferOffset == 0);
                m_buffer = new char[m_chunkSize];
                m_bufferSize = Read(m_buffer, 0, m_chunkSize);
                return (m_bufferSize != 0);
            }
        }

        /// <summary>
        /// Reads characters from the underlying input.
        /// </summary>
        /// <returns>The number of characters read, zero is treated as the end of the input.</returns>
        /// <param name="buffer">The buffer to store the characters.</param>
        /// <param name="offset">The index in the buffer to start writing at.</param>
        /// <param name="size">The maximum number of characters to read.</param>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Gets the text of the last token.
        /// </summary>
        /// <returns>The new string with the characters of the token.</returns>
        public string GetTokenValue() {
            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// Copies the characters of the last token to the specified array.
        /// </summary>
        /// <param name="buffer">The destination array.</param>
        /// <param name="offset">The index in the destination array to start writing at.</param>
        public void CopyTokenTo(char[] buffer, int offset) {
            // copy the token only, not the whole internal buffer
            Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
        }

        /// <summary>
        /// Appends the characters of the last token to the specified string builder.
        /// </summary>
        /// <param name="sb">The string builder to append the token to.</param>
        public void CopyTokenTo(StringBuilder sb) {
            sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
        }
    }
}