diff Implab/Formats/TextScanner.cs @ 192:f1da3afc3521 release v2.1

Слияние с v2
author cin
date Fri, 22 Apr 2016 13:10:34 +0300
parents 76e8f2ba12b8
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/TextScanner.cs	Fri Apr 22 13:10:34 2016 +0300
@@ -0,0 +1,157 @@
+using System;
+using Implab.Components;
+using System.Diagnostics;
+using Implab.Automaton;
+using System.Text;
+
+namespace Implab.Formats {
+    /// <summary>
+    /// Base class for scanners which split a character buffer into tokens using
+    /// a table driven automaton. The input is either supplied in chunks by the
+    /// <c>Read</c> method of a derived class or handed over as a ready buffer.
+    /// </summary>
+    public abstract class TextScanner : Disposable {
+        // The largest size (in chars) the buffer may grow to when it is reallocated.
+        readonly int m_bufferMax;
+        // The number of chars requested from Read() per call, values <= 0 disable reading.
+        readonly int m_chunkSize;
+
+        char[] m_buffer;
+        // The index of the first char in the buffer which isn't consumed yet.
+        int m_bufferOffset;
+        // The index just past the last valid char in the buffer.
+        int m_bufferSize;
+        // The start and the length of the last read token inside the buffer.
+        int m_tokenOffset;
+        int m_tokenLength;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
+        /// which fills its buffer on demand using the <c>Read</c> method.
+        /// </summary>
+        /// <param name="bufferMax">The limit for the size of the internal buffer, in chars.</param>
+        /// <param name="chunkSize">The number of chars requested from <c>Read</c> per call.</param>
+        protected TextScanner(int bufferMax, int chunkSize) {
+            // check the arguments, the fields are still zero at this point and
+            // an assertion on them would never fail
+            Debug.Assert(chunkSize <= bufferMax);
+
+            m_bufferMax = bufferMax;
+            m_chunkSize = chunkSize;
+        }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
+        /// over an already filled buffer. The chunk size is left zero, therefore
+        /// <c>Feed()</c> never calls <c>Read</c> for such scanner.
+        /// </summary>
+        /// <param name="buffer">The characters to scan, <c>null</c> leaves the scanner without a buffer.</param>
+        protected TextScanner(char[] buffer) {
+            if (buffer != null) {
+                m_buffer = buffer;
+                m_bufferSize = buffer.Length;
+            }
+        }
+
+        /// <summary>
+        /// (hungry) Reads the next token.
+        /// </summary>
+        /// <returns><c>true</c> if the token was read, <c>false</c> if there are no more tokens in the stream.</returns>
+        /// <param name="dfa">The transition map for the automaton, indexed by the state and the input class.</param>
+        /// <param name="final">Final states of the automaton, indexed by the state.</param>
+        /// <param name="tags">Tags of the states, indexed by the state.</param>
+        /// <param name="state">The initial state for the automaton.</param>
+        /// <param name="alphabet">Maps a char code to the input class, chars beyond the end of this
+        /// table are translated to <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
+        /// <param name="tag">Receives the tags of the final state when the token is read, otherwise <c>null</c>.</param>
+        /// <typeparam name="TTag">The type of the tags.</typeparam>
+        /// <exception cref="ParserException">The input has ended in a non-final state or
+        /// an unexpected symbol is encountered.</exception>
+        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
+            m_tokenLength = 0;
+            tag = null;
+
+            var maxSymbol = alphabet.Length - 1;
+            int next;
+            do {
+                // after the next chunk is read the offset in the buffer may change
+                int pos = m_bufferOffset + m_tokenLength;
+                next = state;
+                while (pos < m_bufferSize) {
+                    var ch = m_buffer[pos];
+
+                    next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
+
+                    // there is no transition for this char, the token ends right before it
+                    if (next == AutomatonConst.UNREACHABLE_STATE)
+                        break;
+
+                    state = next;
+                    pos++;
+                }
+                m_tokenLength = pos - m_bufferOffset;
+                // the buffer is exhausted but the automaton can go further, try to get more data
+            } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
+
+            // remember the token and move past it
+            m_tokenOffset = m_bufferOffset;
+            m_bufferOffset += m_tokenLength;
+
+            if (final[state]) {
+                tag = tags[state];
+                return true;
+            }
+
+            if (m_bufferOffset == m_bufferSize) {
+                if (m_tokenLength == 0) //EOF
+                    return false;
+
+                // the input has ended in the middle of a token
+                throw new ParserException();
+            }
+
+            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
+
+        }
+
+        /// <summary>
+        /// Replaces the current buffer with the specified one.
+        /// </summary>
+        /// <param name="buffer">The new buffer.</param>
+        /// <param name="offset">The index of the first char to scan.</param>
+        /// <param name="length">The number of chars to scan starting from <paramref name="offset"/>.</param>
+        protected void Feed(char[] buffer, int offset, int length) {
+            m_buffer = buffer;
+            m_bufferOffset = offset;
+            m_bufferSize = offset + length;
+        }
+
+        /// <summary>
+        /// Reads the next chunk of data using the <c>Read</c> method and appends it to
+        /// the buffer. When there is not enough free space in the buffer, a new one is
+        /// allocated and the data which isn't consumed yet is moved to its beginning.
+        /// </summary>
+        /// <returns><c>true</c> if some data was read, <c>false</c> if the chunk size
+        /// isn't positive or <c>Read</c> has returned zero.</returns>
+        /// <exception cref="ParserException">The size of the new buffer exceeds the limit.</exception>
+        protected bool Feed() {
+            if (m_chunkSize <= 0)
+                return false;
+
+            if (m_buffer != null) {
+                var free = m_buffer.Length - m_bufferSize;
+
+                if (free < m_chunkSize) {
+                    free += m_chunkSize;
+                    var used = m_bufferSize - m_bufferOffset;
+                    var size = used + free;
+
+                    if (size > m_bufferMax)
+                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));
+
+                    var temp = new char[size];
+
+                    // read first, the current buffer remains untouched if there is no more data
+                    var read = Read(temp, used, m_chunkSize);
+                    if (read == 0)
+                        return false;
+
+                    Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);
+
+                    m_bufferOffset = 0;
+                    m_bufferSize = used + read;
+                    m_buffer = temp;
+                } else {
+                    var read = Read(m_buffer, m_bufferSize, m_chunkSize);
+                    if (read == 0)
+                        return false;
+                    // advance by the actual number of chars, the chunk can be read partially
+                    m_bufferSize += read;
+                }
+                return true;
+            } else {
+                Debug.Assert(m_bufferOffset == 0);
+                m_buffer = new char[m_chunkSize];
+                m_bufferSize = Read(m_buffer, 0, m_chunkSize);
+                return (m_bufferSize != 0);
+            }
+        }
+
+        /// <summary>
+        /// Reads the next portion of the input into the specified buffer.
+        /// </summary>
+        /// <returns>The number of chars actually read, zero is treated by <c>Feed()</c>
+        /// as the end of the input.</returns>
+        /// <param name="buffer">The buffer to store the chars.</param>
+        /// <param name="offset">The index in the buffer to store the first char at.</param>
+        /// <param name="size">The number of chars requested.</param>
+        protected abstract int Read(char[] buffer, int offset, int size);
+
+        /// <summary>
+        /// Gets the text of the last read token.
+        /// </summary>
+        /// <returns>The new string with the chars of the token.</returns>
+        public string GetTokenValue() {
+            return new String(m_buffer, m_tokenOffset, m_tokenLength);
+        }
+
+        /// <summary>
+        /// Copies the chars of the last read token to the specified array.
+        /// </summary>
+        /// <param name="buffer">The destination array.</param>
+        /// <param name="offset">The index in the destination array to start from.</param>
+        public void CopyTokenTo(char[] buffer, int offset) {
+            Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
+        }
+
+        /// <summary>
+        /// Appends the chars of the last read token to the specified string builder.
+        /// </summary>
+        /// <param name="sb">The destination string builder.</param>
+        public void CopyTokenTo(StringBuilder sb) {
+            sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
+        }
+
+    }
+}
+