diff Implab/Formats/BufferScanner.cs @ 173:ecfece82ca11 ref20160224

Working on text scanner
author cin
date Tue, 15 Mar 2016 02:11:06 +0300
parents
children 983df35b3ca1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/BufferScanner.cs	Tue Mar 15 02:11:06 2016 +0300
@@ -0,0 +1,233 @@
+using System;
+using Implab.Automaton.RegularExpressions;
+using Implab.Automaton;
+
+namespace Implab.Formats {
+    /// <summary>
+    /// Incremental scanner that runs a DFA over a growable character buffer.
+    /// </summary>
+    /// <remarks>
+    /// Typical use: attach a buffer with <c>Init</c>, make room with <c>Extend</c>,
+    /// write new characters starting at <c>HiMark</c>, publish them with
+    /// <c>RaiseMark</c> and call <c>Scan</c>. This is a mutable struct, so every
+    /// copy carries its own independent state.
+    /// </remarks>
+    // NOTE(review): nothing visible here resets m_state or moves m_offset once a
+    // token is complete; presumably that is still to be written - confirm.
+    public struct BufferScanner<TTag> {
+        // buffer currently being scanned, null until Init is called
+        char[] m_buffer;
+        // index in m_buffer where the current token starts
+        int m_offset;
+        // index of the next character to feed to the DFA
+        int m_position;
+        // index one past the last valid character in m_buffer
+        int m_hi;
+
+        // maximum number of characters added by a single Extend call
+        readonly int m_chunk;
+        // maximum size of the buffer counted from the token start
+        readonly int m_limit;
+
+        readonly DFAStateDescriptor<TTag>[] m_dfa;
+        // index of the current state in m_dfa
+        int m_state;
+
+        /// <summary>
+        /// Creates a scanner for the specified automaton, no buffer is attached yet.
+        /// </summary>
+        /// <param name="dfa">State table of the automaton.</param>
+        /// <param name="initialState">Index of the state the scan starts from.</param>
+        /// <param name="chunk">Maximum growth of the buffer per <c>Extend</c> call.</param>
+        /// <param name="limit">Maximum size of the buffer counted from the token start.</param>
+        public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
+            m_dfa = dfa;
+            m_state = initialState;
+            m_chunk = chunk;
+            m_limit = limit;
+            m_buffer = null;
+            m_offset = 0;
+            m_position = 0;
+            m_hi = 0;
+        }
+
+        /// <summary>The buffer being scanned, <c>Extend</c> may replace it.</summary>
+        public char[] Buffer {
+            get {
+                return m_buffer;
+            }
+        }
+
+        /// <summary>Index one past the last valid character in the buffer.</summary>
+        public int HiMark {
+            get {
+                return m_hi;
+            }
+        }
+
+        /// <summary>Index of the next character to be scanned.</summary>
+        public int Position {
+            get {
+                return m_position;
+            }
+        }
+
+        /// <summary>Number of valid characters which are not scanned yet.</summary>
+        public int Length {
+            get {
+                return m_hi - m_position;
+            }
+        }
+
+        /// <summary>Index in the buffer where the current token starts.</summary>
+        public int TokenOffset {
+            get {
+                return m_offset;
+            }
+        }
+
+        /// <summary>Number of characters of the current token scanned so far.</summary>
+        public int TokenLength {
+            get {
+                return m_position - m_offset;
+            }
+        }
+
+        /// <summary>
+        /// Attaches a buffer and places the token start and the scan position at
+        /// <paramref name="position"/>.
+        /// </summary>
+        /// <param name="buffer">The buffer to scan.</param>
+        /// <param name="position">Index of the first valid character.</param>
+        /// <param name="length">Number of valid characters starting at <paramref name="position"/>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">The range does not fit into the buffer.</exception>
+        public void Init(char[] buffer, int position, int length) {
+            if (buffer == null)
+                throw new ArgumentNullException("buffer");
+            if (position < 0 || position > buffer.Length)
+                throw new ArgumentOutOfRangeException("position");
+            // written as a subtraction so that position + length cannot overflow
+            if (length < 0 || length > buffer.Length - position)
+                throw new ArgumentOutOfRangeException("length");
+
+            m_buffer = buffer;
+            m_position = position;
+            m_offset = position;
+            m_hi = position + length;
+        }
+
+        /// <summary>
+        /// Makes sure there is free space after <c>HiMark</c>.
+        /// </summary>
+        /// <remarks>
+        /// When the buffer is full a larger one is allocated, the current token and the
+        /// rest of the valid data are moved to its start and all indexes are rebased,
+        /// so <c>Buffer</c> has to be read again after this call.
+        /// </remarks>
+        /// <returns>Number of free characters available after <c>HiMark</c>.</returns>
+        /// <exception cref="InvalidOperationException">No buffer is attached.</exception>
+        /// <exception cref="ParserException">The buffer has reached its size limit.</exception>
+        public int Extend() {
+            if (m_buffer == null)
+                throw new InvalidOperationException("The buffer isn't initialized");
+
+            // free space
+            var free = m_buffer.Length - m_hi;
+
+            // if the buffer has enough free space
+            if (free > 0)
+                return free;
+
+            // effective size of the buffer, the data before the token start is dropped
+            var size = m_buffer.Length - m_offset;
+
+            // calculate the new size
+            int grow = Math.Min(m_limit - size, m_chunk);
+            if (grow <= 0)
+                throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
+
+            var temp = new char[size + grow];
+            Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);
+            m_position -= m_offset;
+            m_hi -= m_offset;
+            m_offset = 0;
+            m_buffer = temp;
+
+            return free + grow;
+        }
+
+        /// <summary>
+        /// Publishes <paramref name="size"/> characters written after <c>HiMark</c>.
+        /// </summary>
+        /// <param name="size">Number of characters added to the valid data.</param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="size"/> is negative or exceeds the free space of the buffer.
+        /// </exception>
+        public void RaiseMark(int size) {
+            var capacity = m_buffer == null ? 0 : m_buffer.Length;
+            if (size < 0 || size > capacity - m_hi)
+                throw new ArgumentOutOfRangeException("size");
+
+            m_hi += size;
+        }
+
+        /// <summary>
+        /// Feeds the valid characters to the automaton starting from <c>Position</c>.
+        /// </summary>
+        /// <returns>
+        /// <c>true</c> - all valid characters are consumed and additional data is required,
+        /// <c>false</c> - the automaton is in a final state and can't accept the next character.
+        /// </returns>
+        /// <exception cref="ParserException">
+        /// The automaton can't accept the next character and isn't in a final state.
+        /// </exception>
+        public bool Scan() {
+            while (m_position < m_hi) {
+                var ch = m_buffer[m_position];
+                // assumes the transitions table covers every char value - TODO confirm
+                var next = m_dfa[m_state].transitions[(int)ch];
+                if (next == DFAConst.UNREACHABLE_STATE) {
+                    if (m_dfa[m_state].final)
+                        return false;
+
+                    // the rejected character is at m_position, include it in the message
+                    throw new ParserException(
+                        String.Format(
+                            "Unexpected token '{0}'",
+                            new string(m_buffer, m_offset, m_position - m_offset + 1)
+                        )
+                    );
+                }
+                m_state = next;
+                m_position++;
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Signals the end of the input.
+        /// </summary>
+        /// <exception cref="ParserException">The automaton isn't in a final state.</exception>
+        public void Eof() {
+            if (!m_dfa[m_state].final)
+                throw new ParserException(
+                    String.Format(
+                        "Unexpected token '{0}'",
+                        m_buffer == null
+                            ? String.Empty
+                            : new string(m_buffer, m_offset, m_position - m_offset)
+                    )
+                );
+        }
+
+        /// <summary>
+        /// Gets the tags of the current state of the automaton.
+        /// </summary>
+        public TTag[] GetTokenTags() {
+            return m_dfa[m_state].tags;
+        }
+    }
+}
+