# HG changeset patch # User cin # Date 1458574905 -10800 # Node ID 96a89dcb40601376efdd8f1bd612cd24db68a3a7 # Parent 983df35b3ca112300daf992688b8fff6d25cfa4f sync diff -r 983df35b3ca1 -r 96a89dcb4060 Implab/Formats/TextScanner.cs --- a/Implab/Formats/TextScanner.cs Fri Mar 18 18:10:30 2016 +0300 +++ b/Implab/Formats/TextScanner.cs Mon Mar 21 18:41:45 2016 +0300 @@ -1,52 +1,45 @@ using System; using Implab.Components; +using Implab.Automaton.RegularExpressions; +using System.Diagnostics; +using Implab.Automaton; namespace Implab.Formats { public abstract class TextScanner : Disposable { - readonly int[] m_buffer; + int m_maxSymbol; + int[] m_symbolMap; + + readonly char[] m_buffer; int m_bufferOffset; - int m_dataLength; + int m_bufferSize; int m_tokenLength; TTag[] m_tags; - BufferScanner m_scanner; - - protected bool ReadTokenInternal() { - if (EOF) - return false; - - // create a new scanner from template (scanners are value types) - var inst = m_scanner; - - m_tokenLength = 0; + protected bool ReadTokenInternal(DFAStateDescriptor[] dfa, int state) { + Debug.Assert(dfa != null); - while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) { - m_tokenLength += m_dataLength; + do { + for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) { + var ch = m_buffer[pos]; + state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]]; + if (state == DFAConst.UNREACHABLE_STATE) + break; + } + } while (Feed()); - var actual = Read(m_buffer, 0, m_buffer.Length); - - m_bufferOffset = 0; - m_dataLength = actual; + if (dfa[state].final) { - if (actual == 0) { - inst.Eof(); - break; - } } - var len = inst.Position - m_bufferOffset; - m_tokenLength += len; - m_dataLength -= len; - m_bufferOffset = inst.Position; - - // save result; - - m_tags = inst.GetTokenTags(); } - protected abstract int Read(int[] buffer, int offset, int size); + bool Feed() { + + } + + protected abstract int Read(char[] buffer, int offset, int size); protected TTag[] Tags { get { @@ -54,8 +47,7 @@ } } - public abstract bool EOF { get; } - + } }