Mercurial > pub > ImplabNet
changeset 173:ecfece82ca11 ref20160224
Working on text scanner
author | cin |
---|---|
date | Tue, 15 Mar 2016 02:11:06 +0300 (2016-03-14) |
parents | 92d5278d1b10 |
children | 983df35b3ca1 |
files | Implab/Formats/BufferScanner.cs Implab/Formats/TextScanner.cs Implab/Implab.csproj |
diffstat | 3 files changed, 217 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
// File: Implab/Formats/BufferScanner.cs (new file in this changeset)
using System;
using Implab.Automaton.RegularExpressions;
using Implab.Automaton;

namespace Implab.Formats {
    /// <summary>
    /// Drives a table-based DFA over a character buffer and keeps track of the
    /// bounds of the token currently being recognized.
    /// </summary>
    /// <remarks>
    /// This is a mutable struct: assigning it to another variable copies the
    /// complete scanner state, and it has to be passed by <c>ref</c> for the
    /// callee's changes to be visible to the caller.
    /// </remarks>
    /// <typeparam name="TTag">Type of the tags attached to DFA states.</typeparam>
    public struct BufferScanner<TTag> {
        char[] m_buffer;
        // index of the first char of the current token
        int m_offset;
        // index of the next char to be examined
        int m_position;
        // index just past the last valid char in the buffer
        int m_hi;

        // maximum number of chars the buffer grows by in one step
        readonly int m_chunk;
        // upper bound for the buffer size counted from the token start
        readonly int m_limit;

        readonly DFAStateDescriptor<TTag>[] m_dfa;
        int m_state;

        /// <summary>
        /// Creates a scanner without a buffer; use <see cref="Init"/> or
        /// <see cref="Extend"/> to supply one.
        /// </summary>
        /// <param name="dfa">State table of the automaton.</param>
        /// <param name="initialState">Index of the state scanning starts from.</param>
        /// <param name="chunk">Maximum number of chars added on every buffer growth.</param>
        /// <param name="limit">Maximum buffer size counted from the token start.</param>
        public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
            m_dfa = dfa;
            m_state = initialState;
            m_chunk = chunk;
            m_limit = limit;
            m_buffer = null;
            m_offset = 0;
            m_position = 0;
            m_hi = 0;
        }

        /// <summary>The current buffer, <c>null</c> until one is assigned or allocated.</summary>
        public char[] Buffer {
            get {
                return m_buffer;
            }
        }

        /// <summary>Index just past the last valid char in <see cref="Buffer"/>.</summary>
        public int HiMark {
            get {
                return m_hi;
            }
        }

        /// <summary>Index of the next char to be examined.</summary>
        public int Position {
            get {
                return m_position;
            }
        }

        /// <summary>Number of chars between <see cref="Position"/> and <see cref="HiMark"/>.</summary>
        public int Length {
            get {
                return m_hi - m_position;
            }
        }

        /// <summary>Index of the first char of the current token.</summary>
        public int TokenOffset {
            get {
                return m_offset;
            }
        }

        /// <summary>Number of chars accepted into the current token so far.</summary>
        public int TokenLength {
            get {
                return m_position - m_offset;
            }
        }

        /// <summary>
        /// Points the scanner at a region of <paramref name="buffer"/>; the token
        /// starts at <paramref name="position"/>.
        /// </summary>
        /// <param name="buffer">Buffer to scan.</param>
        /// <param name="position">Index of the first char to scan.</param>
        /// <param name="length">Number of valid chars starting at <paramref name="position"/>.</param>
        public void Init(char[] buffer, int position, int length) {
            m_buffer = buffer;
            m_position = position;
            m_offset = position;
            m_hi = position + length;
        }

        /// <summary>
        /// Makes sure there is room after <see cref="HiMark"/> for more input.
        /// </summary>
        /// <remarks>
        /// When the buffer is full a new one is allocated and the current token is
        /// moved to its beginning, so <see cref="Buffer"/>, <see cref="HiMark"/>,
        /// <see cref="Position"/> and <see cref="TokenOffset"/> may all change.
        /// </remarks>
        /// <returns>Number of chars that can be written starting at <see cref="HiMark"/>.</returns>
        /// <exception cref="ParserException">The buffer can't grow because of the size limit.</exception>
        public int Extend() {
            // no buffer yet (the scanner was just constructed or was initialized
            // with a null buffer): allocate the first chunk instead of failing
            // with a NullReferenceException
            if (m_buffer == null) {
                var initial = Math.Min(m_limit, m_chunk);
                if (initial <= 0)
                    throw LimitExceeded();

                m_buffer = new char[initial];
                m_offset = 0;
                m_position = 0;
                m_hi = 0;
                return initial;
            }

            // free space after the high mark
            var free = m_buffer.Length - m_hi;

            // the buffer still has room
            if (free > 0)
                return free;

            // effective size of the buffer: everything before the token start is
            // dropped when the data is moved
            var size = m_buffer.Length - m_offset;

            // calculate how much the buffer can grow
            int grow = Math.Min(m_limit - size, m_chunk);
            if (grow <= 0)
                throw LimitExceeded();

            var temp = new char[size + grow];
            Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);

            // all indices are now relative to the token start
            m_position -= m_offset;
            m_hi -= m_offset;
            m_offset = 0;
            m_buffer = temp;

            return free + grow;
        }

        /// <summary>
        /// Moves <see cref="HiMark"/> forward after new data has been written to the buffer.
        /// </summary>
        /// <param name="size">Number of chars written at <see cref="HiMark"/>.</param>
        public void RaiseMark(int size) {
            m_hi += size;
        }

        /// <summary>
        /// Feeds buffered chars to the automaton until the token ends or the
        /// buffered data is exhausted.
        /// </summary>
        /// <returns>
        /// <c>true</c> - all buffered data is consumed and additional data is required;
        /// <c>false</c> - the automaton stopped in a final state, the token is complete.
        /// </returns>
        /// <exception cref="ParserException">
        /// There is no transition for the next char and the current state isn't final.
        /// </exception>
        public bool Scan() {
            while (m_position < m_hi) {
                var ch = m_buffer[m_position];
                var next = m_dfa[m_state].transitions[(int)ch];
                if (next == DFAConst.UNREACHABLE_STATE) {
                    if (m_dfa[m_state].final)
                        return false;

                    throw UnexpectedToken();
                }
                m_state = next;
                m_position++;
            }

            return true;
        }

        /// <summary>
        /// Verifies that the input may end here, i.e. the automaton is in a final state.
        /// </summary>
        /// <exception cref="ParserException">The current state isn't final.</exception>
        public void Eof() {
            if (!m_dfa[m_state].final)
                throw UnexpectedToken();
        }

        /// <summary>
        /// Gets the tags of the state the automaton is currently in.
        /// </summary>
        public TTag[] GetTokenTags() {
            return m_dfa[m_state].tags;
        }

        // Creates the error reported when the buffer isn't allowed to grow.
        ParserException LimitExceeded() {
            return new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));
        }

        // Creates the error reported for input the automaton can't accept; the
        // message contains the chars consumed so far.
        ParserException UnexpectedToken() {
            // the buffer may still be null when Eof() is called before any data was supplied
            var text = m_buffer == null
                ? String.Empty
                : new string(m_buffer, m_offset, m_position - m_offset);

            return new ParserException(String.Format("Unexpected token '{0}'", text));
        }
    }
}
// File: Implab/Formats/TextScanner.cs (new file in this changeset)
using System;
using Implab.Components;

namespace Implab.Formats {
    /// <summary>
    /// Base class for scanners which split a character input into tokens using a
    /// <see cref="BufferScanner{TTag}"/>. Descendants supply the input by
    /// implementing <see cref="Feed"/> and <see cref="EOF"/>.
    /// </summary>
    /// <typeparam name="TTag">Type of the tags attached to recognized tokens.</typeparam>
    public abstract class TextScanner<TTag> : Disposable {

        // buffer and the region of it which isn't consumed yet
        char[] m_buffer;
        int m_offset;
        int m_length;

        // bounds and tags of the last token read
        int m_tokenOffset;
        int m_tokenLength;
        TTag[] m_tags;

        // NOTE(review): nothing visible in this file assigns m_scanner, so it keeps
        // its default value; confirm how the scanner template is meant to be set.
        BufferScanner<TTag> m_scanner;

        /// <summary>
        /// Reads the next token from the input.
        /// </summary>
        /// <returns>
        /// <c>true</c> - a token has been read, see <see cref="GetToken"/> and <see cref="Tags"/>;
        /// <c>false</c> - <see cref="EOF"/> was already reported when the method was called.
        /// </returns>
        protected bool ReadTokenInternal() {
            if (EOF)
                return false;

            // create a new scanner from template (scanners are structs)
            var inst = m_scanner;

            // initialize the scanner
            inst.Init(m_buffer, m_offset, m_length);

            // do work
            while (inst.Scan()) {
                Feed(ref inst);

                // Scan() keeps asking for data while the buffer is exhausted, so
                // once the input is over the loop has to be left explicitly
                if (EOF)
                    break;
            }

            // save result
            m_buffer = inst.Buffer;
            m_length = inst.Length;
            m_offset = inst.Position;
            m_tokenOffset = inst.TokenOffset;
            m_tokenLength = inst.TokenLength;

            m_tags = inst.GetTokenTags();

            return true;
        }

        /// <summary>
        /// Gets the text of the last token read.
        /// </summary>
        protected string GetToken() {
            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// Tags of the last token read.
        /// </summary>
        protected TTag[] Tags {
            get {
                return m_tags;
            }
        }

        /// <summary>
        /// Supplies the specified scanner with more input or signals the end of input.
        /// </summary>
        /// <param name="scanner">Scanner which has run out of buffered data.</param>
        /// <example>
        /// <code>
        /// protected override void Feed(ref BufferScanner&lt;TTag&gt; scanner) {
        ///     var size = scanner.Extend();
        ///     var actual = m_reader.Read(scanner.Buffer, scanner.HiMark, size);
        ///     if (actual == 0) {
        ///         m_eof = true;
        ///         scanner.Eof();
        ///     } else {
        ///         scanner.RaiseMark(actual);
        ///     }
        /// }
        /// </code>
        /// </example>
        protected abstract void Feed(ref BufferScanner<TTag> scanner);

        /// <summary>
        /// Gets a value indicating whether the end of input has been reached.
        /// </summary>
        public abstract bool EOF { get; }

    }
}
--- a/Implab/Implab.csproj Mon Mar 14 01:19:38 2016 +0300 +++ b/Implab/Implab.csproj Tue Mar 15 02:11:06 2016 +0300 @@ -191,6 +191,8 @@ <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> + <Compile Include="Formats\BufferScanner.cs" /> + <Compile Include="Formats\TextScanner.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <ItemGroup />