Mercurial > pub > ImplabNet
comparison Implab/Formats/BufferScanner.cs @ 173:ecfece82ca11 ref20160224
Working on text scanner
| author | cin |
|---|---|
| date | Tue, 15 Mar 2016 02:11:06 +0300 |
| parents | |
| children | 983df35b3ca1 |
comparison
equal
deleted
inserted
replaced
| 172:92d5278d1b10 | 173:ecfece82ca11 |
|---|---|
| 1 using System; | |
| 2 using Implab.Automaton.RegularExpressions; | |
| 3 using Implab.Automaton; | |
| 4 | |
| 5 namespace Implab.Formats { | |
/// <summary>
/// Runs a table-driven DFA over a character buffer and tracks the bounds
/// of the token currently being recognized.
/// </summary>
/// <remarks>
/// The scanner does not read input itself. The caller places characters into
/// <see cref="Buffer"/> starting at <see cref="HiMark"/> (calling
/// <see cref="Extend"/> to obtain room), reports how many were written with
/// <see cref="RaiseMark"/> and then calls <see cref="Scan"/>.
/// This is a mutable struct: copies do not share position or state, so it
/// must be kept in a field or local and never passed around by value.
/// </remarks>
/// <typeparam name="TTag">Type of the tags attached to DFA states.</typeparam>
public struct BufferScanner<TTag> {
    char[] m_buffer;
    // index of the first character of the current token
    int m_offset;
    // index of the next character to be fed to the automaton
    int m_position;
    // index just past the last valid character in the buffer
    int m_hi;

    // maximum number of characters added by a single Extend() call
    readonly int m_chunk;
    // maximum effective size (characters from the token start) of the buffer
    readonly int m_limit;

    readonly DFAStateDescriptor<TTag>[] m_dfa;
    int m_state;

    /// <summary>
    /// Creates a scanner for the specified automaton. No buffer is attached
    /// until <see cref="Init"/> is called.
    /// </summary>
    /// <param name="dfa">State table of the automaton.</param>
    /// <param name="initialState">Index in <paramref name="dfa"/> of the state the scanner starts in.</param>
    /// <param name="chunk">Maximum number of characters the buffer grows by in one <see cref="Extend"/> call.</param>
    /// <param name="limit">Upper bound, in characters, for the effective buffer size.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dfa"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="initialState"/> is not a valid index in <paramref name="dfa"/>.</exception>
    public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
        if (dfa == null)
            throw new ArgumentNullException("dfa");
        if (initialState < 0 || initialState >= dfa.Length)
            throw new ArgumentOutOfRangeException("initialState");

        m_dfa = dfa;
        m_state = initialState;
        m_chunk = chunk;
        m_limit = limit;
        m_buffer = null;
        m_offset = 0;
        m_position = 0;
        m_hi = 0;
    }

    /// <summary>
    /// The current buffer. The instance is replaced when <see cref="Extend"/>
    /// reallocates, so it must be re-read after every such call.
    /// </summary>
    public char[] Buffer {
        get {
            return m_buffer;
        }
    }

    /// <summary>
    /// Index just past the last valid character in <see cref="Buffer"/>.
    /// </summary>
    public int HiMark {
        get {
            return m_hi;
        }
    }

    /// <summary>
    /// Index of the next character to be scanned.
    /// </summary>
    public int Position {
        get {
            return m_position;
        }
    }

    /// <summary>
    /// Number of characters between <see cref="Position"/> and <see cref="HiMark"/>.
    /// </summary>
    public int Length {
        get {
            return m_hi - m_position;
        }
    }

    /// <summary>
    /// Index in <see cref="Buffer"/> where the current token starts.
    /// </summary>
    public int TokenOffset {
        get {
            return m_offset;
        }
    }

    /// <summary>
    /// Number of characters of the current token scanned so far.
    /// </summary>
    public int TokenLength {
        get {
            return m_position - m_offset;
        }
    }

    /// <summary>
    /// Attaches a buffer and places the token start and the scan position at
    /// <paramref name="position"/>. The automaton state is left untouched.
    /// </summary>
    /// <param name="buffer">The buffer to scan.</param>
    /// <param name="position">Index of the first character to scan.</param>
    /// <param name="length">Number of valid characters starting at <paramref name="position"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="position"/> or <paramref name="length"/> doesn't fit into <paramref name="buffer"/>.
    /// </exception>
    public void Init(char[] buffer, int position, int length) {
        if (buffer == null)
            throw new ArgumentNullException("buffer");
        if (position < 0 || position > buffer.Length)
            throw new ArgumentOutOfRangeException("position");
        // written as a subtraction so that position + length can't overflow
        if (length < 0 || length > buffer.Length - position)
            throw new ArgumentOutOfRangeException("length");

        m_buffer = buffer;
        m_position = position;
        m_offset = position;
        m_hi = position + length;
    }

    /// <summary>
    /// Makes sure there is room after <see cref="HiMark"/> for more input.
    /// </summary>
    /// <remarks>
    /// When the buffer is full a new one is allocated, the current token is
    /// moved to its beginning and all indices are rebased, i.e.
    /// <see cref="TokenOffset"/> becomes 0 and <see cref="Buffer"/> returns
    /// a different array.
    /// </remarks>
    /// <returns>Number of characters which can be written starting at <see cref="HiMark"/>.</returns>
    /// <exception cref="InvalidOperationException">No buffer has been attached with <see cref="Init"/>.</exception>
    /// <exception cref="ParserException">The buffer can't grow without exceeding the limit.</exception>
    public int Extend() {
        if (m_buffer == null)
            throw new InvalidOperationException("The buffer isn't initialized, call Init first");

        // free space
        var free = m_buffer.Length - m_hi;

        // if the buffer still has free space, no reallocation is needed
        if (free > 0)
            return free;

        // effective size of the buffer: the consumed part before the token start doesn't count
        var size = m_buffer.Length - m_offset;

        // calculate the new size
        int grow = Math.Min(m_limit - size, m_chunk);
        if (grow <= 0)
            throw new ParserException(String.Format("Input buffer {0} chars limit exceeded", m_limit));

        // the data before m_offset is dropped, everything from the token start is kept
        var temp = new char[size + grow];
        Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);
        m_position -= m_offset;
        m_hi -= m_offset;
        m_offset = 0;
        m_buffer = temp;

        return free + grow;
    }

    /// <summary>
    /// Reports that <paramref name="size"/> characters were written to the
    /// buffer starting at <see cref="HiMark"/>.
    /// </summary>
    /// <param name="size">Number of characters appended.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="size"/> is negative or exceeds the free space of the buffer.
    /// </exception>
    public void RaiseMark(int size) {
        var capacity = m_buffer == null ? 0 : m_buffer.Length;
        if (size < 0 || size > capacity - m_hi)
            throw new ArgumentOutOfRangeException("size");

        m_hi += size;
    }

    /// <summary>
    /// Feeds the buffered characters to the automaton until the token ends
    /// or the buffered data is exhausted.
    /// </summary>
    /// <returns>
    /// <c>true</c> - additional data required; <c>false</c> - the automaton
    /// is in a final state and the character at <see cref="Position"/>
    /// doesn't belong to the token.
    /// </returns>
    /// <exception cref="ParserException">
    /// There is no transition for the next character and the current state isn't final.
    /// </exception>
    public bool Scan() {
        while (m_position < m_hi) {
            var ch = m_buffer[m_position];
            var next = m_dfa[m_state].transitions[(int)ch];
            if (next == DFAConst.UNREACHABLE_STATE) {
                // the character isn't consumed, it is left for the caller
                if (m_dfa[m_state].final)
                    return false;

                throw UnexpectedToken();
            }
            m_state = next;
            m_position++;
        }

        return true;
    }

    /// <summary>
    /// Signals the end of the input.
    /// </summary>
    /// <exception cref="ParserException">The automaton isn't in a final state.</exception>
    public void Eof() {
        if (!m_dfa[m_state].final)
            throw UnexpectedToken();
    }

    /// <summary>
    /// Gets the tags of the current automaton state.
    /// </summary>
    public TTag[] GetTokenTags() {
        return m_dfa[m_state].tags;
    }

    // Creates the error reporting the text of the token scanned so far.
    // A missing buffer yields an empty token instead of an ArgumentNullException.
    ParserException UnexpectedToken() {
        var text = m_buffer == null
            ? String.Empty
            : new string(m_buffer, m_offset, m_position - m_offset);

        return new ParserException(String.Format("Unexpected token '{0}'", text));
    }
}
| 142 } | |
| 143 |
