Mercurial > pub > ImplabNet
comparison Implab/Formats/TextScanner.cs @ 192:f1da3afc3521 release v2.1
Слияние с v2
| author | cin |
|---|---|
| date | Fri, 22 Apr 2016 13:10:34 +0300 |
| parents | 76e8f2ba12b8 |
| children |
comparison
equal
deleted
inserted
replaced
| 71:1714fd8678ef | 192:f1da3afc3521 |
|---|---|
| 1 using System; | |
| 2 using Implab.Components; | |
| 3 using System.Diagnostics; | |
| 4 using Implab.Automaton; | |
| 5 using System.Text; | |
| 6 | |
| 7 namespace Implab.Formats { | |
/// <summary>
/// Base class for scanners which split a stream of characters into tokens
/// using a table-driven deterministic automaton. Input is either supplied
/// as a ready buffer or pulled in chunks through <see cref="Read"/>.
/// </summary>
public abstract class TextScanner : Disposable {
    // Upper limit for the size of the internal buffer when it has to grow.
    readonly int m_bufferMax;
    // Number of chars requested from Read() on each refill; a non-positive
    // value disables refilling (see Feed()).
    readonly int m_chunkSize;

    char[] m_buffer;
    // Index of the first char in m_buffer which isn't consumed yet.
    int m_bufferOffset;
    // Index right after the last valid char in m_buffer (not a count, see Feed(char[],int,int)).
    int m_bufferSize;
    // Position and length of the last token read by ReadToken.
    int m_tokenOffset;
    int m_tokenLength;

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// which reads its input in chunks through <see cref="Read"/>.
    /// </summary>
    /// <param name="bufferMax">The maximum size the internal buffer is allowed to grow to.</param>
    /// <param name="chunkSize">The number of chars requested from <see cref="Read"/> at once.</param>
    protected TextScanner(int bufferMax, int chunkSize) {
        // Check the arguments, not the fields: the fields are still zero here,
        // so asserting on them would never fail.
        Debug.Assert(chunkSize <= bufferMax);

        m_bufferMax = bufferMax;
        m_chunkSize = chunkSize;
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
    /// over the specified buffer. The chunk size stays zero, so <see cref="Feed()"/>
    /// never reads additional data for such an instance.
    /// </summary>
    /// <param name="buffer">The buffer with the text to scan, may be <c>null</c>.</param>
    protected TextScanner(char[] buffer) {
        if (buffer != null) {
            m_buffer = buffer;
            m_bufferSize = buffer.Length;
        }
    }

    /// <summary>
    /// (hungry) Reads the next token: consumes input chars for as long as the
    /// automaton has a transition for them.
    /// </summary>
    /// <returns><c>true</c> if a token was read, <c>false</c> if there are no more tokens in the stream.</returns>
    /// <param name="dfa">The transition map for the automaton, indexed by state and symbol class.</param>
    /// <param name="final">Final states of the automaton.</param>
    /// <param name="tags">Tags, indexed by state.</param>
    /// <param name="state">The initial state for the automaton.</param>
    /// <param name="alphabet">Maps a char code to its symbol class; chars beyond the end of
    /// this array are treated as <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.</param>
    /// <param name="tag">Receives the tags of the state in which the automaton stopped,
    /// or <c>null</c> when no token was read.</param>
    /// <exception cref="ParserException">The automaton stopped in a non-final state.</exception>
    internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
        m_tokenLength = 0;
        tag = null;

        var maxSymbol = alphabet.Length - 1;
        int next;
        do {
            // after the next chunk is read the offset in the buffer may change
            int pos = m_bufferOffset + m_tokenLength;
            next = state;
            while (pos < m_bufferSize) {
                var ch = m_buffer[pos];

                next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];

                if (next == AutomatonConst.UNREACHABLE_STATE)
                    break;

                state = next;
                pos++;
            }
            m_tokenLength = pos - m_bufferOffset;
            // Ask for more data only when the buffer is exhausted while the
            // automaton is still able to continue.
        } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

        // Remember where the token is and move past it.
        m_tokenOffset = m_bufferOffset;
        m_bufferOffset += m_tokenLength;

        if (final[state]) {
            tag = tags[state];
            return true;
        }

        if (m_bufferOffset == m_bufferSize) {
            if (m_tokenLength == 0) //EOF
                return false;

            // The input ended in the middle of a token.
            throw new ParserException();
        }

        throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));

    }

    /// <summary>
    /// Replaces the current buffer with the specified range of <paramref name="buffer"/>.
    /// </summary>
    /// <param name="buffer">The buffer with the text to scan.</param>
    /// <param name="offset">The index of the first char to scan.</param>
    /// <param name="length">The number of chars to scan.</param>
    protected void Feed(char[] buffer, int offset, int length) {
        m_buffer = buffer;
        m_bufferOffset = offset;
        m_bufferSize = offset + length;
    }

    /// <summary>
    /// Reads the next chunk of data through <see cref="Read"/> and appends it to the
    /// unconsumed part of the buffer, reallocating the buffer when there is no room.
    /// </summary>
    /// <returns><c>true</c> if some data was read, <c>false</c> if chunked reading is
    /// disabled (non-positive chunk size) or <see cref="Read"/> returned no data.</returns>
    /// <exception cref="ParserException">The buffer would have to grow beyond the limit.</exception>
    protected bool Feed() {
        if (m_chunkSize <= 0)
            return false;

        if (m_buffer != null) {
            var free = m_buffer.Length - m_bufferSize;

            if (free < m_chunkSize) {
                // Not enough room after the valid data: allocate a new buffer
                // and move the unconsumed chars to its beginning.
                free += m_chunkSize;
                var used = m_bufferSize - m_bufferOffset;
                var size = used + free;

                if (size > m_bufferMax)
                    throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                var temp = new char[size];

                // Read first, so nothing is copied or replaced when there is no more data.
                var read = Read(temp, used, m_chunkSize);
                if (read == 0)
                    return false;

                Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

                m_bufferOffset = 0;
                m_bufferSize = used + read;
                m_buffer = temp;
            } else {
                var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                if (read == 0)
                    return false;
                // Advance by the number of chars actually read: Read() may return
                // less than a full chunk and the rest of the buffer isn't valid input.
                m_bufferSize += read;
            }
            return true;
        } else {
            Debug.Assert(m_bufferOffset == 0);
            m_buffer = new char[m_chunkSize];
            m_bufferSize = Read(m_buffer, 0, m_chunkSize);
            return (m_bufferSize != 0);
        }
    }

    /// <summary>
    /// Reads up to <paramref name="size"/> chars into <paramref name="buffer"/>
    /// starting at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The number of chars read; zero is treated by <see cref="Feed()"/> as the end of the input.</returns>
    /// <param name="buffer">The buffer to fill.</param>
    /// <param name="offset">The index in the buffer at which to start writing.</param>
    /// <param name="size">The maximum number of chars to read.</param>
    protected abstract int Read(char[] buffer, int offset, int size);

    /// <summary>
    /// Creates a string from the chars of the last token read.
    /// </summary>
    /// <returns>The text of the token.</returns>
    public string GetTokenValue() {
        return new String(m_buffer, m_tokenOffset, m_tokenLength);
    }

    /// <summary>
    /// Copies the chars of the last token read to the specified array.
    /// </summary>
    /// <param name="buffer">The destination array.</param>
    /// <param name="offset">The index in the destination array at which to start writing.</param>
    public void CopyTokenTo(char[] buffer, int offset) {
        Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
    }

    /// <summary>
    /// Appends the chars of the last token read to the specified string builder.
    /// </summary>
    /// <param name="sb">The string builder to append to.</param>
    public void CopyTokenTo(StringBuilder sb) {
        sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
    }

}
| 156 } | |
| 157 |
