comparison Implab/Formats/TextScanner.cs @ 177:a0ff6a0e9c44 ref20160224

refactoring
author cin
date Wed, 23 Mar 2016 01:42:00 +0300
parents 0c3c69fe225b
children d5c5db0335ee
comparison
equal deleted inserted replaced
176:0c3c69fe225b 177:a0ff6a0e9c44
1 using System; 1 using System;
2 using Implab.Components; 2 using Implab.Components;
3 using Implab.Automaton.RegularExpressions;
4 using System.Diagnostics; 3 using System.Diagnostics;
5 using Implab.Automaton; 4 using Implab.Automaton;
6 using System.IO;
7 using System.Text; 5 using System.Text;
8 6
9 namespace Implab.Formats { 7 namespace Implab.Formats {
10 public abstract class TextScanner : Disposable { 8 public abstract class TextScanner : Disposable {
11 readonly int m_bufferMax; 9 readonly int m_bufferMax;
16 int m_bufferSize; 14 int m_bufferSize;
17 int m_tokenOffset; 15 int m_tokenOffset;
18 int m_tokenLength; 16 int m_tokenLength;
19 17
20 /// <summary> 18 /// <summary>
21 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class. 19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
22 /// </summary> 20 /// </summary>
23 /// <param name="bufferMax">Buffer max.</param> 21 /// <param name="bufferMax">Buffer max.</param>
24 /// <param name="chunkSize">Chunk size.</param> 22 /// <param name="chunkSize">Chunk size.</param>
25 protected TextScanner(int bufferMax, int chunkSize) { 23 protected TextScanner(int bufferMax, int chunkSize) {
26 Debug.Assert(m_chunkSize <= m_bufferMax); 24 Debug.Assert(m_chunkSize <= m_bufferMax);
28 m_bufferMax = bufferMax; 26 m_bufferMax = bufferMax;
29 m_chunkSize = chunkSize; 27 m_chunkSize = chunkSize;
30 } 28 }
31 29
32 /// <summary> 30 /// <summary>
33 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class. 31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
34 /// </summary> 32 /// </summary>
35 /// <param name="buffer">Buffer.</param> 33 /// <param name="buffer">Buffer.</param>
36 protected TextScanner(char[] buffer) { 34 protected TextScanner(char[] buffer) {
37 if (buffer != null) { 35 if (buffer != null) {
38 m_buffer = buffer; 36 m_buffer = buffer;
46 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns> 44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
47 /// <param name="dfa">The transition map for the automaton</param> 45 /// <param name="dfa">The transition map for the automaton</param>
48 /// <param name="final">Final states of the automaton.</param> 46 /// <param name="final">Final states of the automaton.</param>
49 /// <param name="tags">Tags.</param> 47 /// <param name="tags">Tags.</param>
50 /// <param name="state">The initial state for the automaton.</param> 48 /// <param name="state">The initial state for the automaton.</param>
51 internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { 49 /// <param name="alphabet"></param>
50 /// <param name = "tag"></param>
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
52 Safe.ArgumentNotNull(); 52 Safe.ArgumentNotNull();
53 m_tokenLength = 0; 53 m_tokenLength = 0;
54 54
55 var maxSymbol = alphabet.Length - 1; 55 var maxSymbol = alphabet.Length - 1;
56 56
57 do { 57 do {
58 // after the next chunk is read the offset in the buffer may change 58 // after the next chunk is read the offset in the buffer may change
59 int pos = m_bufferOffset + m_tokenLength; 59 int pos = m_bufferOffset + m_tokenLength;
60 60
61 while(pos < m_bufferSize) { 61 while (pos < m_bufferSize) {
62 var ch = m_buffer[pos]; 62 var ch = m_buffer[pos];
63 63
64 state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; 64 state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
65 if (state == DFAConst.UNREACHABLE_STATE) 65 if (state == DFAConst.UNREACHABLE_STATE)
66 break; 66 break;
67 67
68 pos++; 68 pos++;
69 } 69 }
75 m_bufferOffset += m_tokenLength; 75 m_bufferOffset += m_tokenLength;
76 76
77 if (final[state]) { 77 if (final[state]) {
78 tag = tags[state]; 78 tag = tags[state];
79 return true; 79 return true;
80 } else { 80 }
81 if (m_bufferOffset == m_bufferSize) { 81
82 if (m_tokenLength == 0) //EOF 82 if (m_bufferOffset == m_bufferSize) {
83 if (m_tokenLength == 0) //EOF
83 return false; 84 return false;
84 85
85 throw new ParserException(); 86 throw new ParserException();
86 } 87 }
87 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); 88
89 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
88 90
89 }
90 } 91 }
91 92
92 protected void Feed(char[] buffer, int offset, int length) { 93 protected void Feed(char[] buffer, int offset, int length) {
93 m_buffer = buffer; 94 m_buffer = buffer;
94 m_bufferOffset = offset; 95 m_bufferOffset = offset;
106 free += m_chunkSize; 107 free += m_chunkSize;
107 var used = m_bufferSize - m_bufferOffset; 108 var used = m_bufferSize - m_bufferOffset;
108 var size = used + free; 109 var size = used + free;
109 110
110 if (size > m_bufferMax) 111 if (size > m_bufferMax)
111 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024); 112 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024));
112 113
113 var temp = new char[size]; 114 var temp = new char[size];
114 115
115 var read = Read(temp, used, m_chunkSize); 116 var read = Read(temp, used, m_chunkSize);
116 if (read == 0) 117 if (read == 0)