Mercurial > pub > ImplabNet
comparison Implab/Formats/TextScanner.cs @ 177:a0ff6a0e9c44 ref20160224
refactoring
author | cin |
---|---|
date | Wed, 23 Mar 2016 01:42:00 +0300 |
parents | 0c3c69fe225b |
children | d5c5db0335ee |
comparison
equal
deleted
inserted
replaced
176:0c3c69fe225b | 177:a0ff6a0e9c44 |
---|---|
1 using System; | 1 using System; |
2 using Implab.Components; | 2 using Implab.Components; |
3 using Implab.Automaton.RegularExpressions; | |
4 using System.Diagnostics; | 3 using System.Diagnostics; |
5 using Implab.Automaton; | 4 using Implab.Automaton; |
6 using System.IO; | |
7 using System.Text; | 5 using System.Text; |
8 | 6 |
9 namespace Implab.Formats { | 7 namespace Implab.Formats { |
10 public abstract class TextScanner : Disposable { | 8 public abstract class TextScanner : Disposable { |
11 readonly int m_bufferMax; | 9 readonly int m_bufferMax; |
16 int m_bufferSize; | 14 int m_bufferSize; |
17 int m_tokenOffset; | 15 int m_tokenOffset; |
18 int m_tokenLength; | 16 int m_tokenLength; |
19 | 17 |
20 /// <summary> | 18 /// <summary> |
21 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class. | 19 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. |
22 /// </summary> | 20 /// </summary> |
23 /// <param name="bufferMax">Buffer max.</param> | 21 /// <param name="bufferMax">Buffer max.</param> |
24 /// <param name="chunkSize">Chunk size.</param> | 22 /// <param name="chunkSize">Chunk size.</param> |
25 protected TextScanner(int bufferMax, int chunkSize) { | 23 protected TextScanner(int bufferMax, int chunkSize) { |
26 Debug.Assert(m_chunkSize <= m_bufferMax); | 24 Debug.Assert(m_chunkSize <= m_bufferMax); |
28 m_bufferMax = bufferMax; | 26 m_bufferMax = bufferMax; |
29 m_chunkSize = chunkSize; | 27 m_chunkSize = chunkSize; |
30 } | 28 } |
31 | 29 |
32 /// <summary> | 30 /// <summary> |
33 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class. | 31 /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. |
34 /// </summary> | 32 /// </summary> |
35 /// <param name="buffer">Buffer.</param> | 33 /// <param name="buffer">Buffer.</param> |
36 protected TextScanner(char[] buffer) { | 34 protected TextScanner(char[] buffer) { |
37 if (buffer != null) { | 35 if (buffer != null) { |
38 m_buffer = buffer; | 36 m_buffer = buffer; |
46 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns> | 44 /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns> |
47 /// <param name="dfa">The transition map for the automaton</param> | 45 /// <param name="dfa">The transition map for the automaton</param> |
48 /// <param name="final">Final states of the automaton.</param> | 46 /// <param name="final">Final states of the automaton.</param> |
49 /// <param name="tags">Tags.</param> | 47 /// <param name="tags">Tags.</param> |
50 /// <param name="state">The initial state for the automaton.</param> | 48 /// <param name="state">The initial state for the automaton.</param> |
51 internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | 49 /// <param name="alphabet"></param> |
50 /// <param name = "tag"></param> | |
51 internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { | |
52 Safe.ArgumentNotNull(); | 52 Safe.ArgumentNotNull(); |
53 m_tokenLength = 0; | 53 m_tokenLength = 0; |
54 | 54 |
55 var maxSymbol = alphabet.Length - 1; | 55 var maxSymbol = alphabet.Length - 1; |
56 | 56 |
57 do { | 57 do { |
58 // after the next chunk is read the offset in the buffer may change | 58 // after the next chunk is read the offset in the buffer may change |
59 int pos = m_bufferOffset + m_tokenLength; | 59 int pos = m_bufferOffset + m_tokenLength; |
60 | 60 |
61 while(pos < m_bufferSize) { | 61 while (pos < m_bufferSize) { |
62 var ch = m_buffer[pos]; | 62 var ch = m_buffer[pos]; |
63 | 63 |
64 state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; | 64 state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; |
65 if (state == DFAConst.UNREACHABLE_STATE) | 65 if (state == DFAConst.UNREACHABLE_STATE) |
66 break; | 66 break; |
67 | 67 |
68 pos++; | 68 pos++; |
69 } | 69 } |
75 m_bufferOffset += m_tokenLength; | 75 m_bufferOffset += m_tokenLength; |
76 | 76 |
77 if (final[state]) { | 77 if (final[state]) { |
78 tag = tags[state]; | 78 tag = tags[state]; |
79 return true; | 79 return true; |
80 } else { | 80 } |
81 if (m_bufferOffset == m_bufferSize) { | 81 |
82 if (m_tokenLength == 0) //EOF | 82 if (m_bufferOffset == m_bufferSize) { |
83 if (m_tokenLength == 0) //EOF | |
83 return false; | 84 return false; |
84 | 85 |
85 throw new ParserException(); | 86 throw new ParserException(); |
86 } | 87 } |
87 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | 88 |
89 throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); | |
88 | 90 |
89 } | |
90 } | 91 } |
91 | 92 |
92 protected void Feed(char[] buffer, int offset, int length) { | 93 protected void Feed(char[] buffer, int offset, int length) { |
93 m_buffer = buffer; | 94 m_buffer = buffer; |
94 m_bufferOffset = offset; | 95 m_bufferOffset = offset; |
106 free += m_chunkSize; | 107 free += m_chunkSize; |
107 var used = m_bufferSize - m_bufferOffset; | 108 var used = m_bufferSize - m_bufferOffset; |
108 var size = used + free; | 109 var size = used + free; |
109 | 110 |
110 if (size > m_bufferMax) | 111 if (size > m_bufferMax) |
111 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024); | 112 throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); |
112 | 113 |
113 var temp = new char[size]; | 114 var temp = new char[size]; |
114 | 115 |
115 var read = Read(temp, used, m_chunkSize); | 116 var read = Read(temp, used, m_chunkSize); |
116 if (read == 0) | 117 if (read == 0) |