Mercurial > pub > ImplabNet
comparison Implab/Formats/TextScanner.cs @ 175:96a89dcb4060 ref20160224
sync
author | cin |
---|---|
date | Mon, 21 Mar 2016 18:41:45 +0300 |
parents | 983df35b3ca1 |
children | 0c3c69fe225b |
comparison
equal
deleted
inserted
replaced
174:983df35b3ca1 | 175:96a89dcb4060 |
---|---|
1 using System; | 1 using System; |
2 using Implab.Components; | 2 using Implab.Components; |
3 using Implab.Automaton.RegularExpressions; | |
4 using System.Diagnostics; | |
5 using Implab.Automaton; | |
3 | 6 |
4 namespace Implab.Formats { | 7 namespace Implab.Formats { |
5 public abstract class TextScanner<TTag> : Disposable { | 8 public abstract class TextScanner<TTag> : Disposable { |
6 | 9 |
7 readonly int[] m_buffer; | 10 int m_maxSymbol; |
11 int[] m_symbolMap; | |
12 | |
13 readonly char[] m_buffer; | |
8 int m_bufferOffset; | 14 int m_bufferOffset; |
9 int m_dataLength; | 15 int m_bufferSize; |
10 int m_tokenLength; | 16 int m_tokenLength; |
11 | 17 |
12 TTag[] m_tags; | 18 TTag[] m_tags; |
13 | 19 |
14 BufferScanner<TTag> m_scanner; | 20 protected bool ReadTokenInternal(DFAStateDescriptor<TTag>[] dfa, int state) { |
21 Debug.Assert(dfa != null); | |
15 | 22 |
16 protected bool ReadTokenInternal() { | 23 do { |
17 if (EOF) | 24 for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) { |
18 return false; | 25 var ch = m_buffer[pos]; |
19 | 26 state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]]; |
20 // create a new scanner from template (scanners are value types) | 27 if (state == DFAConst.UNREACHABLE_STATE) |
21 var inst = m_scanner; | 28 break; |
29 } | |
30 } while (Feed()); | |
22 | 31 |
23 m_tokenLength = 0; | 32 if (dfa[state].final) { |
24 | 33 |
25 while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) { | |
26 m_tokenLength += m_dataLength; | |
27 | |
28 var actual = Read(m_buffer, 0, m_buffer.Length); | |
29 | |
30 m_bufferOffset = 0; | |
31 m_dataLength = actual; | |
32 | |
33 if (actual == 0) { | |
34 inst.Eof(); | |
35 break; | |
36 } | |
37 } | 34 } |
38 | 35 |
39 var len = inst.Position - m_bufferOffset; | |
40 m_tokenLength += len; | |
41 m_dataLength -= len; | |
42 m_bufferOffset = inst.Position; | |
43 | |
44 // save result; | |
45 | |
46 m_tags = inst.GetTokenTags(); | |
47 } | 36 } |
48 | 37 |
49 protected abstract int Read(int[] buffer, int offset, int size); | 38 bool Feed() { |
39 | |
40 } | |
41 | |
42 protected abstract int Read(char[] buffer, int offset, int size); | |
50 | 43 |
51 protected TTag[] Tags { | 44 protected TTag[] Tags { |
52 get { | 45 get { |
53 return m_tags; | 46 return m_tags; |
54 } | 47 } |
55 } | 48 } |
56 | 49 |
57 public abstract bool EOF { get; } | 50 |
58 | |
59 } | 51 } |
60 } | 52 } |
61 | 53 |