comparison Implab/Formats/TextScanner.cs @ 175:96a89dcb4060 ref20160224

sync
author cin
date Mon, 21 Mar 2016 18:41:45 +0300
parents 983df35b3ca1
children 0c3c69fe225b
comparison
equal deleted inserted replaced
174:983df35b3ca1 175:96a89dcb4060
1 using System; 1 using System;
2 using Implab.Components; 2 using Implab.Components;
3 using Implab.Automaton.RegularExpressions;
4 using System.Diagnostics;
5 using Implab.Automaton;
3 6
4 namespace Implab.Formats { 7 namespace Implab.Formats {
5 public abstract class TextScanner<TTag> : Disposable { 8 public abstract class TextScanner<TTag> : Disposable {
6 9
7 readonly int[] m_buffer; 10 int m_maxSymbol;
11 int[] m_symbolMap;
12
13 readonly char[] m_buffer;
8 int m_bufferOffset; 14 int m_bufferOffset;
9 int m_dataLength; 15 int m_bufferSize;
10 int m_tokenLength; 16 int m_tokenLength;
11 17
12 TTag[] m_tags; 18 TTag[] m_tags;
13 19
14 BufferScanner<TTag> m_scanner; 20 protected bool ReadTokenInternal(DFAStateDescriptor<TTag>[] dfa, int state) {
21 Debug.Assert(dfa != null);
15 22
16 protected bool ReadTokenInternal() { 23 do {
17 if (EOF) 24 for (var pos = m_bufferOffset; pos < m_bufferSize; pos++) {
18 return false; 25 var ch = m_buffer[pos];
19 26 state = dfa[state].transitions[m_symbolMap[ch > m_maxSymbol ? m_maxSymbol : ch]];
20 // create a new scanner from template (scanners are value types) 27 if (state == DFAConst.UNREACHABLE_STATE)
21 var inst = m_scanner; 28 break;
29 }
30 } while (Feed());
22 31
23 m_tokenLength = 0; 32 if (dfa[state].final) {
24 33
25 while (inst.Scan(m_buffer, m_bufferOffset, m_dataLength)) {
26 m_tokenLength += m_dataLength;
27
28 var actual = Read(m_buffer, 0, m_buffer.Length);
29
30 m_bufferOffset = 0;
31 m_dataLength = actual;
32
33 if (actual == 0) {
34 inst.Eof();
35 break;
36 }
37 } 34 }
38 35
39 var len = inst.Position - m_bufferOffset;
40 m_tokenLength += len;
41 m_dataLength -= len;
42 m_bufferOffset = inst.Position;
43
44 // save result;
45
46 m_tags = inst.GetTokenTags();
47 } 36 }
48 37
49 protected abstract int Read(int[] buffer, int offset, int size); 38 bool Feed() {
39
40 }
41
42 protected abstract int Read(char[] buffer, int offset, int size);
50 43
51 protected TTag[] Tags { 44 protected TTag[] Tags {
52 get { 45 get {
53 return m_tags; 46 return m_tags;
54 } 47 }
55 } 48 }
56 49
57 public abstract bool EOF { get; } 50
58
59 } 51 }
60 } 52 }
61 53