Mercurial > pub > ImplabNet
comparison Implab/Formats/BufferScanner.cs @ 173:ecfece82ca11 ref20160224
Working on text scanner
author | cin |
---|---|
date | Tue, 15 Mar 2016 02:11:06 +0300 |
parents | |
children | 983df35b3ca1 |
comparison
equal
deleted
inserted
replaced
172:92d5278d1b10 | 173:ecfece82ca11 |
---|---|
1 using System; | |
2 using Implab.Automaton.RegularExpressions; | |
3 using Implab.Automaton; | |
4 | |
namespace Implab.Formats {
    /// <summary>
    /// Scans characters held in a growable <c>char[]</c> buffer by walking a
    /// DFA transition table, tracking where the current token starts and how
    /// far the scan has advanced.
    /// </summary>
    /// <remarks>
    /// This is a mutable value type: copying it copies the cursor fields, so
    /// a copy does not observe changes made through the original. Keep a
    /// single instance in a field or local variable, or pass it by
    /// <c>ref</c>.
    /// NOTE(review): nothing in this type resets the DFA state or moves the
    /// token offset once a token has been recognized; presumably that is the
    /// caller's job or is added in a later revision - confirm.
    /// </remarks>
    public struct BufferScanner<TTag> {
        // Backing storage; null until Init() or Extend() provides an array.
        char[] m_buffer;
        // Index of the first character of the current token.
        int m_offset;
        // Index of the next character to be examined by Scan().
        int m_position;
        // Index one past the last valid character in the buffer.
        int m_hi;

        // Maximum number of characters added by a single Extend() call.
        readonly int m_chunk;
        // Upper bound for the effective buffer size computed in Extend().
        readonly int m_limit;

        readonly DFAStateDescriptor<TTag>[] m_dfa;
        int m_state;

        /// <summary>
        /// Creates a scanner over the specified DFA. No buffer is attached
        /// until <see cref="Init"/> or <see cref="Extend"/> is called.
        /// </summary>
        /// <param name="dfa">The table of DFA state descriptors.</param>
        /// <param name="initialState">Index of the state the scan starts from.</param>
        /// <param name="chunk">Maximum growth of the buffer per <see cref="Extend"/> call.</param>
        /// <param name="limit">Maximum effective size of the buffer.</param>
        /// <exception cref="ArgumentNullException"><paramref name="dfa"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="initialState"/> is not an index into <paramref name="dfa"/>.
        /// </exception>
        public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
            // Fail here rather than with an obscure error on the first Scan()/Eof().
            if (dfa == null)
                throw new ArgumentNullException("dfa");
            if (initialState < 0 || initialState >= dfa.Length)
                throw new ArgumentOutOfRangeException("initialState");

            m_dfa = dfa;
            m_state = initialState;
            m_chunk = chunk;
            m_limit = limit;
            m_buffer = null;
            m_offset = 0;
            m_position = 0;
            m_hi = 0;
        }

        /// <summary>
        /// The current backing array. It is replaced by <see cref="Extend"/>
        /// whenever a new array is allocated.
        /// </summary>
        public char[] Buffer {
            get {
                return m_buffer;
            }
        }

        /// <summary>
        /// Index one past the last valid character in <see cref="Buffer"/>.
        /// </summary>
        public int HiMark {
            get {
                return m_hi;
            }
        }

        /// <summary>
        /// Index of the next character to be scanned.
        /// </summary>
        public int Position {
            get {
                return m_position;
            }
        }

        /// <summary>
        /// Number of valid characters which have not been scanned yet.
        /// </summary>
        public int Length {
            get {
                return m_hi - m_position;
            }
        }

        /// <summary>
        /// Index in <see cref="Buffer"/> where the current token starts.
        /// </summary>
        public int TokenOffset {
            get {
                return m_offset;
            }
        }

        /// <summary>
        /// Number of characters of the current token scanned so far.
        /// </summary>
        public int TokenLength {
            get {
                return m_position - m_offset;
            }
        }

        /// <summary>
        /// Attaches a buffer and positions the scanner at the start of the
        /// data it contains. The DFA state is left unchanged.
        /// </summary>
        /// <param name="buffer">The array holding the characters to scan.</param>
        /// <param name="position">Index of the first valid character.</param>
        /// <param name="length">Number of valid characters starting at <paramref name="position"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The specified range does not fit into <paramref name="buffer"/>.
        /// </exception>
        public void Init(char[] buffer, int position, int length) {
            if (buffer == null)
                throw new ArgumentNullException("buffer");
            if (position < 0 || position > buffer.Length)
                throw new ArgumentOutOfRangeException("position");
            // Written as a subtraction so that position + length cannot overflow.
            if (length < 0 || length > buffer.Length - position)
                throw new ArgumentOutOfRangeException("length");

            m_buffer = buffer;
            m_position = position;
            m_offset = position;
            m_hi = position + length;
        }

        /// <summary>
        /// Makes sure there is room after <see cref="HiMark"/> for more data.
        /// When the buffer is full a new array is allocated, the current
        /// token is moved to its beginning and all indexes are rebased.
        /// </summary>
        /// <returns>Number of free characters available starting at <see cref="HiMark"/>.</returns>
        /// <exception cref="ParserException">
        /// The buffer cannot grow without exceeding the configured limit.
        /// </exception>
        public int Extend() {
            // A scanner without a buffer is treated as having an empty one,
            // so the first call allocates the initial chunk.
            var capacity = m_buffer == null ? 0 : m_buffer.Length;

            // free space after the valid data
            var free = capacity - m_hi;

            // the buffer still has free space, nothing to do
            if (free > 0)
                return free;

            // effective size of the buffer: data preceding the current token
            // is not preserved
            var size = capacity - m_offset;

            // calculate how much the buffer is allowed to grow
            int grow = Math.Min(m_limit - size, m_chunk);
            if (grow <= 0)
                throw new ParserException(String.Format("Input buffer {0} characters limit exceeded", m_limit));

            var temp = new char[size + grow];
            if (m_buffer != null)
                Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);

            // rebase the indexes, the token now starts at the beginning
            m_position -= m_offset;
            m_hi -= m_offset;
            m_offset = 0;
            m_buffer = temp;

            return temp.Length - m_hi;
        }

        /// <summary>
        /// Advances <see cref="HiMark"/> after new characters have been
        /// written to <see cref="Buffer"/> starting at the previous mark.
        /// </summary>
        /// <param name="size">Number of characters appended.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="size"/> is negative or exceeds the free space in the buffer.
        /// </exception>
        public void RaiseMark(int size) {
            var capacity = m_buffer == null ? 0 : m_buffer.Length;

            // Keeps m_hi inside the buffer, which Scan() and Extend() rely on.
            if (size < 0 || size > capacity - m_hi)
                throw new ArgumentOutOfRangeException("size");

            m_hi += size;
        }

        /// <summary>
        /// Feeds the buffered characters to the DFA until either the data is
        /// exhausted or no transition exists for the current character.
        /// </summary>
        /// <returns>
        /// <c>true</c> - additional data required; <c>false</c> - the scan
        /// stopped in a final state, the character at <see cref="Position"/>
        /// has not been consumed.
        /// </returns>
        /// <exception cref="ParserException">
        /// No transition exists for the current character and the current
        /// state is not final.
        /// </exception>
        public bool Scan() {
            while (m_position < m_hi) {
                var ch = m_buffer[m_position];
                var transitions = m_dfa[m_state].transitions;

                // A character beyond the transition table has no transition
                // by definition; handle it like any other unexpected
                // character instead of failing with an index error.
                int next = DFAConst.UNREACHABLE_STATE;
                if ((int)ch < transitions.Length)
                    next = transitions[(int)ch];

                if (next == DFAConst.UNREACHABLE_STATE) {
                    if (m_dfa[m_state].final)
                        return false;

                    // Include the offending character in the message,
                    // otherwise an invalid first character is reported as ''.
                    throw new ParserException(
                        String.Format(
                            "Unexpected token '{0}'",
                            GetTokenText(m_position - m_offset + 1)
                        )
                    );
                }
                m_state = next;
                m_position++;
            }

            return true;
        }

        /// <summary>
        /// Signals the end of the input and verifies that the scanner is in
        /// a final state.
        /// </summary>
        /// <exception cref="ParserException">The current state is not final.</exception>
        public void Eof() {
            if (!m_dfa[m_state].final)
                throw new ParserException(
                    String.Format(
                        "Unexpected token '{0}'",
                        GetTokenText(m_position - m_offset)
                    )
                );
        }

        /// <summary>
        /// Gets the tags of the current DFA state.
        /// </summary>
        /// <returns>The <c>tags</c> array of the current state descriptor.</returns>
        public TTag[] GetTokenTags() {
            return m_dfa[m_state].tags;
        }

        // Returns the first 'length' characters of the current token for use
        // in error messages; yields an empty string when no buffer is attached.
        string GetTokenText(int length) {
            if (m_buffer == null || length <= 0)
                return String.Empty;
            return new string(m_buffer, m_offset, length);
        }
    }
}
143 |