annotate Implab/Formats/BufferScanner.cs @ 173:ecfece82ca11 ref20160224

Working on text scanner
author cin
date Tue, 15 Mar 2016 02:11:06 +0300
parents
children 983df35b3ca1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
173
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
1 using System;
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
2 using Implab.Automaton.RegularExpressions;
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
3 using Implab.Automaton;
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
4
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
5 namespace Implab.Formats {
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
/// <summary>
/// Walks a character buffer through a DFA transition table while tracking the
/// window of the token currently being recognized.
/// </summary>
/// <remarks>
/// Buffer layout maintained by this type:
/// <c>[TokenOffset, Position)</c> holds the characters already consumed for the
/// current token, <c>[Position, HiMark)</c> holds loaded but not yet scanned
/// characters, and everything from <c>HiMark</c> to the end of the array is
/// free space.
/// This is a mutable struct, so the cursor fields are copied whenever the value
/// is copied; keep it in a field or pass it by reference.
/// NOTE(review): presumably the caller writes new characters into
/// <see cref="Buffer"/> at <see cref="HiMark"/> after <see cref="Extend"/> and
/// then calls <see cref="RaiseMark"/>; that code is not part of this file.
/// </remarks>
public struct BufferScanner<TTag> {
    char[] m_buffer;
    int m_offset;
    int m_position;
    int m_hi;

    readonly int m_chunk;
    readonly int m_limit;

    readonly DFAStateDescriptor<TTag>[] m_dfa;
    int m_state;

    /// <summary>
    /// Creates a scanner without a buffer; call <see cref="Init"/> before scanning.
    /// </summary>
    /// <param name="dfa">State descriptors, indexed by state number.</param>
    /// <param name="initialState">The state the scanner starts in.</param>
    /// <param name="chunk">Upper bound for the amount the buffer grows by in a single <see cref="Extend"/> call.</param>
    /// <param name="limit">Upper bound for the effective buffer size (from the token offset to the end of the array).</param>
    public BufferScanner(DFAStateDescriptor<TTag>[] dfa, int initialState, int chunk, int limit) {
        m_dfa = dfa;
        m_state = initialState;
        m_chunk = chunk;
        m_limit = limit;
        m_buffer = null;
        m_offset = 0;
        m_position = 0;
        m_hi = 0;
    }

    /// <summary>
    /// The current underlying array. <see cref="Extend"/> may replace it with a new one.
    /// </summary>
    public char[] Buffer {
        get {
            return m_buffer;
        }
    }

    /// <summary>
    /// Index one past the last loaded character.
    /// </summary>
    public int HiMark {
        get {
            return m_hi;
        }
    }

    /// <summary>
    /// Index of the next character to be scanned.
    /// </summary>
    public int Position {
        get {
            return m_position;
        }
    }

    /// <summary>
    /// Number of loaded characters which are not scanned yet.
    /// </summary>
    public int Length {
        get {
            return m_hi - m_position;
        }
    }

    /// <summary>
    /// Index at which the current token starts.
    /// </summary>
    public int TokenOffset {
        get {
            return m_offset;
        }
    }

    /// <summary>
    /// Number of characters consumed for the current token so far.
    /// </summary>
    public int TokenLength {
        get {
            return m_position - m_offset;
        }
    }

    /// <summary>
    /// Attaches a buffer and places both the token start and the scan position at
    /// <paramref name="position"/>. The DFA state is left as it is.
    /// </summary>
    /// <param name="buffer">The array to scan.</param>
    /// <param name="position">Index of the first loaded character.</param>
    /// <param name="length">Number of loaded characters starting at <paramref name="position"/>.</param>
    public void Init(char[] buffer, int position, int length) {
        m_buffer = buffer;
        m_position = position;
        m_offset = position;
        m_hi = position + length;
    }

    /// <summary>
    /// Makes sure there is free space after <see cref="HiMark"/>.
    /// </summary>
    /// <remarks>
    /// When the array already has free space nothing is changed. Otherwise a new
    /// array is allocated, the characters starting at the token offset are moved to
    /// its beginning and <see cref="TokenOffset"/>, <see cref="Position"/> and
    /// <see cref="HiMark"/> are shifted accordingly.
    /// </remarks>
    /// <returns>The number of characters which can be stored after <see cref="HiMark"/>.</returns>
    /// <exception cref="ParserException">The effective buffer size has reached the limit passed to the constructor.</exception>
    public int Extend() {
        // free space
        var free = m_buffer.Length - m_hi;

        // the buffer still has room, nothing to do
        if (free > 0)
            return free;

        // effective size of the buffer: everything before the token start is
        // dropped when the data is moved to the new array
        var size = m_buffer.Length - m_offset;

        // calculate the new size: grow by a chunk, but never beyond the limit
        int grow = Math.Min(m_limit - size, m_chunk);
        if (grow <= 0)
            // NOTE(review): the message says "bytes" while the buffer holds chars;
            // the text is kept as is in case callers or tests depend on it
            throw new ParserException(String.Format("Input buffer {0} bytes limit exceeded", m_limit));

        var temp = new char[size + grow];
        Array.Copy(m_buffer, m_offset, temp, 0, m_hi - m_offset);

        // the token now starts at the beginning of the new array
        m_position -= m_offset;
        m_hi -= m_offset;
        m_offset = 0;
        m_buffer = temp;

        return free + grow;
    }

    /// <summary>
    /// Moves <see cref="HiMark"/> forward by <paramref name="size"/> characters.
    /// </summary>
    /// <param name="size">The number of characters added after the current <see cref="HiMark"/>.</param>
    public void RaiseMark(int size) {
        m_hi += size;
    }

    /// <summary>
    /// Feeds the loaded characters to the DFA until the token ends or the loaded
    /// data is exhausted.
    /// </summary>
    /// <returns>
    /// <c>true</c> - all loaded characters were consumed and additional data is required;
    /// <c>false</c> - the next character has no transition from the current state and
    /// the current state is final. That character is left unconsumed.
    /// </returns>
    /// <exception cref="ParserException">The next character has no transition and the current state is not final.</exception>
    public bool Scan() {
        while (m_position < m_hi) {
            var ch = m_buffer[m_position];
            var next = m_dfa[m_state].transitions[(int)ch];
            if (next == DFAConst.UNREACHABLE_STATE) {
                if (m_dfa[m_state].final)
                    return false;

                // Report the offending character together with the text consumed so
                // far, otherwise a mismatch on the first character of a token yields
                // an empty and useless "Unexpected token ''" message. The index is
                // valid since m_position < m_hi here.
                throw UnexpectedToken(m_position - m_offset + 1);
            }
            m_state = next;
            m_position++;
        }

        return true;
    }

    /// <summary>
    /// Signals the end of the input.
    /// </summary>
    /// <exception cref="ParserException">The current state is not final.</exception>
    public void Eof() {
        if (!m_dfa[m_state].final)
            throw UnexpectedToken(m_position - m_offset);
    }

    /// <summary>
    /// Gets the tags of the current DFA state.
    /// </summary>
    public TTag[] GetTokenTags() {
        return m_dfa[m_state].tags;
    }

    // Creates the exception reporting 'length' characters starting at the token offset.
    ParserException UnexpectedToken(int length) {
        return new ParserException(
            String.Format(
                "Unexpected token '{0}'",
                new string(m_buffer, m_offset, length)
            )
        );
    }
}
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
142 }
ecfece82ca11 Working on text scanner
cin
parents:
diff changeset
143