Mercurial > pub > ImplabNet
annotate Implab/JSON/JSONParser.cs @ 89:ce0171cacec4 v2
improved performance of a chained map operation
author | cin |
---|---|
date | Wed, 08 Oct 2014 02:19:45 +0400 |
parents | 0349189d2564 |
children | f973c5df9972 |
rev | line source |
---|---|
55 | 1 using Implab; |
2 using Implab.Parsing; | |
3 using System; | |
4 using System.Collections.Generic; | |
5 using System.Diagnostics; | |
59 | 6 using System.IO; |
55 | 7 using System.Linq; |
8 using System.Text; | |
9 using System.Threading.Tasks; | |
10 | |
11 namespace Implab.JSON { | |
/// <summary>
/// Per-container state that <see cref="JSONParser"/> stores in its automaton context.
/// Intended for internal use.
/// </summary>
public struct JSONParserContext {
    /// <summary>
    /// Name of the object member currently being read; an empty string when there is none.
    /// </summary>
    public string memberName;

    /// <summary>
    /// Kind of the container the parser is currently inside.
    /// </summary>
    public JSONElementContext elementContext;
}
19 | |
/// <summary>
/// Pull parser for JSON data.
/// </summary>
/// <remarks>
/// Note the specific interpretation of the <see cref="Level"/> property:
/// it denotes the current nesting level of objects, however the closing
/// element of an object or an array has a level lower than the object itself.
/// <code>
/// {                       // Level = 1
///     "name" : "Peter",   // Level = 1
///     "address" : {       // Level = 2
///         "city" : "Stern" // Level = 2
///     }                   // Level = 1
/// }                       // Level = 0
/// </code>
/// </remarks>
public class JSONParser : DFAutomaton<JSONParserContext>, IDisposable {

    /// <summary>
    /// Tells <see cref="Read"/> how to interpret the next String token:
    /// as the name of an object member or as a value.
    /// </summary>
    enum MemberContext {
        MemberName,
        MemberValue
    }

    static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet;
    // Automaton used at the document root.
    static readonly DFAStateDescriptior[] _jsonDFA;
    // Automaton used while inside an object.
    static readonly DFAStateDescriptior[] _objectDFA;
    // Automaton used while inside an array.
    static readonly DFAStateDescriptior[] _arrayDFA;

    /// <summary>
    /// Builds the three token-level automatons (root, object body, array body) once per process.
    /// </summary>
    static JSONParser() {
        // A document must start with either an object or an array.
        // NOTE(review): Tag(0) presumably marks the accepting position - confirm against Implab.Parsing.
        var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0);

        // Any token that can start a value.
        var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);

        // String NameSeparator value
        var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression);

        // Reads as: (member (ValueSeparator member)*)? EndObject
        // (assuming Cat/EClosure/Optional mean concatenation/repetition/optionality - TODO confirm).
        var objectExpression = memberExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(memberExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndObject))
            .Tag(0);

        // Reads as: (value (ValueSeparator value)*)? EndArray
        var arrayExpression = valueExpression
            .Cat(
                Token.New(JsonTokenType.ValueSeparator)
                .Cat(valueExpression)
                .EClosure()
            )
            .Optional()
            .Cat(Token.New(JsonTokenType.EndArray))
            .Tag(0);

        _jsonDFA = BuildDFA(jsonExpression).States;
        _objectDFA = BuildDFA(objectExpression).States;
        _arrayDFA = BuildDFA(arrayExpression).States;
    }

    /// <summary>
    /// Compiles a token expression into a DFA definition over the JSON token alphabet.
    /// </summary>
    /// <param name="expr">The expression to compile.</param>
    /// <returns>The DFA definition populated by the builder.</returns>
    static EDFADefinition<JsonTokenType> BuildDFA(Token expr) {
        var builder = new DFABuilder();
        var dfa = new EDFADefinition<JsonTokenType>(_alphabet);
        expr.Accept(builder);

        builder.BuildDFA(dfa);
        return dfa;
    }

    JSONScanner m_scanner;
    MemberContext m_memberContext;

    JSONElementType m_elementType;
    object m_elementValue;

    /// <summary>
    /// Creates a new parser for a string containing JSON.
    /// </summary>
    /// <param name="text">The JSON text; validated with <c>Safe.ArgumentNotEmpty</c>.</param>
    public JSONParser(string text)
        : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) {
        Safe.ArgumentNotEmpty(text, "text");
        m_scanner = new JSONScanner();
        m_scanner.Feed(text.ToCharArray());
    }

    /// <summary>
    /// Creates a new parser instance reading from a text stream.
    /// </summary>
    /// <param name="reader">The text stream; validated with <c>Safe.ArgumentNotNull</c>.</param>
    /// <param name="dispose">Indicates that the parser should control the lifetime of the input stream
    /// (the flag is forwarded to the scanner).</param>
    public JSONParser(TextReader reader, bool dispose)
        : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) {
        Safe.ArgumentNotNull(reader, "reader");
        m_scanner = new JSONScanner();
        m_scanner.Feed(reader, dispose);
    }

    /// <summary>
    /// Type of the element the parser is currently positioned on.
    /// </summary>
    public JSONElementType ElementType {
        get { return m_elementType; }
    }

    /// <summary>
    /// Name of the element, i.e. the property name in the parent container.
    /// Always an empty string for array items and for the root element.
    /// </summary>
    public string ElementName {
        get { return m_context.info.memberName; }
    }

    /// <summary>
    /// Value of the element. Set only for elements of type <see cref="JSONElementType.Value"/>,
    /// <c>null</c> for all other element types.
    /// </summary>
    public object ElementValue {
        get { return m_elementValue; }
    }

    /// <summary>
    /// Reads the next element from the input.
    /// </summary>
    /// <returns><c>true</c> - an element has been read, <c>false</c> - end of data.</returns>
    /// <exception cref="InvalidOperationException">The automaton is already in the unreachable state.</exception>
    /// <exception cref="ParserException">An unexpected token was met, or the data ended inside an object or an array.</exception>
    public bool Read() {
        if (m_context.current == UNREACHEBLE_STATE)
            throw new InvalidOperationException("The parser is in invalid state");
        object tokenValue;
        JsonTokenType tokenType;
        // The name is re-populated below when a member name token is met.
        m_context.info.memberName = String.Empty;
        while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
            Move((int)tokenType);
            if (m_context.current == UNREACHEBLE_STATE)
                UnexpectedToken(tokenValue, tokenType);
            switch (tokenType) {
                case JsonTokenType.BeginObject:
                    // Enter the object automaton; the new context carries the member name
                    // under which this object appears.
                    Switch(
                        _objectDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Object
                        }
                    );
                    m_elementValue = null;
                    m_memberContext = MemberContext.MemberName;
                    m_elementType = JSONElementType.BeginObject;
                    return true;
                case JsonTokenType.EndObject:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndObject;
                    return true;
                case JsonTokenType.BeginArray:
                    Switch(
                        _arrayDFA,
                        INITIAL_STATE,
                        new JSONParserContext {
                            memberName = m_context.info.memberName,
                            elementContext = JSONElementContext.Array
                        }
                    );
                    m_elementValue = null;
                    // Strings inside an array are always values.
                    m_memberContext = MemberContext.MemberValue;
                    m_elementType = JSONElementType.BeginArray;
                    return true;
                case JsonTokenType.EndArray:
                    Restore();
                    m_elementValue = null;
                    m_elementType = JSONElementType.EndArray;
                    return true;
                case JsonTokenType.String:
                    if (m_memberContext == MemberContext.MemberName) {
                        // A member name is not an element on its own: remember it and keep reading.
                        m_context.info.memberName = (string)tokenValue;
                        break;
                    } else {
                        m_elementType = JSONElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    }
                case JsonTokenType.Number:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = tokenValue;
                    return true;
                case JsonTokenType.Literal:
                    m_elementType = JSONElementType.Value;
                    m_elementValue = ParseLiteral((string)tokenValue);
                    return true;
                case JsonTokenType.NameSeparator:
                    m_memberContext = MemberContext.MemberValue;
                    break;
                case JsonTokenType.ValueSeparator:
                    // After a separator an object expects a member name, an array expects a value.
                    m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                    break;
                default:
                    UnexpectedToken(tokenValue, tokenType);
                    break;
            }
        }
        // Running out of tokens is only valid outside of any object or array.
        if (m_context.info.elementContext != JSONElementContext.None)
            throw new ParserException("Unexpected end of data");
        return false;
    }

    /// <summary>
    /// Converts the text of a literal token to its value.
    /// </summary>
    /// <param name="literal">The literal text.</param>
    /// <returns><c>null</c> for "null", boxed <c>false</c>/<c>true</c> for "false"/"true".</returns>
    /// <exception cref="ParserException">The literal is none of the above.</exception>
    object ParseLiteral(string literal) {
        switch (literal) {
            case "null":
                return null;
            case "false":
                return false;
            case "true":
                return true;
            default:
                UnexpectedToken(literal, JsonTokenType.Literal);
                return null; // never reached, UnexpectedToken always throws; avoids a compiler error
        }
    }

    /// <summary>
    /// Throws a <see cref="ParserException"/> describing the unexpected token.
    /// </summary>
    /// <param name="value">The token value, used in the message.</param>
    /// <param name="tokenType">The token type, used in the message.</param>
    void UnexpectedToken(object value, JsonTokenType tokenType) {
        throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
    }


    /// <summary>
    /// Indicates the end of the input, as reported by the underlying scanner.
    /// </summary>
    public bool EOF {
        get {
            return m_scanner.EOF;
        }
    }

    /// <summary>
    /// Releases the scanner when called from <see cref="Dispose()"/>; does nothing when called from the finalizer.
    /// </summary>
    /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>, <c>false</c> when called from the finalizer.</param>
    protected virtual void Dispose(bool disposing) {
        if (disposing) {
            m_scanner.Dispose();
        }
    }

    /// <summary>
    /// Releases the parser and the scanner associated with it.
    /// </summary>
    public void Dispose() {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    ~JSONParser() {
        Dispose(false);
    }

    /// <summary>
    /// Moves to the end of the current element, i.e. reads until the nesting level
    /// becomes one less than it was at the moment of the call.
    /// </summary>
    /// <exception cref="InvalidOperationException">The parser is not inside any object or array.</exception>
    /// <exception cref="ParserException">The data ended before the end of the element was reached.</exception>
    public void SeekElementEnd() {
        var level = Level - 1;

        // This used to be a Debug.Assert, which is compiled out of release builds:
        // a negative target level can never be reached and the loop below would spin forever.
        if (level < 0)
            throw new InvalidOperationException("The parser is not positioned inside an element");

        while (Level != level) {
            // Read() returns false when there is nothing more to read; without this check
            // the loop could not terminate in that case.
            if (!Read())
                throw new ParserException("Unexpected end of data");
        }
    }
}
279 | |
280 } |