annotate Implab/Formats/JSON/JSONGrammar.cs @ 187:dd4a3590f9c6 ref20160224

Reworked cancelation handling: if the cancel handler isn't specified, the OperationCanceledException will be handled by the error handler. Any unhandled OperationCanceledException will cause the promise cancelation.
author cin
date Tue, 19 Apr 2016 17:35:20 +0300
parents 4f82e0f161c3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
1 using System.Linq;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
2 using Implab.Automaton.RegularExpressions;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
3 using System;
172
92d5278d1b10 Working on text scanner
cin
parents: 165
diff changeset
4 using Implab.Automaton;
180
c32688129f14 refactoring complete, JSONParser rewritten
cin
parents: 178
diff changeset
5 using Implab.Components;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
6
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
namespace Implab.Formats.JSON {
    /// <summary>
    /// Token grammar for JSON text. The constructor composes regular-expression tokens and
    /// builds two scanner contexts from them: one for input outside of string literals
    /// (<see cref="JsonExpression"/>) and one for the inside of a string literal
    /// (<see cref="JsonStringExpression"/>).
    /// </summary>
    class JSONGrammar : Grammar<char> {
        /// <summary>Tags attached to the token expressions composed by this grammar.</summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,
            Whitespace,

            // StringBound is tagged in both scanner contexts; the three tags after it
            // are only produced by the string-literal context.
            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode
        }

        // Number of code points, counted from 0, that are registered up front and
        // excluded from unescaped string content (i.e. 0x00..0x1F).
        const int ControlCharCount = 0x20;

        // Holder for the shared grammar instance; the factory builds a new grammar on demand.
        // NOTE(review): judging by the type name the instance is created lazily and held
        // weakly -- confirm against LazyAndWeak<T>.
        static LazyAndWeak<JSONGrammar> _instance =
            new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());

        // Initialised inline so that it is already set while the constructor body runs.
        // NOTE(review): presumably the inherited DefineAlphabet/Symbol* helpers reach it
        // through AlphabetBuilder -- confirm in Grammar<T>.
        readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

        readonly ScannerContext<TokenType> m_jsonExpression;
        readonly ScannerContext<TokenType> m_stringExpression;

        /// <summary>Gets the grammar instance provided by the static holder.</summary>
        public static JSONGrammar Instance {
            get {
                return _instance.Value;
            }
        }

        /// <summary>
        /// Composes the token expressions and builds both scanner contexts.
        /// </summary>
        /// <remarks>
        /// The statements that create symbol tokens are kept in a fixed order on purpose.
        /// NOTE(review): those helpers may register symbols with the alphabet as they are
        /// called, so reordering them could change the generated tables -- confirm before
        /// rearranging.
        /// </remarks>
        public JSONGrammar() {
            // Register code points 0x00..0x1F before any token is composed.
            var controlChars = Enumerable.Range(0, ControlCharCount).Select(code => (char)code);
            DefineAlphabet(controlChars);

            // --- character classes -------------------------------------------------
            var lowerHex = SymbolRangeToken('a', 'f');
            var upperHex = SymbolRangeToken('A', 'F');
            var decimalHex = SymbolRangeToken('0', '9');
            var hex = lowerHex.Or(upperHex).Or(decimalHex);

            var nonZeroDigit = SymbolRangeToken('1', '9');
            var zeroDigit = SymbolToken('0');
            var anyDigit = zeroDigit.Or(nonZeroDigit);
            var decimalPoint = SymbolToken('.');
            var minusSign = SymbolToken('-');
            var plusOrMinus = SymbolSetToken('-', '+');
            var exponentMark = SymbolSetToken('e', 'E');
            var lowercase = SymbolRangeToken('a', 'z');

            // --- pieces of a number ------------------------------------------------
            // NOTE(review): EClosure() is presumably "zero or more" and Closure()
            // "one or more" -- confirm against Token.
            var integerPart = zeroDigit.Or(nonZeroDigit.Cat(anyDigit.EClosure()));
            var fractionPart = decimalPoint.Cat(anyDigit.Closure());
            var exponentPart = exponentMark.Cat(plusOrMinus.Optional()).Cat(anyDigit.Closure());

            // --- pieces of a string literal ----------------------------------------
            var doubleQuote = SymbolToken('"');
            var escapeLead = SymbolToken('\\');
            var simpleEscapes = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEscape = SymbolToken('u').Cat(hex.Repeat(4));

            // --- structural characters, each wrapped in the blank-run expression -----
            var optionalBlanks = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var openObject = optionalBlanks.Cat(SymbolToken('{')).Cat(optionalBlanks);
            var closeObject = optionalBlanks.Cat(SymbolToken('}')).Cat(optionalBlanks);
            var openArray = optionalBlanks.Cat(SymbolToken('[')).Cat(optionalBlanks);
            var closeArray = optionalBlanks.Cat(SymbolToken(']')).Cat(optionalBlanks);
            var colon = optionalBlanks.Cat(SymbolToken(':')).Cat(optionalBlanks);
            var comma = optionalBlanks.Cat(SymbolToken(',')).Cat(optionalBlanks);

            // --- complete tokens ---------------------------------------------------
            var numberToken = minusSign.Optional()
                .Cat(integerPart)
                .Cat(fractionPart.Optional())
                .Cat(exponentPart.Optional());
            var literalToken = lowercase.Closure();

            // Anything except 0x00..0x1F, the backslash and the double quote.
            var plainChar = SymbolTokenExcept(
                Enumerable.Range(0, ControlCharCount)
                    .Union(new int[] { '\\', '"' })
                    .Select(code => (char)code)
            );

            // Alternatives recognised outside of a string literal. An opening quote is
            // only tagged as StringBound here; string content is covered by the
            // separate expression below.
            var outsideString =
                numberToken.Tag(TokenType.Number)
                    .Or(literalToken.Tag(TokenType.Literal))
                    .Or(doubleQuote.Tag(TokenType.StringBound))
                    .Or(openObject.Tag(TokenType.BeginObject))
                    .Or(closeObject.Tag(TokenType.EndObject))
                    .Or(openArray.Tag(TokenType.BeginArray))
                    .Or(closeArray.Tag(TokenType.EndArray))
                    .Or(colon.Tag(TokenType.NameSeparator))
                    .Or(comma.Tag(TokenType.ValueSeparator))
                    .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));

            // Alternatives recognised inside a string literal.
            var insideString =
                doubleQuote.Tag(TokenType.StringBound)
                    .Or(escapeLead.Cat(simpleEscapes).Tag(TokenType.EscapedChar))
                    .Or(escapeLead.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                    .Or(plainChar.Closure().Tag(TokenType.UnescapedChar));

            m_jsonExpression = BuildScannerContext<TokenType>(outsideString);
            m_stringExpression = BuildScannerContext<TokenType>(insideString);
        }

        /// <summary>Gets the alphabet instance owned by this grammar.</summary>
        protected override IAlphabetBuilder<char> AlphabetBuilder {
            get { return m_defaultAlphabet; }
        }

        /// <summary>Gets the scanner context built from the expression for input outside of string literals.</summary>
        public ScannerContext<TokenType> JsonExpression {
            get { return m_jsonExpression; }
        }

        /// <summary>Gets the scanner context built from the expression for string-literal content.</summary>
        public ScannerContext<TokenType> JsonStringExpression {
            get { return m_stringExpression; }
        }

        /// <summary>
        /// Creates a token from every character between <paramref name="start"/> and
        /// <paramref name="stop"/>, both inclusive.
        /// </summary>
        /// <param name="start">First character of the range.</param>
        /// <param name="stop">Last character of the range.</param>
        /// <returns>The token produced by <c>SymbolToken</c> for the enumerated characters.</returns>
        Token SymbolRangeToken(char start, char stop) {
            int length = stop - start + 1;
            var symbols = Enumerable.Range(start, length).Select(code => (char)code);
            return SymbolToken(symbols);
        }

        /// <summary>Creates a new, empty <see cref="CharAlphabet"/> on every call.</summary>
        protected override IndexedAlphabetBase<char> CreateAlphabet() {
            return new CharAlphabet();
        }
    }
}