163
|
1 using System.Linq;
|
|
2 using Implab.Automaton.RegularExpressions;
|
165
|
3 using System;
|
163
|
4
|
|
5 namespace Implab.Formats.JSON {
|
165
|
/// <summary>
/// Lexical grammar for JSON text. The constructor composes two tagged regular
/// expressions - one for the tokens found between values and one for the
/// contents of a string literal - and builds a separate DFA for each of them.
/// </summary>
class JSONGrammar : Grammar<char,JSONGrammar.TokenType> {
    /// <summary>
    /// Tags attached to the expressions of this grammar.
    /// </summary>
    public enum TokenType {
        None,
        BeginObject,
        EndObject,
        BeginArray,
        EndArray,
        String,
        Number,
        Literal,
        NameSeparator,
        ValueSeparator,

        // Tags used by the string-contents expression (StringBound is also
        // used by the main JSON expression to mark the opening quote).
        StringBound,
        EscapedChar,
        UnescapedChar,
        EscapedUnicode,

        // Number-part tags; none of the expressions built in this class
        // reference them.
        Minus,
        Plus,
        Sign,
        Integer,
        Dot,
        Exp
    }

    // Lazy<T>() without a factory creates the value through the public
    // parameterless constructor declared below.
    static readonly Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>();

    /// <summary>
    /// Shared grammar instance, created on first access.
    /// </summary>
    public static JSONGrammar Instance {
        get { return _instance.Value; }
    }

    readonly RegularCharDFADefinition<TokenType> m_jsonDFA;
    readonly RegularCharDFADefinition<TokenType> m_stringDFA;

    /// <summary>
    /// Composes the JSON and JSON-string expressions and builds
    /// <see cref="JsonDFA"/> and <see cref="JsonStringDFA"/> from them.
    /// </summary>
    public JSONGrammar() {
        // Hands the characters U+0000..U+001F to the base grammar's alphabet
        // definition (DefineAlphabet is declared outside this file).
        DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

        // --- character classes -------------------------------------------
        var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
        var digit9 = SymbolRangeToken('1', '9');
        var zero = SymbolToken('0');
        var digit = zero.Or(digit9);
        var dot = SymbolToken('.');
        var minus = SymbolToken('-');
        var sign = SymbolSetToken('-', '+');
        var expSign = SymbolSetToken('e', 'E');
        var letters = SymbolRangeToken('a', 'z');

        // --- number parts ------------------------------------------------
        // NOTE(review): Closure/EClosure are defined elsewhere; they are
        // presumably "one or more" and "zero or more" respectively - confirm.
        var integer = zero.Or(digit9.Cat(digit.EClosure()));
        var frac = dot.Cat(digit.Closure());
        var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());

        // --- string parts ------------------------------------------------
        var quote = SymbolToken('"');
        var backSlash = SymbolToken('\\');
        var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
        var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));

        // --- structural tokens, each wrapped in surrounding whitespace ----
        var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
        var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
        var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
        var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
        var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
        var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
        var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

        var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
        var literal = letters.Closure();

        // Everything except the characters U+0000..U+001F, the backslash and
        // the double quote.
        var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));

        // Tokens recognised outside of string literals. A quote is only
        // tagged as a string boundary here; string contents are described by
        // jsonStringExpression below.
        var jsonExpression =
            number.Tag(TokenType.Number)
            .Or(literal.Tag(TokenType.Literal))
            .Or(quote.Tag(TokenType.StringBound))
            .Or(beginObject.Tag(TokenType.BeginObject))
            .Or(endObject.Tag(TokenType.EndObject))
            .Or(beginArray.Tag(TokenType.BeginArray))
            .Or(endArray.Tag(TokenType.EndArray))
            .Or(nameSep.Tag(TokenType.NameSeparator))
            .Or(valueSep.Tag(TokenType.ValueSeparator));

        // Tokens recognised inside a string literal.
        var jsonStringExpression =
            quote.Tag(TokenType.StringBound)
            .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
            .Or(backSlash.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
            .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

        m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet());
        BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);

        // The string expression must be compiled into its own DFA. Building
        // it into m_jsonDFA would leave m_stringDFA empty and would merge the
        // string rules into the JSON token DFA.
        m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet());
        BuildDFA(jsonStringExpression, m_stringDFA, m_stringDFA.InputAlphabet);
    }

    /// <summary>
    /// DFA built from the JSON token expression.
    /// </summary>
    public RegularCharDFADefinition<TokenType> JsonDFA {
        get {
            return m_jsonDFA;
        }
    }

    /// <summary>
    /// DFA built from the string-contents expression.
    /// </summary>
    public RegularDFADefinition<char,TokenType> JsonStringDFA {
        get {
            return m_stringDFA;
        }
    }

    /// <summary>
    /// Creates a token matching any single character in the inclusive range
    /// <paramref name="start"/>..<paramref name="stop"/>.
    /// </summary>
    /// <param name="start">First character of the range.</param>
    /// <param name="stop">Last character of the range (included).</param>
    Token<TokenType> SymbolRangeToken(char start, char stop) {
        // "+ 1" makes the upper bound inclusive, so ('0','9') contains '9'.
        // Select with an explicit conversion is required: Cast<char>() would
        // try to unbox the int values as char and throw InvalidCastException.
        return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
    }
}
|
|
115 }
|