annotate Implab/Formats/JSON/JSONGrammar.cs @ 170:181119ef3b39 ref20160224

DFA refactoring, rx based dfa.
author cin
date Fri, 04 Mar 2016 01:56:31 +0300
parents e227e78d72e4
children 92d5278d1b10
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
1 using System.Linq;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
2 using Implab.Automaton.RegularExpressions;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
3 using System;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
4
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
5 namespace Implab.Formats.JSON {
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
6 class JSONGrammar : Grammar<char,JSONGrammar.TokenType> {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
/// <summary>
/// Tags that this grammar can attach to token expressions.
/// Member order is significant: the values are assigned implicitly, starting at 0.
/// </summary>
public enum TokenType {
    // Default member (value 0).
    None,

    // Structural and value tokens. All of these except String are attached
    // to the top-level JSON expression in the JSONGrammar constructor.
    BeginObject,
    EndObject,
    BeginArray,
    EndArray,
    String,
    Number,
    Literal,
    NameSeparator,
    ValueSeparator,

    // Tokens attached to the string-content expression in the JSONGrammar
    // constructor (StringBound is also attached to the top-level expression).
    StringBound,
    EscapedChar,
    UnescapedChar,
    EscapedUnicode,

    // Not attached to any expression built in the JSONGrammar constructor;
    // presumably kept for number parsing elsewhere - verify against callers.
    Minus,
    Plus,
    Sign,
    Integer,
    Dot,
    Exp
}
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
31
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
// Holder for the shared grammar. No factory delegate is supplied, so Lazy<T>
// creates the value through the parameterless JSONGrammar constructor.
static Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>();

/// <summary>
/// Gets the shared <see cref="JSONGrammar"/>; it is constructed on first access.
/// </summary>
public static JSONGrammar Instance {
    get {
        return _instance.Value;
    }
}

// Automaton definitions allocated in the constructor and exposed through
// the JsonDFA and JsonStringDFA properties.
readonly RegularCharDFADefinition<TokenType> m_jsonDFA;
readonly RegularCharDFADefinition<TokenType> m_stringDFA;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
40
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
/// <summary>
/// Describes the JSON token expressions and builds two automaton definitions
/// from them: one for the top-level JSON tokens (<c>m_jsonDFA</c>) and one for
/// the contents of a string literal (<c>m_stringDFA</c>).
/// </summary>
public JSONGrammar() {
    // Pass the characters with codes 0x00..0x1F to the alphabet definition.
    DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

    // Elementary character classes and number parts.
    var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
    var digit9 = SymbolRangeToken('1', '9');
    var zero = SymbolToken('0');
    var digit = zero.Or(digit9);
    var dot = SymbolToken('.');
    var minus = SymbolToken('-');
    var sign = SymbolSetToken('-', '+');
    var expSign = SymbolSetToken('e', 'E');
    var letters = SymbolRangeToken('a', 'z');
    var integer = zero.Or(digit9.Cat(digit.EClosure()));
    var frac = dot.Cat(digit.Closure());
    var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());

    // String-related symbols.
    var quote = SymbolToken('"');
    var backSlash = SymbolToken('\\');
    var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
    var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));

    // Structural symbols; each one is wrapped in the whitespace expression on both sides.
    var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
    var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
    var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
    var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
    var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
    var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
    var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

    var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
    var literal = letters.Closure();
    // Everything except the characters 0x00..0x1F, the backslash and the double quote.
    var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));

    // Top-level JSON tokens. An opening quote is only tagged as StringBound here;
    // the string contents are described by jsonStringExpression below.
    var jsonExpression =
        number.Tag(TokenType.Number)
        .Or(literal.Tag(TokenType.Literal))
        .Or(quote.Tag(TokenType.StringBound))
        .Or(beginObject.Tag(TokenType.BeginObject))
        .Or(endObject.Tag(TokenType.EndObject))
        .Or(beginArray.Tag(TokenType.BeginArray))
        .Or(endArray.Tag(TokenType.EndArray))
        .Or(nameSep.Tag(TokenType.NameSeparator))
        .Or(valueSep.Tag(TokenType.ValueSeparator));

    // Tokens that may appear inside a string literal.
    var jsonStringExpression =
        quote.Tag(TokenType.StringBound)
        .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
        .Or(backSlash.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
        .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

    m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet());
    BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);

    // The string expression must be built into its own definition. Previously it
    // was built into m_jsonDFA a second time, leaving m_stringDFA unpopulated.
    m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet());
    BuildDFA(jsonStringExpression, m_stringDFA, m_stringDFA.InputAlphabet);
}
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
97
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
/// <summary>
/// Gets the automaton definition held in <c>m_jsonDFA</c>, which the constructor
/// builds from the top-level JSON token expression.
/// </summary>
public RegularCharDFADefinition<TokenType> JsonDFA {
    get { return m_jsonDFA; }
}
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
103
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
/// <summary>
/// Gets the automaton definition held in <c>m_stringDFA</c>, which the constructor
/// allocates for the string-content expression.
/// </summary>
public RegularDFADefinition<char,TokenType> JsonStringDFA {
    get { return m_stringDFA; }
}
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
109
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
/// <summary>
/// Creates a token for the characters from <paramref name="start"/> to
/// <paramref name="stop"/>, both ends included.
/// </summary>
/// <param name="start">First character of the range.</param>
/// <param name="stop">Last character of the range (inclusive).</param>
/// <returns>The result of <c>SymbolToken</c> for the characters of the range.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="stop"/> is less than <paramref name="start"/>.
/// </exception>
Token<TokenType> SymbolRangeToken(char start, char stop) {
    if (stop < start)
        throw new ArgumentOutOfRangeException("stop");

    // Enumerable.Range takes a count, so +1 is needed to include 'stop'.
    // Select with an explicit conversion is used instead of Cast<char>(),
    // which unboxes each int as char and throws InvalidCastException.
    return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
}
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
113
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
114 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
115 }