Mercurial > pub > ImplabNet
comparison Implab/Formats/Json/JsonGrammar.cs @ 235:b49969a7043c v2
Слияние
| author | cin |
|---|---|
| date | Thu, 05 Oct 2017 09:24:49 +0300 |
| parents | 3e26338eb977 |
| children | 302ca905c19e |
comparison
equal
deleted
inserted
replaced
| 234:8dd666e6b6bf | 235:b49969a7043c |
|---|---|
| 1 using System.Linq; | |
| 2 using Implab.Automaton.RegularExpressions; | |
| 3 using System; | |
| 4 using Implab.Automaton; | |
| 5 using Implab.Components; | |
| 6 | |
namespace Implab.Formats.Json {
    /// <summary>
    /// Lexical grammar for JSON text over the <c>char</c> alphabet.
    /// </summary>
    /// <remarks>
    /// The constructor builds two scanners: one for the JSON structure (numbers,
    /// literals, punctuation, whitespace and the opening quote of a string) and one
    /// for the contents of a string (escape sequences, runs of plain characters and
    /// the closing quote). Callers obtain private copies of these scanners through
    /// <see cref="CreateJsonExpressionScanner"/> and
    /// <see cref="CreateStringExpressionScanner"/>.
    /// </remarks>
    public class JsonGrammar : Grammar<char> {
        /// <summary>
        /// Tags attached to the tokens recognized by the scanners of this grammar.
        /// </summary>
        public enum TokenType {
            /// <summary>Default value; used for states which carry no tag.</summary>
            None,
            /// <summary>The <c>{</c> character together with surrounding whitespace.</summary>
            BeginObject,
            /// <summary>The <c>}</c> character together with surrounding whitespace.</summary>
            EndObject,
            /// <summary>The <c>[</c> character together with surrounding whitespace.</summary>
            BeginArray,
            /// <summary>The <c>]</c> character together with surrounding whitespace.</summary>
            EndArray,
            /// <summary>Not produced by the expressions defined in this class.</summary>
            String,
            /// <summary>A JSON number: optional minus, integer part, optional fraction and exponent.</summary>
            Number,
            /// <summary>A run of lowercase latin letters (<c>a</c>-<c>z</c>).</summary>
            Literal,
            /// <summary>The <c>:</c> character together with surrounding whitespace.</summary>
            NameSeparator,
            /// <summary>The <c>,</c> character together with surrounding whitespace.</summary>
            ValueSeparator,
            /// <summary>A run of <c>\n</c>, <c>\r</c>, <c>\t</c> or space characters.</summary>
            Whitespace,

            /// <summary>The double quote which opens or closes a string.</summary>
            StringBound,
            /// <summary>A backslash followed by one of <c>\ " / b f t n r</c>.</summary>
            EscapedChar,
            /// <summary>A run of characters which need no escaping inside a string.</summary>
            UnescapedChar,
            /// <summary>A backslash followed by <c>u</c> and four hexadecimal digits.</summary>
            EscapedUnicode
        }

        // NOTE(review): LazyAndWeak is declared elsewhere; presumably it creates the
        // grammar on first access and keeps only a weak reference to it - confirm.
        static LazyAndWeak<JsonGrammar> _instance = new LazyAndWeak<JsonGrammar>(() => new JsonGrammar());

        /// <summary>
        /// The shared grammar instance used by the static scanner factories.
        /// </summary>
        public static JsonGrammar Instance {
            get { return _instance.Value; }
        }

        readonly InputScanner<TokenType> m_jsonExpression;
        readonly InputScanner<TokenType> m_stringExpression;
        readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

        /// <summary>
        /// The alphabet used to define the symbols of this grammar.
        /// </summary>
        public CharAlphabet DefaultAlphabet { get { return m_defaultAlphabet; } }

        /// <summary>
        /// Defines the JSON tokens and builds the scanners for the JSON structure and
        /// for the string contents.
        /// </summary>
        /// <remarks>
        /// The statements below are kept in their original order on purpose: creating
        /// a token may define symbols in the alphabet as a side effect (not visible
        /// from this file - confirm before reordering).
        /// </remarks>
        public JsonGrammar() {
            // Control characters 0x00-0x1F are declared up front.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
            var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
            var digit9 = SymbolRangeToken('1', '9');
            var zero = SymbolToken('0');
            var digit = zero.Or(digit9);
            var dot = SymbolToken('.');
            var minus = SymbolToken('-');
            var sign = SymbolSetToken('-', '+');
            var expSign = SymbolSetToken('e', 'E');
            var letters = SymbolRangeToken('a', 'z');
            // NOTE(review): EClosure() is presumably "zero or more" and Closure()
            // "one or more"; BuildScanner rejects expressions matching the empty
            // input, which is consistent with that reading - confirm.
            var integer = zero.Or(digit9.Cat(digit.EClosure()));
            var frac = dot.Cat(digit.Closure());
            var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
            var quote = SymbolToken('"');
            var backSlash = SymbolToken('\\');
            var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));
            var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            // Structural characters take the whitespace around them into the token.
            var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
            var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
            var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
            var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
            var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
            var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

            var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
            var literal = letters.Closure();
            // Everything except the control characters, the backslash and the quote.
            var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));

            var jsonExpression =
                number.Tag(TokenType.Number)
                .Or(literal.Tag(TokenType.Literal))
                .Or(quote.Tag(TokenType.StringBound))
                .Or(beginObject.Tag(TokenType.BeginObject))
                .Or(endObject.Tag(TokenType.EndObject))
                .Or(beginArray.Tag(TokenType.BeginArray))
                .Or(endArray.Tag(TokenType.EndArray))
                .Or(nameSep.Tag(TokenType.NameSeparator))
                .Or(valueSep.Tag(TokenType.ValueSeparator))
                .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));


            var jsonStringExpression =
                quote.Tag(TokenType.StringBound)
                .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
                .Or(backSlash.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));


            m_jsonExpression = BuildScanner(jsonExpression);
            m_stringExpression = BuildScanner(jsonStringExpression);
        }

        /// <summary>
        /// Creates a copy of the scanner which recognizes the JSON structure tokens.
        /// </summary>
        /// <returns>A clone of the scanner owned by <see cref="Instance"/>.</returns>
        public static InputScanner<TokenType> CreateJsonExpressionScanner() {
            return Instance.m_jsonExpression.Clone();
        }

        /// <summary>
        /// Creates a copy of the scanner which recognizes the contents of a JSON string.
        /// </summary>
        /// <returns>A clone of the scanner owned by <see cref="Instance"/>.</returns>
        public static InputScanner<TokenType> CreateStringExpressionScanner() {
            return Instance.m_stringExpression.Clone();
        }

        /// <summary>
        /// The alphabet builder used by the base grammar; the same object as
        /// <see cref="DefaultAlphabet"/>.
        /// </summary>
        protected override IAlphabetBuilder<char> AlphabetBuilder {
            get {
                return m_defaultAlphabet;
            }
        }

        /// <summary>
        /// Creates a token which matches any single character of the inclusive range
        /// from <paramref name="start"/> to <paramref name="stop"/>.
        /// </summary>
        /// <param name="start">The first character of the range.</param>
        /// <param name="stop">The last character of the range.</param>
        Token SymbolRangeToken(char start, char stop) {
            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Builds a scanner for the specified tagged regular expression.
        /// </summary>
        /// <param name="regexp">The expression describing the tokens to recognize.</param>
        /// <returns>A scanner built from the optimized automaton of the expression.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="regexp"/> is null.</exception>
        /// <exception cref="ApplicationException">
        /// The initial state of the automaton is final, i.e. the expression matches
        /// the empty input.
        /// </exception>
        public InputScanner<TokenType> BuildScanner(Token regexp) {
            // Fail with a descriptive error instead of a NullReferenceException below.
            if (regexp == null)
                throw new ArgumentNullException(nameof(regexp));

            var dfa = new RegularDFA<char, TokenType>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TokenType>(dfa);
            regexp.Accept(visitor);
            visitor.BuildDFA();

            // The exception type is kept as is since callers may rely on it.
            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            // The optimized automaton gets its own alphabet, which is also the
            // source of the character map passed to the scanner.
            var ab = new CharAlphabet();
            var optimal = dfa.Optimize(ab);

            return new InputScanner<TokenType>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                NormalizeTags(optimal.CreateTagTable()),
                optimal.InitialState,
                ab.CreateCharMap()
            );
        }

        /// <summary>
        /// Flattens the per-state tag lists to a single tag per state.
        /// </summary>
        /// <param name="tags">The tags of each state; an entry may be null or empty.</param>
        /// <returns>
        /// An array of the same length where a state without tags gets the default
        /// <see cref="TokenType"/> and a state with one tag gets that tag.
        /// </returns>
        /// <exception cref="InvalidOperationException">A state has more than one tag.</exception>
        static TokenType[] NormalizeTags(TokenType[][] tags) {
            var result = new TokenType[tags.Length];
            for (var i = 0; i < tags.Length; i++) {
                var stateTags = tags[i];
                if (stateTags == null || stateTags.Length == 0)
                    result[i] = default(TokenType);
                else if (stateTags.Length == 1)
                    result[i] = stateTags[0];
                else
                    throw new InvalidOperationException($"Ambigous state tags {string.Join(", ", stateTags)}");
            }
            return result;
        }

    }
}
