view Implab/JSON/JSONGrammar.cs @ 131:b5c2d609d71b v2

minor changes
author cin
date Sat, 07 Feb 2015 11:06:42 +0300
parents d67b95eddaf4
children 97fbbf816844
line wrap: on
line source

using Implab.Parsing;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Implab.JSON {
    /// <summary>
    /// Token grammar for JSON text. The constructor assembles two token expressions
    /// and builds an automaton from each of them: one for the structural level of a
    /// document and one for the content of a string literal.
    /// </summary>
    internal class JSONGrammar : Grammar<JSONGrammar> {
        /// <summary>
        /// Tags that can be attached to token expressions of this grammar.
        /// </summary>
        public enum TokenType : int {
            None,

            // Tags used by the structural expression (except String, which is not
            // referenced inside this class).
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,

            // Tags used by the string-content expression (StringBound is used by both).
            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode,

            // Not referenced inside this class.
            Minus,
            Plus,
            Sign,
            Integer,
            Dot,
            Exp
        }

        // Automaton built from the structural expression.
        readonly CDFADefinition m_jsonDFA;

        // Automaton built from the string-content expression.
        readonly CDFADefinition m_jsonStringDFA;

        /// <summary>
        /// Declares the alphabet, composes the token expressions and builds both automata.
        /// </summary>
        public JSONGrammar() {
            // NOTE(review): the Symbol*Token helpers are defined in the base grammar class,
            // which is not visible from this file. If they register symbols in the alphabet
            // as a side effect, the order of the declarations below matters (in particular
            // the "except" token has to come after every other symbol) - keep the order
            // intact unless this has been verified.

            // Characters with codes 0x00..0x1F are declared up front.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(code => (char)code));

            // ---- character classes and number parts ----
            var hexDigit = SymbolRangeToken('a', 'f')
                .Or(SymbolRangeToken('A', 'F'))
                .Or(SymbolRangeToken('0', '9'));
            var nonZeroDigit = SymbolRangeToken('1', '9');
            var zeroDigit = SymbolToken('0');
            var anyDigit = zeroDigit.Or(nonZeroDigit);
            var decimalPoint = SymbolToken('.');
            var minusSign = SymbolToken('-');
            var plusOrMinus = SymbolSetToken('-', '+');
            var exponentMarker = SymbolSetToken('e', 'E');
            var lowerLetters = SymbolRangeToken('a', 'z');

            // Integer part: a lone '0', or a non-zero digit followed by more digits.
            var integerPart = zeroDigit.Or(nonZeroDigit.Cat(anyDigit.EClosure()));
            // Fraction: '.' followed by digits.
            var fractionPart = decimalPoint.Cat(anyDigit.Closure());
            // Exponent: 'e' or 'E', an optional sign, then digits.
            var exponentPart = exponentMarker
                .Cat(plusOrMinus.Optional())
                .Cat(anyDigit.Closure());

            // ---- string parts ----
            var doubleQuote = SymbolToken('"');
            var escapePrefix = SymbolToken('\\');
            var simpleEscapes = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            // 'u' followed by four hexadecimal digits.
            var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));

            // ---- structural characters, each wrapped in whitespace on both sides ----
            var blanks = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var objectOpen = blanks.Cat(SymbolToken('{')).Cat(blanks);
            var objectClose = blanks.Cat(SymbolToken('}')).Cat(blanks);
            var arrayOpen = blanks.Cat(SymbolToken('[')).Cat(blanks);
            var arrayClose = blanks.Cat(SymbolToken(']')).Cat(blanks);
            var colon = blanks.Cat(SymbolToken(':')).Cat(blanks);
            var comma = blanks.Cat(SymbolToken(',')).Cat(blanks);

            // ---- composite tokens ----
            var numberToken = minusSign.Optional()
                .Cat(integerPart)
                .Cat(fractionPart.Optional())
                .Cat(exponentPart.Optional());
            var literalToken = lowerLetters.Closure();
            // Anything except the characters 0x00..0x1F, the backslash and the double quote.
            var plainChar = SymbolTokenExcept(
                Enumerable.Range(0, 0x20)
                    .Union(new int[] { '\\', '"' })
                    .Select(code => (char)code)
            );

            // Structural level: numbers, bare words, the opening quote of a string,
            // brackets, braces and separators.
            var structuralExpression =
                numberToken.Tag(TokenType.Number)
                .Or(literalToken.Tag(TokenType.Literal))
                .Or(doubleQuote.Tag(TokenType.StringBound))
                .Or(objectOpen.Tag(TokenType.BeginObject))
                .Or(objectClose.Tag(TokenType.EndObject))
                .Or(arrayOpen.Tag(TokenType.BeginArray))
                .Or(arrayClose.Tag(TokenType.EndArray))
                .Or(colon.Tag(TokenType.NameSeparator))
                .Or(comma.Tag(TokenType.ValueSeparator));

            // String level: the closing quote, the two kinds of escape sequences
            // and runs of ordinary characters.
            var stringContentExpression =
                doubleQuote.Tag(TokenType.StringBound)
                .Or(escapePrefix.Cat(simpleEscapes).Tag(TokenType.EscapedChar))
                .Or(escapePrefix.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                .Or(plainChar.Closure().Tag(TokenType.UnescapedChar));

            m_jsonDFA = BuildDFA(structuralExpression);
            m_jsonStringDFA = BuildDFA(stringContentExpression);
        }

        /// <summary>
        /// Gets the automaton built from the structural token expression.
        /// </summary>
        public CDFADefinition JsonDFA {
            get { return m_jsonDFA; }
        }

        /// <summary>
        /// Gets the automaton built from the string-content token expression.
        /// </summary>
        public CDFADefinition JsonStringDFA {
            get { return m_jsonStringDFA; }
        }
    }
}