view Implab/Formats/JSON/JSONGrammar.cs @ 183:4f82e0f161c3 ref20160224

fixed DFA optimization, JSON is fully functional
author cin
date Fri, 25 Mar 2016 02:49:02 +0300
parents 76e8f2ba12b8
children
line wrap: on
line source

using System.Linq;
using Implab.Automaton.RegularExpressions;
using System;
using Implab.Automaton;
using Implab.Components;

namespace Implab.Formats.JSON {
    /// <summary>
    /// Token grammar for JSON text over a <see cref="char"/> alphabet.
    /// </summary>
    /// <remarks>
    /// The constructor builds two scanner contexts: <see cref="JsonExpression"/> for the tokens that
    /// appear outside of string literals and <see cref="JsonStringExpression"/> for the content found
    /// between the quotes of a string literal.
    /// </remarks>
    class JSONGrammar : Grammar<char> {
        /// <summary>
        /// Tags attached to the token expressions defined by this grammar.
        /// </summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,
            Whitespace,

            // Tags used by the string-literal expression; StringBound is also
            // used by the main expression to tag the opening quote.
            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode
        }

        // Holder of the shared grammar instance. It is assigned only here, hence readonly.
        static readonly LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar());

        /// <summary>
        /// Gets the shared grammar instance provided by the <c>LazyAndWeak</c> holder.
        /// </summary>
        public static JSONGrammar Instance {
            get { return _instance.Value; }
        }

        readonly ScannerContext<TokenType> m_jsonExpression;
        readonly ScannerContext<TokenType> m_stringExpression;

        // Initialized by a field initializer so that it already exists when the constructor body
        // starts calling the token helpers (presumably they reach it through AlphabetBuilder).
        readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

        /// <summary>
        /// Defines the token expressions and builds both scanner contexts.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the order of the statements below is kept deliberately. The symbol helpers
        /// presumably register characters in the alphabet as they are called, and
        /// <c>SymbolTokenExcept</c> presumably works against the alphabet defined so far - confirm in
        /// <c>Grammar&lt;T&gt;</c> before reordering anything.
        /// </remarks>
        public JSONGrammar() {
            // Pass the control characters U+0000..U+001F to the alphabet up front; they are
            // referenced again by the 'unescaped' expression below.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

            var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
            var digit9 = SymbolRangeToken('1', '9');
            var zero = SymbolToken('0');
            var digit = zero.Or(digit9);
            var dot = SymbolToken('.');
            var minus = SymbolToken('-');
            var sign = SymbolSetToken('-', '+');
            var expSign = SymbolSetToken('e', 'E');
            var letters = SymbolRangeToken('a', 'z');

            // Number parts. Judging by their use here, EClosure() is presumably "zero or more"
            // and Closure() "one or more" - TODO confirm against the Token class.
            var integer = zero.Or(digit9.Cat(digit.EClosure()));
            var frac = dot.Cat(digit.Closure());
            var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());

            // String literal parts.
            var quote = SymbolToken('"');
            var backSlash = SymbolToken('\\');
            var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));

            // The whitespace character set is built once and shared by the optional padding
            // around structural characters and by the standalone Whitespace token.
            var whitespaceChar = SymbolSetToken('\n', '\r', '\t', ' ');
            var whitespace = whitespaceChar.EClosure();

            // Structural characters, each surrounded by the 'whitespace' expression.
            var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
            var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
            var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
            var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
            var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
            var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

            var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());

            // A literal is just a run of lowercase letters; which words are accepted is not
            // decided by this expression.
            var literal = letters.Closure();

            // Any symbol except the control characters, the backslash and the double quote.
            var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));

            // Tokens recognized outside of string literals.
            var jsonExpression =
                number.Tag(TokenType.Number)
                .Or(literal.Tag(TokenType.Literal))
                .Or(quote.Tag(TokenType.StringBound))
                .Or(beginObject.Tag(TokenType.BeginObject))
                .Or(endObject.Tag(TokenType.EndObject))
                .Or(beginArray.Tag(TokenType.BeginArray))
                .Or(endArray.Tag(TokenType.EndArray))
                .Or(nameSep.Tag(TokenType.NameSeparator))
                .Or(valueSep.Tag(TokenType.ValueSeparator))
                .Or(whitespaceChar.Closure().Tag(TokenType.Whitespace));

            // Tokens recognized inside a string literal.
            var jsonStringExpression =
                quote.Tag(TokenType.StringBound)
                .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
                .Or(backSlash.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

            m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression);
            m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression);
        }

        /// <summary>
        /// Gets the alphabet instance owned by this grammar.
        /// </summary>
        protected override IAlphabetBuilder<char> AlphabetBuilder {
            get {
                return m_defaultAlphabet;
            }
        }

        /// <summary>
        /// Gets the scanner context built from the expression for tokens outside of string literals.
        /// </summary>
        public ScannerContext<TokenType> JsonExpression {
            get {
                return m_jsonExpression;
            }
        }

        /// <summary>
        /// Gets the scanner context built from the expression for the content of string literals.
        /// </summary>
        public ScannerContext<TokenType> JsonStringExpression {
            get {
                return m_stringExpression;
            }
        }

        /// <summary>
        /// Creates a token for every character in the inclusive range
        /// <paramref name="start"/>..<paramref name="stop"/>.
        /// </summary>
        /// <param name="start">The first character of the range.</param>
        /// <param name="stop">The last character of the range; expected to be not less than <paramref name="start"/>.</param>
        /// <returns>The token produced by <c>SymbolToken</c> for the enumerated characters.</returns>
        Token SymbolRangeToken(char start, char stop) {
            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Creates a new, empty <see cref="CharAlphabet"/>.
        /// </summary>
        protected override IndexedAlphabetBase<char> CreateAlphabet() {
            return new CharAlphabet();
        }
    }
}