view Implab/Formats/JSON/JSONGrammar.cs @ 170:181119ef3b39 ref20160224

DFA refactoring, rx based dfa.
author cin
date Fri, 04 Mar 2016 01:56:31 +0300
parents e227e78d72e4
children 92d5278d1b10
line wrap: on
line source

using System.Linq;
using Implab.Automaton.RegularExpressions;
using System;

namespace Implab.Formats.JSON {
    /// <summary>
    /// Regular grammar for JSON tokens. The constructor builds two DFA definitions:
    /// one from the top-level JSON token expressions (<see cref="JsonDFA"/>) and one
    /// from the expressions used for string contents (<see cref="JsonStringDFA"/>).
    /// </summary>
    class JSONGrammar : Grammar<char,JSONGrammar.TokenType> {
        /// <summary>
        /// Tags attached to the regular expressions of this grammar.
        /// </summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,

            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode,

            Minus,
            Plus,
            Sign,
            Integer,
            Dot,
            Exp
        }

        // Lazily created shared instance; Lazy<T> invokes the parameterless constructor on first access.
        static readonly Lazy<JSONGrammar> _instance = new Lazy<JSONGrammar>();

        /// <summary>
        /// Gets the shared grammar instance, created on first access.
        /// </summary>
        public static JSONGrammar Instance {
            get { return _instance.Value; }
        }

        readonly RegularCharDFADefinition<TokenType> m_jsonDFA;
        readonly RegularCharDFADefinition<TokenType> m_stringDFA;

        /// <summary>
        /// Defines the token expressions and builds both DFA definitions.
        /// </summary>
        public JSONGrammar() {
            // Code points 0x00..0x1F are handed to DefineAlphabet before any token is created.
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));

            // Character classes.
            var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
            var digit9 = SymbolRangeToken('1', '9');
            var zero = SymbolToken('0');
            var digit = zero.Or(digit9);
            var dot = SymbolToken('.');
            var minus = SymbolToken('-');
            var sign = SymbolSetToken('-', '+');
            var expSign = SymbolSetToken('e', 'E');
            var letters = SymbolRangeToken('a', 'z');

            // Parts of a number.
            var integer = zero.Or(digit9.Cat(digit.EClosure()));
            var frac = dot.Cat(digit.Closure());
            var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());

            // Parts of a string.
            var quote = SymbolToken('"');
            var backSlash = SymbolToken('\\');
            var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEscape = SymbolToken('u').Cat(hexDigit.Repeat(4));

            // Structural characters, each wrapped in the whitespace expression on both sides.
            var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
            var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
            var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
            var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
            var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
            var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);

            var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
            var literal = letters.Closure();
            // Everything except code points 0x00..0x1F, the backslash and the double quote.
            var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));

            var jsonExpression =
                number.Tag(TokenType.Number)
                .Or(literal.Tag(TokenType.Literal))
                .Or(quote.Tag(TokenType.StringBound))
                .Or(beginObject.Tag(TokenType.BeginObject))
                .Or(endObject.Tag(TokenType.EndObject))
                .Or(beginArray.Tag(TokenType.BeginArray))
                .Or(endArray.Tag(TokenType.EndArray))
                .Or(nameSep.Tag(TokenType.NameSeparator))
                .Or(valueSep.Tag(TokenType.ValueSeparator));

            var jsonStringExpression =
                quote.Tag(TokenType.StringBound)
                .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
                .Or(backSlash.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));

            m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet());
            BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);

            // The string expression gets its own definition and alphabet; it must not be
            // built into m_jsonDFA, otherwise m_stringDFA stays empty.
            m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet());
            BuildDFA(jsonStringExpression, m_stringDFA, m_stringDFA.InputAlphabet);
        }

        /// <summary>
        /// Gets the DFA definition built from the top-level JSON token expressions.
        /// </summary>
        public RegularCharDFADefinition<TokenType> JsonDFA {
            get {
                return m_jsonDFA;
            }
        }

        /// <summary>
        /// Gets the DFA definition built from the string content expressions.
        /// </summary>
        public RegularDFADefinition<char,TokenType> JsonStringDFA {
            get {
                return m_stringDFA;
            }
        }

        /// <summary>
        /// Creates a token for the characters from <paramref name="start"/> to
        /// <paramref name="stop"/>, both bounds included.
        /// </summary>
        /// <param name="start">First character of the range.</param>
        /// <param name="stop">Last character of the range (inclusive).</param>
        /// <returns>The token returned by <c>SymbolToken</c> for the characters of the range.</returns>
        Token<TokenType> SymbolRangeToken(char start, char stop) {
            // Enumerable.Range takes a count, so +1 is needed to include 'stop'.
            // An explicit projection is used because Cast<char>() cannot unbox int values as char.
            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
        }

    }
}