diff Implab/Xml/JsonXmlReader.cs @ 227:8d5de4eb9c2c v2

Reimplemented JsonXmlReader, added support for null values: JSON null values are mapped to empty nodes with 'xsi:nil' attribute set to 'true'
author cin
date Sat, 09 Sep 2017 03:53:13 +0300
parents
children 6fa235c5a760
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Xml/JsonXmlReader.cs	Sat Sep 09 03:53:13 2017 +0300
@@ -0,0 +1,629 @@
+using Implab.Formats.JSON;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.Xml;
+
+namespace Implab.Xml {
+    public class JsonXmlReader : XmlReader {
+        /// <summary>
+        /// Naming state saved for one level of JSON nesting; pushed by SaveJsonName
+        /// and restored by LeaveJsonScope.
+        /// </summary>
+        struct JsonContext {
+            // saved value of m_jsonSkip
+            public bool skip;
+
+            // saved value of m_jsonLocalName
+            public string localName;
+        }
+
+        // ---- source and configuration ----
+        JSONParser m_parser;
+        JsonXmlReaderOptions m_options;
+        XmlNameTable m_nameTable;
+
+        // current position of the reader state machine
+        JsonXmlReaderPosition m_position = JsonXmlReaderPosition.Initial;
+
+        // ---- atomized names taken from the options ----
+        readonly string m_jsonRootName;
+        readonly string m_jsonArrayItemName;
+        readonly string m_jsonNamespace;
+        readonly string m_jsonPrefix;
+        readonly bool m_jsonFlattenArrays;
+
+        // ---- atomized well-known prefixes and namespaces ----
+        readonly string m_xmlnsPrefix;
+        readonly string m_xmlnsNamespace;
+        readonly string m_xsiPrefix;
+        readonly string m_xsiNamespace;
+
+        // ---- JSON naming state ----
+        string m_jsonLocalName;
+        string m_jsonValueName;
+        bool m_jsonSkip; // indicates whether to generate closing tag for objects or arrays
+
+        // naming state of the enclosing JSON scopes
+        readonly Stack<JsonContext> m_jsonNameStack = new Stack<JsonContext>();
+
+        // ---- element state saved while the reader is positioned on attributes ----
+        XmlQualifiedName m_elementQName;
+        string m_elementPrefix;
+        int m_elementDepth;
+        bool m_elementIsEmpty;
+
+        // ---- state of the current node ----
+        XmlNodeType m_nodeType = XmlNodeType.None;
+        XmlQualifiedName m_qName;
+        string m_prefix;
+        int m_xmlDepth;
+        object m_value;
+        bool m_isEmpty;
+        XmlSimpleAttribute[] m_attributes;
+
+        // ---- attribute iteration ----
+        bool m_isAttribute; // indicates that we are reading attribute nodes
+        int m_currentAttribute;
+        bool m_currentAttributeRead;
+
+        // ---- namespace scope of the current element ----
+        XmlNameContext m_context;
+        int m_nextPrefix = 1;
+
+
+        /// <summary>
+        /// Creates a reader which exposes the output of the specified JSON parser as XML nodes.
+        /// </summary>
+        /// <param name="parser">The JSON parser to read from, must not be null.</param>
+        /// <param name="options">Reader options; when null a default <see cref="JsonXmlReaderOptions"/> instance is used.</param>
+        public JsonXmlReader(JSONParser parser, JsonXmlReaderOptions options) {
+            Safe.ArgumentNotNull(parser, nameof(parser));
+
+            var opts = options ?? new JsonXmlReaderOptions();
+            var names = opts.NameTable ?? new NameTable();
+
+            // fall back to the default element names when none are configured
+            var rootName = string.IsNullOrEmpty(opts.RootName) ? "data" : opts.RootName;
+            var itemName = string.IsNullOrEmpty(opts.ArrayItemName) ? "item" : opts.ArrayItemName;
+
+            m_parser = parser;
+            m_options = opts;
+            m_nameTable = names;
+            m_jsonFlattenArrays = opts.FlattenArrays;
+
+            // all names are atomized through the name table
+            m_jsonRootName = names.Add(rootName);
+            m_jsonArrayItemName = names.Add(itemName);
+            m_jsonNamespace = names.Add(opts.NamespaceUri ?? string.Empty);
+            m_jsonPrefix = names.Add(opts.NodesPrefix ?? string.Empty);
+
+            m_xmlnsPrefix = names.Add(XmlNameContext.XmlnsPrefix);
+            m_xmlnsNamespace = names.Add(XmlNameContext.XmlnsNamespace);
+            m_xsiPrefix = names.Add(XmlNameContext.XsiPrefix);
+            m_xsiNamespace = names.Add(XmlNameContext.XsiNamespace);
+
+            // TODO validate m_jsonRootName, m_jsonArrayItemName
+
+            m_context = new XmlNameContext(null);
+        }
+
+        /// <summary>
+        /// Gets the number of attributes of the current node, zero when there are none.
+        /// </summary>
+        public override int AttributeCount => m_attributes?.Length ?? 0;
+
+        /// <summary>
+        /// Always returns an empty string.
+        /// </summary>
+        public override string BaseURI => string.Empty;
+
+        /// <summary>
+        /// Gets the depth of the current node.
+        /// </summary>
+        public override int Depth => m_xmlDepth;
+
+        /// <summary>
+        /// Gets a value indicating whether the reader has reached the end of the JSON stream.
+        /// </summary>
+        public override bool EOF => m_position == JsonXmlReaderPosition.Eof;
+
+        /// <summary>
+        /// Gets a value indicating whether the current node was produced as an empty element.
+        /// </summary>
+        public override bool IsEmptyElement => m_isEmpty;
+
+
+        /// <summary>
+        /// Gets the local name of the current node.
+        /// </summary>
+        /// <remarks>
+        /// The qualified name is not assigned until the first node is read, in that
+        /// case an empty string is returned instead of failing with a null reference.
+        /// </remarks>
+        public override string LocalName {
+            get {
+                return m_qName?.Name ?? string.Empty;
+            }
+        }
+
+        /// <summary>
+        /// Gets the namespace URI of the current node.
+        /// </summary>
+        /// <remarks>
+        /// The qualified name is not assigned until the first node is read, in that
+        /// case an empty string is returned instead of failing with a null reference.
+        /// </remarks>
+        public override string NamespaceURI {
+            get {
+                return m_qName?.Namespace ?? string.Empty;
+            }
+        }
+
+        /// <summary>
+        /// Gets the name table used by this reader to atomize names.
+        /// </summary>
+        public override XmlNameTable NameTable => m_nameTable;
+
+        /// <summary>
+        /// Gets the type of the current node.
+        /// </summary>
+        public override XmlNodeType NodeType => m_nodeType;
+
+        /// <summary>
+        /// Gets the namespace prefix of the current node.
+        /// </summary>
+        /// <remarks>
+        /// The prefix is not assigned until the first node is read; an empty string
+        /// is returned in that case so that callers never observe a null prefix.
+        /// </remarks>
+        public override string Prefix {
+            get {
+                return m_prefix ?? string.Empty;
+            }
+        }
+
+        /// <summary>
+        /// Gets the state of the reader derived from its current position: the initial,
+        /// end-of-file, closed and error positions map to the matching states, every
+        /// other position is reported as <see cref="System.Xml.ReadState.Interactive"/>.
+        /// </summary>
+        public override ReadState ReadState {
+            get {
+                var position = m_position;
+
+                if (position == JsonXmlReaderPosition.Initial)
+                    return ReadState.Initial;
+                if (position == JsonXmlReaderPosition.Eof)
+                    return ReadState.EndOfFile;
+                if (position == JsonXmlReaderPosition.Closed)
+                    return ReadState.Closed;
+                if (position == JsonXmlReaderPosition.Error)
+                    return ReadState.Error;
+
+                return ReadState.Interactive;
+            }
+        }
+
+        /// <summary>
+        /// Gets the text value of the current node, an empty string when the node has no value.
+        /// </summary>
+        public override string Value => ConvertValueToString(m_value);
+
+        /// <summary>
+        /// Converts a node or attribute value to its text representation.
+        /// </summary>
+        /// <param name="value">The value to convert, may be null.</param>
+        /// <returns>
+        /// An empty string for null, "true" or "false" for booleans, the string itself
+        /// for strings and a culture-invariant representation for other values which
+        /// support formatting.
+        /// </returns>
+        static string ConvertValueToString(object value) {
+            if (value == null)
+                return string.Empty;
+
+            switch (Convert.GetTypeCode(value)) {
+                case TypeCode.Double:
+                    return ((double)value).ToString(CultureInfo.InvariantCulture);
+                case TypeCode.String:
+                    return (string)value;
+                case TypeCode.Boolean:
+                    return (bool)value ? "true" : "false";
+                default: {
+                    // the produced text must not depend on the current culture,
+                    // otherwise e.g. decimals would get a culture specific separator
+                    var formattable = value as IFormattable;
+                    return formattable != null
+                        ? formattable.ToString(null, CultureInfo.InvariantCulture)
+                        : value.ToString();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets the value of the attribute with the specified index.
+        /// </summary>
+        /// <param name="i">Zero-based index of the attribute, validated against the attribute count.</param>
+        /// <returns>The text representation of the attribute value.</returns>
+        public override string GetAttribute(int i) {
+            var lastIndex = AttributeCount - 1;
+            Safe.ArgumentInRange(i, 0, lastIndex, nameof(i));
+
+            var attribute = m_attributes[i];
+            return ConvertValueToString(attribute.Value);
+        }
+
+        /// <summary>
+        /// Gets the value of the attribute with the specified name; the name is
+        /// resolved to a qualified name using the current namespace context.
+        /// </summary>
+        /// <param name="name">The name of the attribute.</param>
+        /// <returns>The attribute value, or null if the attribute is not found or its value is empty.</returns>
+        public override string GetAttribute(string name) {
+            if (m_attributes == null)
+                return null;
+
+            var target = m_context.Resolve(name);
+            var match = m_attributes.FirstOrDefault(a => a.QName == target);
+            var text = ConvertValueToString(match?.Value);
+
+            // missing attributes and empty values are both reported as null
+            return string.IsNullOrEmpty(text) ? null : text;
+        }
+
+        /// <summary>
+        /// Gets the value of the attribute with the specified local name and namespace.
+        /// </summary>
+        /// <param name="name">The local name of the attribute.</param>
+        /// <param name="namespaceURI">The namespace URI of the attribute.</param>
+        /// <returns>The attribute value, or null if the attribute is not found or its value is empty.</returns>
+        public override string GetAttribute(string name, string namespaceURI) {
+            if (m_attributes == null)
+                return null;
+
+            var target = new XmlQualifiedName(name, namespaceURI);
+            var match = m_attributes.FirstOrDefault(a => a.QName == target);
+            var text = ConvertValueToString(match?.Value);
+
+            // missing attributes and empty values are both reported as null
+            return string.IsNullOrEmpty(text) ? null : text;
+        }
+
+        /// <summary>
+        /// Resolves the specified prefix using the current namespace context.
+        /// </summary>
+        /// <param name="prefix">The prefix to resolve.</param>
+        /// <returns>The result of the lookup in the current namespace context.</returns>
+        public override string LookupNamespace(string prefix) => m_context.ResolvePrefix(prefix);
+
+        /// <summary>
+        /// Moves to the attribute with the specified name; the name is resolved
+        /// to a qualified name using the current namespace context.
+        /// </summary>
+        /// <param name="name">The name of the attribute.</param>
+        /// <returns>true if the attribute is found, otherwise false and the position is not changed.</returns>
+        public override bool MoveToAttribute(string name) {
+            if (AttributeCount == 0)
+                return false;
+
+            var target = m_context.Resolve(name);
+            var position = Array.FindIndex(m_attributes, a => a.QName == target);
+            if (position < 0)
+                return false;
+
+            MoveToAttributeImpl(position);
+            return true;
+        }
+
+        /// <summary>
+        /// Moves to the attribute with the specified local name and namespace.
+        /// </summary>
+        /// <param name="name">The local name of the attribute.</param>
+        /// <param name="ns">The namespace URI of the attribute.</param>
+        /// <returns>true if the attribute is found, otherwise false and the position is not changed.</returns>
+        public override bool MoveToAttribute(string name, string ns) {
+            if (m_attributes == null || m_attributes.Length == 0)
+                return false;
+
+            // the namespace is specified explicitly, the name must not be
+            // resolved as a prefixed name through the namespace context
+            var qName = new XmlQualifiedName(name, ns);
+            var index = Array.FindIndex(m_attributes, x => x.QName == qName);
+            if (index >= 0) {
+                MoveToAttributeImpl(index);
+                return true;
+            }
+            return false;
+        }
+
+        /// <summary>
+        /// Positions the reader on the attribute with the specified index. On the first
+        /// move the state of the owning element is saved so that MoveToElement can restore it.
+        /// </summary>
+        /// <param name="i">Zero-based index in the attributes array.</param>
+        void MoveToAttributeImpl(int i) {
+            var enteringAttributes = !m_isAttribute;
+            if (enteringAttributes) {
+                // remember the element we are leaving
+                m_elementIsEmpty = m_isEmpty;
+                m_elementPrefix = m_prefix;
+                m_elementDepth = m_xmlDepth;
+                m_elementQName = m_qName;
+                m_isAttribute = true;
+            }
+
+            var target = m_attributes[i];
+
+            m_nodeType = XmlNodeType.Attribute;
+            m_currentAttribute = i;
+            m_currentAttributeRead = false;
+
+            // attributes are nested one level below their element
+            m_xmlDepth = m_elementDepth + 1;
+            m_prefix = target.Prefix;
+            m_qName = target.QName;
+            m_value = target.Value;
+        }
+
+        /// <summary>
+        /// Moves back to the element which owns the current attribute.
+        /// </summary>
+        /// <returns>true if the reader was positioned on an attribute, otherwise false and nothing is changed.</returns>
+        public override bool MoveToElement() {
+            if (!m_isAttribute)
+                return false;
+
+            // restore the element state saved when the attributes were entered
+            m_isAttribute = false;
+            m_nodeType = XmlNodeType.Element;
+            m_qName = m_elementQName;
+            m_prefix = m_elementPrefix;
+            m_xmlDepth = m_elementDepth;
+            m_isEmpty = m_elementIsEmpty;
+            m_value = null;
+            return true;
+        }
+
+        /// <summary>
+        /// Moves to the first attribute of the current node.
+        /// </summary>
+        /// <returns>true if there is at least one attribute, otherwise false.</returns>
+        public override bool MoveToFirstAttribute() {
+            if (AttributeCount == 0)
+                return false;
+
+            MoveToAttributeImpl(0);
+            return true;
+        }
+
+        /// <summary>
+        /// Moves to the next attribute, or to the first one when the reader
+        /// is not positioned on an attribute yet.
+        /// </summary>
+        /// <returns>true if the reader has moved to an attribute, otherwise false.</returns>
+        public override bool MoveToNextAttribute() {
+            if (!m_isAttribute)
+                return MoveToFirstAttribute();
+
+            var candidate = m_currentAttribute + 1;
+            if (candidate >= AttributeCount)
+                return false;
+
+            MoveToAttributeImpl(candidate);
+            return true;
+        }
+
+        /// <summary>
+        /// Exposes the value of the current attribute as a single text node.
+        /// </summary>
+        /// <remarks>
+        /// The attributes of the owning element are preserved, so the caller can
+        /// continue with MoveToNextAttribute, GetAttribute or MoveToElement afterwards.
+        /// </remarks>
+        /// <returns>
+        /// true if the reader has moved to the value of the attribute; false if the
+        /// reader is not positioned on an attribute or the value has been read already.
+        /// </returns>
+        public override bool ReadAttributeValue() {
+            if (!m_isAttribute || m_currentAttributeRead)
+                return false;
+
+            var attr = m_attributes[m_currentAttribute];
+
+            // ValueNode can't be used here since it drops m_attributes,
+            // the text node is placed one level below the attribute
+            m_xmlDepth = m_elementDepth + 2;
+            m_qName = XmlQualifiedName.Empty;
+            m_nodeType = XmlNodeType.Text;
+            m_prefix = string.Empty;
+            m_value = attr.Value;
+            m_isEmpty = false;
+
+            m_currentAttributeRead = true;
+            return true;
+        }
+
+        /// <summary>
+        /// Does nothing.
+        /// </summary>
+        public override void ResolveEntity() {
+            // intentionally left empty
+        }
+
+        /// <summary>
+        /// Determines whether the node following the current one is its sibling,
+        /// i.e. whether the depth should stay the same after the current node.
+        /// </summary>
+        /// <returns>
+        /// false at the start of the document, on attributes and on non-empty elements
+        /// (the next node is nested); true for empty elements and all other node types.
+        /// </returns>
+        public bool IsSibling() {
+            switch (m_nodeType) {
+                case XmlNodeType.Element:
+                    // if the element is empty the next element will be its sibling
+                    return m_isEmpty;
+
+                case XmlNodeType.None: // start document
+                case XmlNodeType.Attribute: // only the attribute content can follow, see ReadAttributeValue
+                    return false;
+
+                default:
+                    // text, end elements, declarations and all remaining node types
+                    return true;
+            }
+        }
+
+        /// <summary>
+        /// Makes a text node with the specified value the current node.
+        /// </summary>
+        /// <param name="value">The value of the text node.</param>
+        void ValueNode(object value) {
+            // must be evaluated against the previous node, before the node type is changed
+            var nested = !IsSibling();
+            if (nested)
+                m_xmlDepth++;
+
+            m_nodeType = XmlNodeType.Text;
+            m_value = value;
+            m_qName = XmlQualifiedName.Empty;
+            m_prefix = string.Empty;
+            m_attributes = null;
+            m_isEmpty = false;
+        }
+
+        /// <summary>
+        /// Makes an element the current node. Opens a new namespace context, registers the
+        /// namespace declarations found among the attributes and generates declarations
+        /// for the namespaces which have no usable prefix yet.
+        /// </summary>
+        /// <param name="name">The local name of the element.</param>
+        /// <param name="ns">The namespace of the element.</param>
+        /// <param name="attrs">The attributes of the element, may be null.</param>
+        /// <param name="empty">true if the element is empty.</param>
+        void ElementNode(string name, string ns, XmlSimpleAttribute[] attrs, bool empty) {
+            // must be evaluated against the previous node, before the node type is changed
+            if (!IsSibling())
+                m_xmlDepth++;
+
+            m_context = new XmlNameContext(m_context);
+
+            // namespace declarations generated for this element, created on demand
+            List<XmlSimpleAttribute> generated = null;
+
+            if (attrs != null) {
+                for (var i = 0; i < attrs.Length; i++) {
+                    var current = attrs[i];
+
+                    // declaration of the default namespace
+                    if (current.QName.Name == "xmlns") {
+                        m_context.DefinePrefix(ConvertValueToString(current.Value), string.Empty);
+                        continue;
+                    }
+
+                    // declaration of a prefixed namespace
+                    if (current.Prefix == m_xmlnsPrefix) {
+                        m_context.DefinePrefix(ConvertValueToString(current.Value), current.QName.Name);
+                        continue;
+                    }
+
+                    // unqualified attributes don't need a prefix
+                    if (string.IsNullOrEmpty(current.QName.Namespace))
+                        continue;
+
+                    string knownPrefix;
+                    if (m_context.LookupNamespacePrefix(current.QName.Namespace, out knownPrefix) && !string.IsNullOrEmpty(knownPrefix))
+                        continue;
+
+                    // no non-empty prefix is known for the namespace, create a new one
+                    var createdPrefix = m_context.CreateNamespacePrefix(current.QName.Namespace);
+                    current.Prefix = createdPrefix;
+
+                    if (generated == null)
+                        generated = new List<XmlSimpleAttribute>();
+
+                    generated.Add(new XmlSimpleAttribute(createdPrefix, m_xmlnsNamespace, m_xmlnsPrefix, current.QName.Namespace));
+                }
+            }
+
+            // make sure the namespace of the element itself has a prefix
+            string elementPrefix;
+            if (!m_context.LookupNamespacePrefix(ns, out elementPrefix)) {
+                elementPrefix = m_context.CreateNamespacePrefix(ns);
+
+                if (generated == null)
+                    generated = new List<XmlSimpleAttribute>();
+
+                generated.Add(new XmlSimpleAttribute(elementPrefix, m_xmlnsNamespace, m_xmlnsPrefix, ns));
+            }
+
+            // generated declarations go before the original attributes
+            if (generated != null) {
+                if (attrs != null)
+                    generated.AddRange(attrs);
+                attrs = generated.ToArray();
+            }
+
+            m_nodeType = XmlNodeType.Element;
+            m_prefix = elementPrefix;
+            m_qName = new XmlQualifiedName(name, ns);
+            m_attributes = attrs;
+            m_isEmpty = empty;
+            m_value = null;
+        }
+
+        /// <summary>
+        /// Makes an end element the current node and leaves the namespace context of the element.
+        /// </summary>
+        /// <param name="name">The local name of the element being closed.</param>
+        /// <param name="ns">The namespace of the element being closed.</param>
+        void EndElementNode(string name, string ns) {
+            // must be evaluated against the previous node: the depth is decreased
+            // only when the element being closed has children
+            var hadChildren = IsSibling();
+            if (hadChildren)
+                m_xmlDepth--;
+
+            // the prefix is looked up before the context of the element is dropped
+            string resolvedPrefix;
+            var found = m_context.LookupNamespacePrefix(ns, out resolvedPrefix);
+            if (!found)
+                throw new Exception($"Failed to lookup namespace '{ns}'");
+
+            m_context = m_context.ParentContext;
+
+            m_nodeType = XmlNodeType.EndElement;
+            m_qName = new XmlQualifiedName(name, ns);
+            m_prefix = resolvedPrefix;
+            m_isEmpty = false;
+            m_attributes = null;
+            m_value = null;
+        }
+
+        /// <summary>
+        /// Makes the XML declaration the current node.
+        /// </summary>
+        void XmlDeclaration() {
+            // must be evaluated against the previous node, before the node type is changed
+            var nested = !IsSibling();
+            if (nested)
+                m_xmlDepth++;
+
+            m_nodeType = XmlNodeType.XmlDeclaration;
+            m_prefix = string.Empty;
+            m_qName = new XmlQualifiedName("xml");
+            m_value = "version='1.0'";
+            m_isEmpty = false;
+            m_attributes = null;
+        }
+
+        /// <summary>
+        /// Advances the reader to the next XML node produced from the JSON stream.
+        /// </summary>
+        /// <remarks>
+        /// The first call produces the XML declaration. The first element produced after
+        /// the declaration carries the namespace declarations for the 'xsi' namespace and
+        /// for the namespace of the JSON nodes. A JSON value is exposed as an element with
+        /// a text node and an end element; empty strings and null values are exposed as
+        /// empty elements, null values additionally get the 'xsi:nil' attribute.
+        /// If an exception occurs the reader is switched to the error state and the
+        /// exception is rethrown.
+        /// </remarks>
+        /// <returns>
+        /// true if the next node was read; false if the JSON stream is exhausted or
+        /// the reader is at the end of file, closed or in the error state.
+        /// </returns>
+        public override bool Read() {
+            try {
+                string elementName;
+                XmlSimpleAttribute[] elementAttrs = null;
+
+                // if the reader is positioned on an attribute return to its element first
+                MoveToElement();
+
+                switch (m_position) {
+                    case JsonXmlReaderPosition.Initial:
+                        m_jsonLocalName = m_jsonRootName;
+                        m_jsonSkip = false;
+                        XmlDeclaration();
+                        m_position = JsonXmlReaderPosition.Declaration;
+                        return true;
+                    case JsonXmlReaderPosition.Declaration:
+                        // namespace declarations for the first element after the declaration
+                        elementAttrs = new[] {
+                            new XmlSimpleAttribute(m_xsiPrefix, m_xmlnsNamespace, m_xmlnsPrefix, m_xsiNamespace),
+                            string.IsNullOrEmpty(m_jsonPrefix) ?
+                                new XmlSimpleAttribute(m_xmlnsPrefix, string.Empty, string.Empty, m_jsonNamespace) :
+                                new XmlSimpleAttribute(m_jsonPrefix, m_xmlnsNamespace, m_xmlnsPrefix, m_jsonNamespace)
+                        };
+                        break;
+                    case JsonXmlReaderPosition.ValueElement:
+                        if (!m_isEmpty) {
+                            // the content of the value element
+                            ValueNode(m_parser.ElementValue);
+                            m_position = JsonXmlReaderPosition.ValueContent;
+                            return true;
+                        } else {
+                            // an empty element has neither content nor closing tag,
+                            // continue with the next JSON element
+                            m_position = JsonXmlReaderPosition.ValueEndElement;
+                            break;
+                        }
+                    case JsonXmlReaderPosition.ValueContent:
+                        EndElementNode(m_jsonValueName, m_jsonNamespace);
+                        m_position = JsonXmlReaderPosition.ValueEndElement;
+                        return true;
+                    case JsonXmlReaderPosition.Eof:
+                    case JsonXmlReaderPosition.Closed:
+                    case JsonXmlReaderPosition.Error:
+                        return false;
+                }
+
+                while (m_parser.Read()) {
+                    // the name table doesn't accept null, treat a missing name as an empty one
+                    var jsonName = m_nameTable.Add(m_parser.ElementName ?? string.Empty);
+
+                    // 'continue' below means that no node is produced for the JSON element
+                    switch (m_parser.ElementType) {
+                        case JSONElementType.BeginObject:
+                            if (!EnterJsonObject(jsonName, out elementName))
+                                continue;
+
+                            m_position = JsonXmlReaderPosition.BeginObject;
+                            ElementNode(elementName, m_jsonNamespace, elementAttrs, false);
+                            break;
+                        case JSONElementType.EndObject:
+                            if (!LeaveJsonScope(out elementName))
+                                continue;
+
+                            m_position = JsonXmlReaderPosition.EndObject;
+                            EndElementNode(elementName, m_jsonNamespace);
+                            break;
+                        case JSONElementType.BeginArray:
+                            if (!EnterJsonArray(jsonName, out elementName))
+                                continue;
+
+                            m_position = JsonXmlReaderPosition.BeginArray;
+                            ElementNode(elementName, m_jsonNamespace, elementAttrs, false);
+                            break;
+                        case JSONElementType.EndArray:
+                            if (!LeaveJsonScope(out elementName))
+                                continue;
+
+                            m_position = JsonXmlReaderPosition.EndArray;
+                            EndElementNode(elementName, m_jsonNamespace);
+                            break;
+                        case JSONElementType.Value:
+                            if (!VisitJsonValue(jsonName, out m_jsonValueName))
+                                continue;
+
+                            m_position = JsonXmlReaderPosition.ValueElement;
+                            if (m_parser.ElementValue == null) {
+                                // generate empty element with xsi:nil="true" attribute
+                                var nilAttr = new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, true);
+
+                                // when the null value is the first element the namespace declarations
+                                // must be kept, they go first since the 'nil' attribute relies on them
+                                var nilAttrs = elementAttrs == null ?
+                                    new[] { nilAttr } :
+                                    elementAttrs.Concat(new[] { nilAttr }).ToArray();
+
+                                ElementNode(
+                                    m_jsonValueName,
+                                    m_jsonNamespace,
+                                    nilAttrs,
+                                    true
+                                );
+                            } else {
+                                // empty strings are exposed as empty elements
+                                ElementNode(m_jsonValueName, m_jsonNamespace, elementAttrs, m_parser.ElementValue as string == string.Empty);
+                            }
+                            break;
+                        default:
+                            throw new Exception($"Unexpected JSON element {m_parser.ElementType}: {m_parser.ElementName}");
+                    }
+                    return true;
+                }
+
+                m_position = JsonXmlReaderPosition.Eof;
+                return false;
+            } catch {
+                m_position = JsonXmlReaderPosition.Error;
+                throw;
+            }
+        }
+
+        /// <summary>
+        /// Pushes the current JSON naming state (local name and skip flag) onto the stack.
+        /// </summary>
+        void SaveJsonName() {
+            JsonContext saved;
+            saved.localName = m_jsonLocalName;
+            saved.skip = m_jsonSkip;
+
+            m_jsonNameStack.Push(saved);
+        }
+
+        /// <summary>
+        /// Called when JSON parser visits BeginObject element; saves the current
+        /// naming state and selects the element name for the object.
+        /// </summary>
+        /// <param name="name">Optional property name if the object is the member of another object</param>
+        /// <param name="elementName">Receives the name of the element for the object</param>
+        /// <returns>Always true, objects are never skipped</returns>
+        bool EnterJsonObject(string name, out string elementName) {
+            SaveJsonName();
+            m_jsonSkip = false;
+
+            var isNamed = !string.IsNullOrEmpty(name);
+            if (isNamed) {
+                m_jsonLocalName = name;
+            } else if (!m_jsonFlattenArrays && m_jsonNameStack.Count != 1) {
+                // an unnamed object which is not the root gets the array item name
+                // unless arrays are flattened, otherwise the current name is kept
+                m_jsonLocalName = m_jsonArrayItemName;
+            }
+
+            elementName = m_jsonLocalName;
+            return true;
+        }
+
+        /// <summary>
+        /// Called when JSON parser visits BeginArray ('[') element; saves the current
+        /// naming state and selects the element name for the array.
+        /// </summary>
+        /// <param name="name">Optional property name if the array is the member of an object</param>
+        /// <param name="elementName">Receives the name of the element for the array</param>
+        /// <returns>true if element should be emitted, false otherwise</returns>
+        bool EnterJsonArray(string name, out string elementName) {
+            SaveJsonName();
+
+            var isNamed = !string.IsNullOrEmpty(name);
+            if (isNamed) {
+                // named arrays are skipped when flattening is enabled
+                m_jsonLocalName = name;
+                m_jsonSkip = m_jsonFlattenArrays;
+            } else {
+                // m_jsonNameStack.Count == 1 means the root node
+                var isRoot = m_jsonNameStack.Count == 1;
+                if (!isRoot && !m_jsonFlattenArrays)
+                    m_jsonLocalName = m_jsonArrayItemName;
+
+                // we should not flatten arrays inside arrays or in the document root
+                m_jsonSkip = false;
+            }
+
+            elementName = m_jsonLocalName;
+            return !m_jsonSkip;
+        }
+
+        /// <summary>
+        /// Called when JSON parser visits a value; selects the element name for the value.
+        /// </summary>
+        /// <param name="name">Optional property name if the value is the member of an object</param>
+        /// <param name="elementName">Receives the name of the element for the value</param>
+        /// <returns>Always true, values are never skipped</returns>
+        bool VisitJsonValue(string name, out string elementName) {
+            if (!string.IsNullOrEmpty(name)) {
+                elementName = name;
+                return true;
+            }
+
+            // m_jsonNameStack.Count == 0 means that the JSON document consists of a simple value
+            if (m_jsonNameStack.Count == 0 || m_jsonFlattenArrays)
+                elementName = m_jsonLocalName;
+            else
+                elementName = m_jsonArrayItemName;
+
+            return true;
+        }
+
+        /// <summary>
+        /// Called when JSON parser leaves an object or an array; restores the naming
+        /// state saved when the scope was entered.
+        /// </summary>
+        /// <param name="elementName">Receives the name of the element of the scope being left</param>
+        /// <returns>true if the closing element should be emitted, false if the scope was skipped</returns>
+        bool LeaveJsonScope(out string elementName) {
+            // capture the state of the scope being left before it is overwritten
+            var emitClosingTag = !m_jsonSkip;
+            elementName = m_jsonLocalName;
+
+            var outer = m_jsonNameStack.Pop();
+            m_jsonSkip = outer.skip;
+            m_jsonLocalName = outer.localName;
+
+            return emitClosingTag;
+        }
+
+        /// <summary>
+        /// Returns a diagnostic text representation of the current node.
+        /// </summary>
+        public override string ToString() {
+            switch (NodeType) {
+                case XmlNodeType.Element: {
+                    var attributes = m_attributes ?? new XmlSimpleAttribute[0];
+                    var rendered = attributes.Select(a => {
+                        var separator = string.IsNullOrEmpty(a.Prefix) ? "" : ":";
+                        return $"{a.Prefix}{separator}{a.QName.Name}='{ConvertValueToString(a.Value)}'";
+                    });
+                    var attributesText = string.Join(" ", rendered);
+                    var emptyMark = IsEmptyElement ? "/" : "";
+
+                    return $"<{Name} {attributesText} {emptyMark}>";
+                }
+                case XmlNodeType.EndElement:
+                    return $"</{Name}>";
+                case XmlNodeType.Attribute:
+                    return $"@{Name}";
+                case XmlNodeType.Text:
+                    return $"{Value}";
+                case XmlNodeType.CDATA:
+                    return $"<![CDATA[{Value}]]>";
+                case XmlNodeType.EntityReference:
+                    return $"&{Name};";
+                default:
+                    return $".{NodeType} {Name} {Value}";
+            }
+        }
+    }
+}