Mercurial > pub > ImplabNet
changeset 228:6fa235c5a760 v2
Rewritten JsonScanner, JsonParser, fixed naming style
line wrap: on
line diff
--- a/Implab.Format.Test/JsonTests.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab.Format.Test/JsonTests.cs Tue Sep 12 01:19:12 2017 +0300 @@ -1,41 +1,42 @@ using NUnit.Framework; using System; -using Implab.Formats.JSON; using Implab.Automaton; using Implab.Xml; using System.Xml; -using System.Text; +using Implab.Formats; +using Implab.Formats.Json; namespace Implab.Format.Test { [TestFixture] public class JsonTests { + [Test] public void TestScannerValidTokens() { - using (var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) { + using (var scanner = JsonStringScanner.Create(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) { Tuple<JsonTokenType, object>[] expexted = { new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, 
null), new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text"), - new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", "), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, null), new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal"), - new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " ["), - new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]"), - new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{"), - new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}"), - new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":") + new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, null), + new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, null), + new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, null), + new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, null), + new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, null) }; object value; @@ -70,7 +71,7 @@ }; foreach (var json in bad) { - using (var scanner = new JSONScanner(json)) { + using (var scanner = JsonStringScanner.Create(json)) { try { object value; JsonTokenType token; @@ -122,7 +123,7 @@ void DumpJsonParse(string json) { Console.WriteLine($"JSON: {json}"); Console.WriteLine("XML"); - using (var xmlReader = new JsonXmlReader(new JSONParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "json" })) { + using (var xmlReader = new JsonXmlReader(new JsonParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "json" })) { while (xmlReader.Read()) Console.WriteLine($"{new string(' ', xmlReader.Depth * 2)}{xmlReader}"); } @@ -136,7 +137,7 @@ CloseOutput = false, ConformanceLevel = ConformanceLevel.Document 
})) - using (var xmlReader = new JsonXmlReader(new JSONParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "", FlattenArrays = true })) { + using (var xmlReader = new JsonXmlReader(new JsonParser(json), new JsonXmlReaderOptions { NamespaceUri = "JsonXmlReaderSimpleTest", NodesPrefix = "", FlattenArrays = true })) { xmlWriter.WriteNode(xmlReader, false); } }
--- a/Implab/Formats/CharAlphabet.cs Sat Sep 09 03:53:13 2017 +0300
+++ b/Implab/Formats/CharAlphabet.cs Tue Sep 12 01:19:12 2017 +0300
@@ -1,6 +1,7 @@
 using System.Collections.Generic;
 using System.Linq;
 using Implab.Automaton;
+using System;
 
 namespace Implab.Formats {
     public class CharAlphabet: IndexedAlphabetBase<char> {
@@ -12,5 +13,36 @@
         public IEnumerable<char> InputSymbols {
             get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); }
         }
+
+        /// <summary>
+        /// Builds a <see cref="CharMap"/> covering the range between the smallest and the
+        /// largest symbol index found in <c>Mappings</c>.
+        /// </summary>
+        /// <remarks>
+        /// Indices inside the range which have no mapping keep the default value <c>0</c>.
+        /// NOTE(review): when <c>Mappings</c> is empty the calculated length is negative and
+        /// the array allocation fails - presumably the alphabet is never empty here, confirm.
+        /// </remarks>
+        public CharMap CreateCharMap() {
+            var classes = new Dictionary<int, int>();
+            int first = char.MaxValue, last = 0;
+
+            foreach (var pair in Mappings) {
+                var code = GetSymbolIndex(pair.Key);
+                classes[code] = pair.Value;
+                first = Math.Min(first, code);
+                last = Math.Max(last, code);
+            }
+
+            var table = new int[last - first + 1];
+
+            for (var i = 0; i < table.Length; i++) {
+                int cls;
+                if (classes.TryGetValue(first + i, out cls))
+                    table[i] = cls;
+            }
+
+            return new CharMap((char)first, table);
+        }
     }
 }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/CharMap.cs Tue Sep 12 01:19:12 2017 +0300
@@ -0,0 +1,72 @@
+using Implab.Automaton;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Formats {
+    /// <summary>
+    /// Maps a contiguous range of characters to input classes using a flat lookup table.
+    /// </summary>
+    /// <remarks>
+    /// The entry <c>i</c> of the table belongs to the character <c>min + i</c>. Characters outside
+    /// the covered range and entries equal to <see cref="AutomatonConst.UNCLASSIFIED_INPUT"/> are
+    /// reported as not belonging to the alphabet.
+    /// </remarks>
+    public class CharMap : IAlphabet<char> {
+        readonly char m_min;
+        readonly char m_max;
+        readonly int[] m_map;
+
+        /// <summary>
+        /// Creates a map on top of the specified table.
+        /// </summary>
+        /// <param name="min">The character which corresponds to the first entry of the table.</param>
+        /// <param name="map">The table of classes, it is stored as is without copying. An empty table
+        /// is rejected by <c>Max()</c> since <see cref="Count"/> is calculated from the largest entry.</param>
+        public CharMap(char min, int[] map) {
+            Safe.ArgumentNotNull(map, nameof(map));
+            Count = map.Max()+1;
+            m_min = min;
+            m_map = map;
+            // m_max is an inclusive bound: the last covered character is min + map.Length - 1.
+            // Without "- 1" the character right after the range passes the range check and
+            // indexes past the end of m_map.
+            m_max = (char)(min + map.Length - 1);
+        }
+
+        /// <summary>
+        /// The largest class stored in the table plus one.
+        /// </summary>
+        public int Count {
+            get; private set;
+        }
+
+        /// <summary>
+        /// Checks whether the symbol is covered by the table and has a class assigned.
+        /// </summary>
+        public bool Contains(char symbol) {
+            return symbol >= m_min && symbol <= m_max && m_map[symbol-m_min] != AutomatonConst.UNCLASSIFIED_INPUT;
+        }
+
+        /// <summary>
+        /// Lazily enumerates the characters whose table entry equals <paramref name="cls"/>.
+        /// </summary>
+        public IEnumerable<char> GetSymbols(int cls) {
+            for (var i = 0; i < m_map.Length; i++)
+                if (m_map[i] == cls)
+                    yield return (char)(i + m_min);
+        }
+
+        /// <summary>
+        /// Returns the class of the symbol or <see cref="AutomatonConst.UNCLASSIFIED_INPUT"/> when
+        /// the symbol is outside of the covered range.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public int Translate(char symbol) {
+            return symbol >= m_min && symbol <= m_max ? m_map[symbol-m_min] : AutomatonConst.UNCLASSIFIED_INPUT;
+        }
+    }
+}
--- a/Implab/Formats/Grammar.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Formats/Grammar.cs Tue Sep 12 01:19:12 2017 +0300 @@ -67,32 +67,6 @@ return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); } - - protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); - - protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) { - - var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); - - var visitor = new RegularExpressionVisitor<TTag>(dfa); - regexp.Accept(visitor); - visitor.BuildDFA(); - - if (dfa.IsFinalState(dfa.InitialState)) - throw new ApplicationException("The specified language contains empty token"); - - var ab = CreateAlphabet(); - var optimal = dfa.Optimize(ab); - - return new ScannerContext<TTag>( - optimal.CreateTransitionTable(), - optimal.CreateFinalStateTable(), - optimal.CreateTagTable(), - optimal.InitialState, - ab.GetTranslationMap() - ); - } - }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/InputScanner.cs Tue Sep 12 01:19:12 2017 +0300
@@ -0,0 +1,99 @@
+using Implab.Automaton;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Formats {
+    /// <summary>
+    /// Runs a table driven DFA over a character buffer keeping track of the current state
+    /// and of the position where the last scan has stopped.
+    /// </summary>
+    /// <remarks>
+    /// The constructor doesn't assign the current state, it stays <c>0</c> until
+    /// <see cref="Reset"/> is called. Presumably callers reset the scanner before the first
+    /// scan - TODO confirm.
+    /// </remarks>
+    public class InputScanner<TTag> {
+        readonly int[,] m_dfa;
+        readonly bool[] m_final;
+        readonly TTag[] m_tags;
+        readonly int m_initialState;
+        readonly CharMap m_alphabet;
+
+        int m_state;
+        int m_position;
+
+        public InputScanner(int[,] dfaTable, bool[] finalStates, TTag[] tags, int initialState, CharMap alphabet) {
+            Safe.ArgumentNotNull(dfaTable, nameof(dfaTable));
+            Safe.ArgumentNotNull(finalStates, nameof(finalStates));
+            Safe.ArgumentNotNull(tags, nameof(tags));
+            Safe.ArgumentNotNull(alphabet, nameof(alphabet));
+
+            m_alphabet = alphabet;
+            m_initialState = initialState;
+            m_tags = tags;
+            m_final = finalStates;
+            m_dfa = dfaTable;
+        }
+
+        /// <summary>The tag stored for the current state.</summary>
+        public TTag Tag => m_tags[m_state];
+
+        /// <summary>The offset in the buffer where the last call to <see cref="Scan"/> has stopped.</summary>
+        public int Position => m_position;
+
+        /// <summary>Whether the current state is marked as final.</summary>
+        public bool IsFinal => m_final[m_state];
+
+        /// <summary>
+        /// Returns the automaton to the initial state, the position is left untouched.
+        /// </summary>
+        public void Reset() {
+            m_state = m_initialState;
+        }
+
+        /// <summary>
+        /// Creates a scanner which shares the tables with this one and starts from the same
+        /// state and position.
+        /// </summary>
+        public InputScanner<TTag> Clone() {
+            var copy = new InputScanner<TTag>(m_dfa, m_final, m_tags, m_initialState, m_alphabet);
+            copy.m_position = m_position;
+            copy.m_state = m_state;
+            return copy;
+        }
+
+        /// <summary>
+        /// Feeds the specified part of the buffer to the automaton.
+        /// </summary>
+        /// <param name="data">The buffer with the input characters.</param>
+        /// <param name="offset">The index of the first character to consume.</param>
+        /// <param name="length">The number of characters to consume.</param>
+        /// <returns><c>true</c> when all characters were consumed; <c>false</c> when there is
+        /// nothing to consume or the automaton has no transition for a character.</returns>
+        public bool Scan(char[] data, int offset, int length) {
+            if (length <= 0) {
+                m_position = offset;
+                return false; // EOF
+            }
+
+            var end = offset + length;
+
+            for (var pos = offset; pos < end; pos++) {
+                var target = m_dfa[m_state, m_alphabet.Translate(data[pos])];
+                if (target == AutomatonConst.UNREACHABLE_STATE) {
+                    // stop at the character which can't be consumed, the state of the last
+                    // recognized character is kept
+                    m_position = pos;
+                    return false;
+                }
+                m_state = target;
+            }
+
+            m_position = end;
+            return true;
+        }
+    }
+}
--- a/Implab/Formats/JSON/JSONElementContext.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -namespace Implab.Formats.JSON { - /// <summary> - /// internal - /// </summary> - enum JSONElementContext { - None, - Object, - Array, - Closed - } -}
--- a/Implab/Formats/JSON/JSONElementType.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -namespace Implab.Formats.JSON { - /// <summary> - /// Тип элемента на котором находится парсер - /// </summary> - public enum JSONElementType { - None, - /// <summary> - /// Начало объекта - /// </summary> - BeginObject, - /// <summary> - /// Конец объекта - /// </summary> - EndObject, - /// <summary> - /// Начало массива - /// </summary> - BeginArray, - /// <summary> - /// Конец массива - /// </summary> - EndArray, - /// <summary> - /// Простое значение - /// </summary> - Value - } -}
--- a/Implab/Formats/JSON/JSONGrammar.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ -using System.Linq; -using Implab.Automaton.RegularExpressions; -using System; -using Implab.Automaton; -using Implab.Components; - -namespace Implab.Formats.JSON { - class JSONGrammar : Grammar<char> { - public enum TokenType { - None, - BeginObject, - EndObject, - BeginArray, - EndArray, - String, - Number, - Literal, - NameSeparator, - ValueSeparator, - Whitespace, - - StringBound, - EscapedChar, - UnescapedChar, - EscapedUnicode - } - - static LazyAndWeak<JSONGrammar> _instance = new LazyAndWeak<JSONGrammar>(() => new JSONGrammar()); - - public static JSONGrammar Instance { - get { return _instance.Value; } - } - - readonly ScannerContext<TokenType> m_jsonExpression; - readonly ScannerContext<TokenType> m_stringExpression; - readonly CharAlphabet m_defaultAlphabet = new CharAlphabet(); - - public JSONGrammar() { - DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); - var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); - var digit9 = SymbolRangeToken('1', '9'); - var zero = SymbolToken('0'); - var digit = zero.Or(digit9); - var dot = SymbolToken('.'); - var minus = SymbolToken('-'); - var sign = SymbolSetToken('-', '+'); - var expSign = SymbolSetToken('e', 'E'); - var letters = SymbolRangeToken('a', 'z'); - var integer = zero.Or(digit9.Cat(digit.EClosure())); - var frac = dot.Cat(digit.Closure()); - var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure()); - var quote = SymbolToken('"'); - var backSlash = SymbolToken('\\'); - var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r'); - var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4)); - var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure(); - var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace); - var endObject = 
whitespace.Cat(SymbolToken('}')).Cat(whitespace); - var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace); - var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace); - var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace); - var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace); - - var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional()); - var literal = letters.Closure(); - var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x)); - - var jsonExpression = - number.Tag(TokenType.Number) - .Or(literal.Tag(TokenType.Literal)) - .Or(quote.Tag(TokenType.StringBound)) - .Or(beginObject.Tag(TokenType.BeginObject)) - .Or(endObject.Tag(TokenType.EndObject)) - .Or(beginArray.Tag(TokenType.BeginArray)) - .Or(endArray.Tag(TokenType.EndArray)) - .Or(nameSep.Tag(TokenType.NameSeparator)) - .Or(valueSep.Tag(TokenType.ValueSeparator)) - .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace)); - - - var jsonStringExpression = - quote.Tag(TokenType.StringBound) - .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar)) - .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode)) - .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - - - m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); - m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); - - - } - - protected override IAlphabetBuilder<char> AlphabetBuilder { - get { - return m_defaultAlphabet; - } - } - - public ScannerContext<TokenType> JsonExpression { - get { - return m_jsonExpression; - } - } - - public ScannerContext<TokenType> JsonStringExpression { - get { - return m_stringExpression; - } - } - - Token SymbolRangeToken(char start, char stop) { - return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); - } - - protected override IndexedAlphabetBase<char> CreateAlphabet() { - return new 
CharAlphabet(); - } - - } -}
--- a/Implab/Formats/JSON/JSONParser.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,294 +0,0 @@ -using System; -using System.Diagnostics; -using System.IO; -using Implab.Automaton; -using Implab.Automaton.RegularExpressions; -using System.Linq; -using Implab.Components; -using System.Collections.Generic; - -namespace Implab.Formats.JSON { - /// <summary> - /// Pull парсер JSON данных. - /// </summary> - /// <remarks> - /// Следует отметить отдельную интерпретацию свойства <see cref="Level"/>, - /// оно означает текущий уровень вложенности объектов, однако закрывающий - /// элемент объекта и массива имеет уровень меньше, чем сам объект. - /// <code> - /// { // Level = 1 - /// "name" : "Peter", // Level = 1 - /// "address" : { // Level = 2 - /// city : "Stern" // Level = 2 - /// } // Level = 1 - /// } // Level = 0 - /// </code> - /// </remarks> - public class JSONParser : Disposable { - - enum MemberContext { - MemberName, - MemberValue - } - - #region Parser rules - struct ParserContext { - readonly int[,] m_dfa; - int m_state; - - readonly JSONElementContext m_elementContext; - - public ParserContext(int[,] dfa, int state, JSONElementContext context) { - m_dfa = dfa; - m_state = state; - m_elementContext = context; - } - - public bool Move(JsonTokenType token) { - var next = m_dfa[m_state, (int)token]; - if (next == AutomatonConst.UNREACHABLE_STATE) - return false; - m_state = next; - return true; - } - - public JSONElementContext ElementContext { - get { return m_elementContext; } - } - } - - static readonly ParserContext _jsonContext; - static readonly ParserContext _objectContext; - static readonly ParserContext _arrayContext; - - static JSONParser() { - - var valueExpression = MakeToken(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); - var memberExpression = 
MakeToken(JsonTokenType.String).Cat(MakeToken(JsonTokenType.NameSeparator)).Cat(valueExpression); - - var objectExpression = memberExpression - .Cat( - MakeToken(JsonTokenType.ValueSeparator) - .Cat(memberExpression) - .EClosure() - ) - .Optional() - .Cat(MakeToken(JsonTokenType.EndObject)) - .End(); - - var arrayExpression = valueExpression - .Cat( - MakeToken(JsonTokenType.ValueSeparator) - .Cat(valueExpression) - .EClosure() - ) - .Optional() - .Cat(MakeToken(JsonTokenType.EndArray)) - .End(); - - var jsonExpression = valueExpression.End(); - - _jsonContext = CreateParserContext(jsonExpression, JSONElementContext.None); - _objectContext = CreateParserContext(objectExpression, JSONElementContext.Object); - _arrayContext = CreateParserContext(arrayExpression, JSONElementContext.Array); - } - - static Token MakeToken(params JsonTokenType[] input) { - return Token.New( input.Select(t => (int)t).ToArray() ); - } - - static ParserContext CreateParserContext(Token expr, JSONElementContext context) { - - var dfa = new DFATable(); - var builder = new RegularExpressionVisitor(dfa); - expr.Accept(builder); - builder.BuildDFA(); - - return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context); - } - - #endregion - - readonly JSONScanner m_scanner; - // json starts from the value context and may content even a single literal - MemberContext m_memberContext = MemberContext.MemberValue; - - JSONElementType m_elementType; - object m_elementValue; - string m_memberName = String.Empty; - - Stack<ParserContext> m_stack = new Stack<ParserContext>(); - ParserContext m_context = _jsonContext; - - /// <summary> - /// Создает новый парсер на основе строки, содержащей JSON - /// </summary> - /// <param name="text"></param> - public JSONParser(string text) { - Safe.ArgumentNotEmpty(text, "text"); - m_scanner = new JSONScanner(text); - } - - /// <summary> - /// Создает новый экземпляр парсера, на основе текстового потока. 
- /// </summary> - /// <param name="reader">Текстовый поток.</param> - public JSONParser(TextReader reader) { - Safe.ArgumentNotNull(reader, "reader"); - m_scanner = new JSONScanner(reader); - } - - public int Level { - get { return m_stack.Count; } - } - - /// <summary> - /// Тип текущего элемента на котором стоит парсер. - /// </summary> - public JSONElementType ElementType { - get { return m_elementType; } - } - - /// <summary> - /// Имя элемента - имя свойства родительского контейнера. Для элементов массивов и корневого всегда - /// пустая строка. - /// </summary> - public string ElementName { - get { return m_memberName; } - } - - /// <summary> - /// Значение элемента. Только для элементов типа <see cref="JSONElementType.Value"/>, для остальных <c>null</c> - /// </summary> - public object ElementValue { - get { return m_elementValue; } - } - - /// <summary> - /// Читает слеюудущий объект из потока - /// </summary> - /// <returns><c>true</c> - операция чтения прошла успешно, <c>false</c> - конец данных</returns> - public bool Read() { - object tokenValue; - JsonTokenType tokenType; - - m_memberName = String.Empty; - - while (m_scanner.ReadToken(out tokenValue, out tokenType)) { - if(!m_context.Move(tokenType)) - UnexpectedToken(tokenValue, tokenType); - - switch (tokenType) { - case JsonTokenType.BeginObject: - m_stack.Push(m_context); - m_context = _objectContext; - - m_elementValue = null; - m_memberContext = MemberContext.MemberName; - m_elementType = JSONElementType.BeginObject; - return true; - case JsonTokenType.EndObject: - if (m_stack.Count == 0) - UnexpectedToken(tokenValue, tokenType); - m_context = m_stack.Pop(); - - m_elementValue = null; - m_elementType = JSONElementType.EndObject; - return true; - case JsonTokenType.BeginArray: - m_stack.Push(m_context); - m_context = _arrayContext; - - m_elementValue = null; - m_memberContext = MemberContext.MemberValue; - m_elementType = JSONElementType.BeginArray; - return true; - case JsonTokenType.EndArray: - 
if (m_stack.Count == 0) - UnexpectedToken(tokenValue, tokenType); - m_context = m_stack.Pop(); - - m_elementValue = null; - m_elementType = JSONElementType.EndArray; - return true; - case JsonTokenType.String: - if (m_memberContext == MemberContext.MemberName) { - m_memberName = (string)tokenValue; - break; - } - m_elementType = JSONElementType.Value; - m_elementValue = tokenValue; - return true; - case JsonTokenType.Number: - m_elementType = JSONElementType.Value; - m_elementValue = tokenValue; - return true; - case JsonTokenType.Literal: - m_elementType = JSONElementType.Value; - m_elementValue = ParseLiteral((string)tokenValue); - return true; - case JsonTokenType.NameSeparator: - m_memberContext = MemberContext.MemberValue; - break; - case JsonTokenType.ValueSeparator: - m_memberContext = m_context.ElementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; - break; - default: - UnexpectedToken(tokenValue, tokenType); - break; - } - } - if (m_context.ElementContext != JSONElementContext.None) - throw new ParserException("Unexpedted end of data"); - - EOF = true; - - return false; - } - - object ParseLiteral(string literal) { - switch (literal) { - case "null": - return null; - case "false": - return false; - case "true": - return true; - default: - UnexpectedToken(literal, JsonTokenType.Literal); - return null; // avoid compliler error - } - } - - void UnexpectedToken(object value, JsonTokenType tokenType) { - throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value)); - } - - - /// <summary> - /// Признак конца потока - /// </summary> - public bool EOF { - get; - private set; - } - - protected override void Dispose(bool disposing) { - if (disposing) - m_scanner.Dispose(); - } - - /// <summary> - /// Переходит в конец текущего объекта. - /// </summary> - public void SeekElementEnd() { - var level = Level - 1; - - Debug.Assert(level >= 0); - - while (Level != level) - Read(); - } - } - -}
--- a/Implab/Formats/JSON/JSONScanner.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -using System; -using System.Globalization; -using Implab.Automaton; -using System.Text; -using Implab.Components; -using System.IO; - -namespace Implab.Formats.JSON { - /// <summary> - /// Сканнер (лексер), разбивающий поток символов на токены JSON. - /// </summary> - public class JSONScanner : Disposable { - readonly StringBuilder m_builder = new StringBuilder(); - - readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonExpression; - readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringExpression; - - - readonly TextScanner m_scanner; - - /// <summary> - /// Создает новый экземпляр сканнера - /// </summary> - public JSONScanner(string text) { - Safe.ArgumentNotEmpty(text, "text"); - - m_scanner = new StringScanner(text); - } - - public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { - Safe.ArgumentNotNull(reader, "reader"); - - m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); - } - - public JSONScanner(TextReader reader) : this(reader, 1024*1024, 1024){ - } - - /// <summary> - /// Читает следующий лексический элемент из входных данных. - /// </summary> - /// <param name="tokenValue">Возвращает значение прочитанного токена.</param> - /// <param name="tokenType">Возвращает тип прочитанного токена.</param> - /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns> - /// <remarks>В случе если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е. 
- /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> - public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { - JSONGrammar.TokenType[] tag; - while (m_jsonContext.Execute(m_scanner, out tag)) { - switch (tag[0]) { - case JSONGrammar.TokenType.StringBound: - tokenValue = ReadString(); - tokenType = JsonTokenType.String; - break; - case JSONGrammar.TokenType.Number: - tokenValue = Double.Parse(m_scanner.GetTokenValue(), CultureInfo.InvariantCulture); - tokenType = JsonTokenType.Number; - break; - case JSONGrammar.TokenType.Whitespace: - continue; - default: - tokenType = (JsonTokenType)tag[0]; - tokenValue = m_scanner.GetTokenValue(); - break; - } - return true; - } - tokenValue = null; - tokenType = JsonTokenType.None; - return false; - } - - string ReadString() { - int pos = 0; - var buf = new char[6]; // the buffer for unescaping chars - - JSONGrammar.TokenType[] tag; - m_builder.Clear(); - - while (m_stringContext.Execute(m_scanner, out tag)) { - switch (tag[0]) { - case JSONGrammar.TokenType.StringBound: - return m_builder.ToString(); - case JSONGrammar.TokenType.UnescapedChar: - m_scanner.CopyTokenTo(m_builder); - break; - case JSONGrammar.TokenType.EscapedUnicode: // \xXXXX - unicode escape sequence - m_scanner.CopyTokenTo(buf, 0); - m_builder.Append(StringTranslator.TranslateHexUnicode(buf, 2)); - pos++; - break; - case JSONGrammar.TokenType.EscapedChar: // \t - escape sequence - m_scanner.CopyTokenTo(buf, 0); - m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); - break; - } - - } - - throw new ParserException("Unexpected end of data"); - } - - protected override void Dispose(bool disposing) { - if (disposing) - m_scanner.Dispose(); - base.Dispose(disposing); - } - } -}
--- a/Implab/Formats/JSON/JSONWriter.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,319 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Globalization; -using System.Diagnostics; - -namespace Implab.Formats.JSON { - public class JSONWriter { - struct Context { - public bool needComma; - public JSONElementContext element; - } - Stack<Context> m_contextStack = new Stack<Context>(); - Context m_context; - - const int BUFFER_SIZE = 64; - - TextWriter m_writer; - readonly bool m_indent = true; - readonly int m_indentSize = 4; - readonly char[] m_buffer = new char[BUFFER_SIZE]; - int m_bufferPos; - - static readonly char [] _hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - static readonly char [] _escapeBKS, - _escapeFWD, - _escapeCR, - _escapeNL, - _escapeTAB, - _escapeBSLASH, - _escapeQ; - - static JSONWriter() { - _escapeBKS = "\\b".ToCharArray(); - _escapeFWD = "\\f".ToCharArray(); - _escapeCR = "\\r".ToCharArray(); - _escapeNL = "\\n".ToCharArray(); - _escapeTAB = "\\t".ToCharArray(); - _escapeBSLASH = "\\\\".ToCharArray(); - _escapeQ = "\\\"".ToCharArray(); - } - - public JSONWriter(TextWriter writer) { - Safe.ArgumentNotNull(writer, "writer"); - m_writer = writer; - } - - public JSONWriter(TextWriter writer, bool indent) { - Safe.ArgumentNotNull(writer, "writer"); - - m_writer = writer; - m_indent = indent; - } - - void WriteIndent() { - if (m_indent) { - var indent = new char[m_contextStack.Count * m_indentSize + 1]; - indent[0] = '\n'; - for (int i = 1; i < indent.Length; i++) - indent[i] = ' '; - m_writer.Write(new String(indent)); - } else { - m_writer.Write(' '); - } - } - - void WriteMemberName(string name) { - Safe.ArgumentNotEmpty(name, "name"); - if (m_context.element != JSONElementContext.Object) - OperationNotApplicable("WriteMember"); - if (m_context.needComma) - m_writer.Write(","); - - WriteIndent(); - m_context.needComma = true; 
- Write(name); - m_writer.Write(" : "); - } - - public void WriteValue(string name, string value) { - WriteMemberName(name); - Write(value); - } - - public void WriteValue(string name, bool value) { - WriteMemberName(name); - Write(value); - } - - public void WriteValue(string name, double value) { - WriteMemberName(name); - Write(value); - } - - public void WriteValue(string value) { - if (m_context.element == JSONElementContext.Array) { - - if (m_context.needComma) - m_writer.Write(","); - WriteIndent(); - m_context.needComma = true; - - Write(value); - } else if (m_context.element == JSONElementContext.None) { - Write(value); - m_context.element = JSONElementContext.Closed; - } else { - OperationNotApplicable("WriteValue"); - } - } - - public void WriteValue(bool value) { - if (m_context.element == JSONElementContext.Array) { - - if (m_context.needComma) - m_writer.Write(","); - WriteIndent(); - m_context.needComma = true; - - Write(value); - } else if (m_context.element == JSONElementContext.None) { - Write(value); - m_context.element = JSONElementContext.Closed; - } else { - OperationNotApplicable("WriteValue"); - } - } - - public void WriteValue(double value) { - if (m_context.element == JSONElementContext.Array) { - - if (m_context.needComma) - m_writer.Write(","); - WriteIndent(); - m_context.needComma = true; - - Write(value); - } else if (m_context.element == JSONElementContext.None) { - Write(value); - m_context.element = JSONElementContext.Closed; - } else { - OperationNotApplicable("WriteValue"); - } - } - - public void BeginObject() { - if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) - OperationNotApplicable("BeginObject"); - if (m_context.needComma) - m_writer.Write(","); - - WriteIndent(); - - m_context.needComma = true; - - m_contextStack.Push(m_context); - - m_context = new Context { element = JSONElementContext.Object, needComma = false }; - m_writer.Write("{"); - } - - public void 
BeginObject(string name) { - WriteMemberName(name); - - m_contextStack.Push(m_context); - - m_context = new Context { element = JSONElementContext.Object, needComma = false }; - m_writer.Write("{"); - } - - public void EndObject() { - if (m_context.element != JSONElementContext.Object) - OperationNotApplicable("EndObject"); - - m_context = m_contextStack.Pop(); - if (m_contextStack.Count == 0) - m_context.element = JSONElementContext.Closed; - WriteIndent(); - m_writer.Write("}"); - } - - public void BeginArray() { - if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) - throw new InvalidOperationException(); - if (m_context.needComma) { - m_writer.Write(","); - - } - m_context.needComma = true; - - WriteIndent(); - m_contextStack.Push(m_context); - m_context = new Context { element = JSONElementContext.Array, needComma = false }; - m_writer.Write("["); - } - - public void BeginArray(string name) { - WriteMemberName(name); - - m_contextStack.Push(m_context); - - m_context = new Context { element = JSONElementContext.Array, needComma = false }; - m_writer.Write("["); - } - - public void EndArray() { - if (m_context.element != JSONElementContext.Array) - OperationNotApplicable("EndArray"); - - m_context = m_contextStack.Pop(); - if (m_contextStack.Count == 0) - m_context.element = JSONElementContext.Closed; - WriteIndent(); - m_writer.Write("]"); - } - - void Write(bool value) { - m_writer.Write(value ? 
"true" : "false"); - } - - void FlushBuffer() { - if (m_bufferPos > 0) { - m_writer.Write(m_buffer, 0, m_bufferPos); - m_bufferPos = 0; - } - } - - void Write(string value) { - if (value == null) { - m_writer.Write("null"); - return; - } - - Debug.Assert(m_bufferPos == 0); - - var chars = value.ToCharArray(); - m_buffer[m_bufferPos++] = '"'; - - // Analysis disable once ForCanBeConvertedToForeach - for (int i = 0; i < chars.Length; i++) { - var ch = chars[i]; - - char[] escapeSeq; - - switch (ch) { - case '\b': - escapeSeq = _escapeBKS; - break; - case '\f': - escapeSeq = _escapeFWD; - break; - case '\r': - escapeSeq = _escapeCR; - break; - case '\n': - escapeSeq = _escapeNL; - break; - case '\t': - escapeSeq = _escapeTAB; - break; - case '\\': - escapeSeq = _escapeBSLASH; - break; - case '"': - escapeSeq = _escapeQ; - break; - default: - if (ch < 0x20) { - if (m_bufferPos + 6 > BUFFER_SIZE) - FlushBuffer(); - - m_buffer[m_bufferPos++] = '\\'; - m_buffer[m_bufferPos++] = 'u'; - m_buffer[m_bufferPos++] = '0'; - m_buffer[m_bufferPos++] = '0'; - m_buffer[m_bufferPos++] = _hex[ch >> 4 & 0xf]; - m_buffer[m_bufferPos++] = _hex[ch & 0xf]; - - } else { - if (m_bufferPos >= BUFFER_SIZE) - FlushBuffer(); - m_buffer[m_bufferPos++] = ch; - } - continue; - } - - if (m_bufferPos + escapeSeq.Length > BUFFER_SIZE) - FlushBuffer(); - - Array.Copy(escapeSeq, 0, m_buffer, m_bufferPos, escapeSeq.Length); - m_bufferPos += escapeSeq.Length; - - } - - if (m_bufferPos >= BUFFER_SIZE) - FlushBuffer(); - - m_buffer[m_bufferPos++] = '"'; - - FlushBuffer(); - } - - void Write(double value) { - if (double.IsNaN(value)) - Write("NaN"); - else if (double.IsNegativeInfinity(value)) - Write("-Infinity"); - else if (double.IsPositiveInfinity(value)) - Write("Infinity"); - else - m_writer.Write(value.ToString(CultureInfo.InvariantCulture)); - } - - void OperationNotApplicable(string opName) { - throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of 
'{1}'", opName, m_context.element )); - } - - } -}
--- a/Implab/Formats/JSON/JSONXmlReader.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,343 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Globalization; -using System.IO; -using System.Xml; - -namespace Implab.Formats.JSON { - public class JSONXmlReader : XmlReader { - - enum ValueContext { - Undefined, - ElementStart, - ElementValue, - ElementEnd, - ElementEmpty - } - - struct LocalNameContext { - public string localName; - public bool isArray; - } - - JSONParser m_parser; - ValueContext m_valueContext; - ReadState m_state = ReadState.Initial; - Stack<LocalNameContext> m_localNameStack = new Stack<LocalNameContext>(); - LocalNameContext m_localName; - int m_depthCorrection; - - readonly string m_rootName; - readonly string m_prefix; - readonly string m_namespaceUri; - readonly bool m_flattenArrays; - readonly string m_arrayItemName; - readonly XmlNameTable m_nameTable; - - JSONXmlReader(JSONParser parser, JSONXmlReaderOptions options) { - m_parser = parser; - - if (options != null) { - m_prefix = options.NodesPrefix ?? String.Empty; - m_namespaceUri = options.NamespaceURI ?? String.Empty; - m_rootName = options.RootName ?? "json"; - m_flattenArrays = options.FlattenArrays; - m_arrayItemName = options.ArrayItemName ?? "item"; - m_nameTable = options.NameTable ?? 
new NameTable(); - } else { - m_prefix = String.Empty; - m_namespaceUri = String.Empty; - m_rootName = "json"; - m_flattenArrays = false; - m_arrayItemName = "item"; - m_nameTable = new NameTable(); - } - } - - /// <summary> - /// Always 0, JSON doesn't support attributes - /// </summary> - public override int AttributeCount { - get { return 0; } - } - - public override string BaseURI { - get { return String.Empty; } - } - - public override int Depth { - get { - return m_localNameStack.Count + m_depthCorrection; - } - } - - public override bool EOF { - get { return m_parser.EOF; } - } - - /// <summary> - /// Always throws an exception - /// </summary> - /// <param name="i"></param> - /// <returns></returns> - public override string GetAttribute(int i) { - throw new ArgumentOutOfRangeException(); - } - - /// <summary> - /// Always returns empty string - /// </summary> - /// <param name="name"></param> - /// <param name="namespaceURI"></param> - /// <returns></returns> - public override string GetAttribute(string name, string namespaceURI) { - return String.Empty; - } - - /// <summary> - /// Always returns empty string - /// </summary> - /// <param name="name"></param> - /// <returns></returns> - public override string GetAttribute(string name) { - return String.Empty; - } - - public override bool IsEmptyElement { - get { return m_parser.ElementType == JSONElementType.Value && m_valueContext == ValueContext.ElementEmpty; } - } - - public override string LocalName { - get { return m_localName.localName; } - } - - public override string LookupNamespace(string prefix) { - if (String.IsNullOrEmpty(prefix) || prefix == m_prefix) - return m_namespaceUri; - - return String.Empty; - } - - public override bool MoveToAttribute(string name, string ns) { - return false; - } - - public override bool MoveToAttribute(string name) { - return false; - } - - public override bool MoveToElement() { - return false; - } - - public override bool MoveToFirstAttribute() { - return false; - } 
- - public override bool MoveToNextAttribute() { - return false; - } - - public override XmlNameTable NameTable { - get { return m_nameTable; } - } - - public override string NamespaceURI { - get { return m_namespaceUri; } - } - - public override XmlNodeType NodeType { - get { - switch (m_parser.ElementType) { - case JSONElementType.BeginObject: - case JSONElementType.BeginArray: - return XmlNodeType.Element; - case JSONElementType.EndObject: - case JSONElementType.EndArray: - return XmlNodeType.EndElement; - case JSONElementType.Value: - switch (m_valueContext) { - case ValueContext.ElementStart: - case ValueContext.ElementEmpty: - return XmlNodeType.Element; - case ValueContext.ElementValue: - return XmlNodeType.Text; - case ValueContext.ElementEnd: - return XmlNodeType.EndElement; - default: - throw new InvalidOperationException(); - } - default: - throw new InvalidOperationException(); - } - } - } - - public override string Prefix { - get { return m_prefix; } - } - - public override bool Read() { - if (m_state != ReadState.Interactive && m_state != ReadState.Initial) - return false; - - if (m_state == ReadState.Initial) - m_state = ReadState.Interactive; - - try { - switch (m_parser.ElementType) { - case JSONElementType.Value: - switch (m_valueContext) { - case ValueContext.ElementStart: - SetLocalName(String.Empty); - m_valueContext = ValueContext.ElementValue; - return true; - case ValueContext.ElementValue: - RestoreLocalName(); - m_valueContext = ValueContext.ElementEnd; - return true; - case ValueContext.ElementEmpty: - case ValueContext.ElementEnd: - RestoreLocalName(); - break; - } - break; - case JSONElementType.EndArray: - case JSONElementType.EndObject: - RestoreLocalName(); - break; - } - string itemName = m_parser.ElementType == JSONElementType.None ? m_rootName : m_flattenArrays ? 
m_localName.localName : m_arrayItemName; - while (m_parser.Read()) { - if (!String.IsNullOrEmpty(m_parser.ElementName)) - itemName = m_parser.ElementName; - - switch (m_parser.ElementType) { - case JSONElementType.BeginArray: - if (m_flattenArrays && !m_localName.isArray) { - m_depthCorrection--; - SetLocalName(itemName, true); - continue; - } - SetLocalName(itemName, true); - break; - case JSONElementType.BeginObject: - SetLocalName(itemName); - break; - case JSONElementType.EndArray: - if (m_flattenArrays && !m_localNameStack.Peek().isArray) { - RestoreLocalName(); - m_depthCorrection++; - continue; - } - break; - case JSONElementType.EndObject: - break; - case JSONElementType.Value: - SetLocalName(itemName); - m_valueContext = m_parser.ElementValue == null ? ValueContext.ElementEmpty : ValueContext.ElementStart; - break; - } - return true; - } - - m_state = ReadState.EndOfFile; - return false; - } catch { - m_state = ReadState.Error; - throw; - } - } - - public override bool ReadAttributeValue() { - return false; - } - - public override ReadState ReadState { - get { return m_state; } - } - - public override void ResolveEntity() { - // do nothing - } - - public override string Value { - get { - if (m_parser.ElementValue == null) - return String.Empty; - - switch(Convert.GetTypeCode (m_parser.ElementValue)) { - case TypeCode.Double: - return ((double)m_parser.ElementValue).ToString (CultureInfo.InvariantCulture); - case TypeCode.String: - return (string)m_parser.ElementValue; - case TypeCode.Boolean: - return (bool)m_parser.ElementValue ? 
"true" : "false"; - default: - return m_parser.ElementValue.ToString (); - } - } - } - - void SetLocalName(string name) { - m_localNameStack.Push(m_localName); - m_localName.localName = name; - m_localName.isArray = false; - } - - void SetLocalName(string name, bool isArray) { - m_localNameStack.Push(m_localName); - m_localName.localName = name; - m_localName.isArray = isArray; - } - - void RestoreLocalName() { - m_localName = m_localNameStack.Pop(); - } - - public override void Close() { - - } - - protected override void Dispose(bool disposing) { - #if MONO - disposing = true; - #endif - if (disposing) { - m_parser.Dispose(); - } - base.Dispose(disposing); - } - - public static JSONXmlReader Create(string file, JSONXmlReaderOptions options) { - return Create(File.OpenText(file), options); - } - - /// <summary> - /// Creates the XmlReader for the specified text stream with JSON data. - /// </summary> - /// <param name="reader">Text reader.</param> - /// <param name="options">Options.</param> - /// <remarks> - /// The reader will be disposed when the XmlReader is disposed. - /// </remarks> - public static JSONXmlReader Create(TextReader reader, JSONXmlReaderOptions options) { - return new JSONXmlReader(new JSONParser(reader), options); - } - - /// <summary> - /// Creates the XmlReader for the specified stream with JSON data. - /// </summary> - /// <param name="stream">Stream.</param> - /// <param name="options">Options.</param> - /// <remarks> - /// The stream will be disposed when the XmlReader is disposed. - /// </remarks> - public static JSONXmlReader Create(Stream stream, JSONXmlReaderOptions options) { - Safe.ArgumentNotNull(stream, "stream"); - // HACK don't dispose StreaReader to keep stream opened - return Create(new StreamReader(stream), options); - } - } -}
--- a/Implab/Formats/JSON/JSONXmlReaderOptions.cs Sat Sep 09 03:53:13 2017 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ - -using System.Xml; - -namespace Implab.Xml { - /// <summary> - /// Набор необязательных параметров для <see cref="JSONXmlReader"/>, позволяющий управлять процессом - /// интерпретации <c>JSON</c> документа. - /// </summary> - public class JsonXmlReaderOptions { - /// <summary> - /// Пространство имен в котором будут располагаться читаемые элементы документа - /// </summary> - public string NamespaceUri { - get; - set; - } - - /// <summary> - /// Интерпретировать массивы как множественные элементы (убирает один уровень вложенности), иначе массив - /// представляется в виде узла, дочерними элементами которого являются элементы массива, имена дочерних элементов - /// определяются свойством <see cref="ArrayItemName"/>. По умолчанию <c>false</c>. - /// </summary> - public bool FlattenArrays { - get; - set; - } - - /// <summary> - /// Префикс, для узлов документа - /// </summary> - public string NodesPrefix { - get; - set; - } - - /// <summary> - /// Имя корневого элемента в xml документе - /// </summary> - public string RootName { - get; - set; - } - - /// <summary> - /// Имя элемента для массивов, если не включена опция <see cref="FlattenArrays"/>. - /// По умолчанию <c>item</c>. - /// </summary> - public string ArrayItemName { - get; - set; - } - - /// <summary> - /// Таблица атомизированных строк для построения документа. - /// </summary> - public XmlNameTable NameTable { - get; - set; - } - - } -}
// File: Implab/Formats/JSON/JsonElementContext.cs
namespace Implab.Formats.Json {
    /// <summary>
    /// Kind of container a JSON reader or writer is currently positioned in.
    /// The type is not public; it is shared by the JSON components of this assembly.
    /// </summary>
    internal enum JsonElementContext {
        /// <summary>
        /// No enclosing container (the default value).
        /// </summary>
        None = 0,

        /// <summary>
        /// Inside a JSON object.
        /// </summary>
        Object = 1,

        /// <summary>
        /// Inside a JSON array.
        /// </summary>
        Array = 2,

        /// <summary>
        /// NOTE(review): presumably marks a completed document; confirm against the users of this value.
        /// </summary>
        Closed = 3
    }
}
// File: Implab/Formats/JSON/JsonElementType.cs
namespace Implab.Formats.Json {
    /// <summary>
    /// Type of the element the parser is currently positioned on.
    /// </summary>
    public enum JsonElementType {
        /// <summary>
        /// No element (the default value).
        /// </summary>
        None = 0,

        /// <summary>
        /// Start of an object.
        /// </summary>
        BeginObject = 1,

        /// <summary>
        /// End of an object.
        /// </summary>
        EndObject = 2,

        /// <summary>
        /// Start of an array.
        /// </summary>
        BeginArray = 3,

        /// <summary>
        /// End of an array.
        /// </summary>
        EndArray = 4,

        /// <summary>
        /// A simple value.
        /// </summary>
        Value = 5
    }
}
// File: Implab/Formats/JSON/JsonGrammar.cs
using System.Linq;
using Implab.Automaton.RegularExpressions;
using System;
using Implab.Automaton;
using Implab.Components;

namespace Implab.Formats.Json {
    /// <summary>
    /// Lexical grammar of JSON. Builds two scanners: one for the tokens of a JSON
    /// document and one for the pieces that make up the inside of a string literal.
    /// </summary>
    public class JsonGrammar : Grammar<char> {
        /// <summary>
        /// Tags attached to the final states of the scanners built by this grammar.
        /// The members after <see cref="Whitespace"/> are only produced by the string scanner.
        /// </summary>
        public enum TokenType {
            None,
            BeginObject,
            EndObject,
            BeginArray,
            EndArray,
            String,
            Number,
            Literal,
            NameSeparator,
            ValueSeparator,
            Whitespace,

            StringBound,
            EscapedChar,
            UnescapedChar,
            EscapedUnicode
        }

        static LazyAndWeak<JsonGrammar> _instance = new LazyAndWeak<JsonGrammar>(() => new JsonGrammar());

        /// <summary>
        /// Shared grammar instance used by the static scanner factories.
        /// </summary>
        public static JsonGrammar Instance => _instance.Value;

        readonly InputScanner<TokenType> m_jsonExpression;
        readonly InputScanner<TokenType> m_stringExpression;
        readonly CharAlphabet m_defaultAlphabet = new CharAlphabet();

        /// <summary>
        /// Alphabet the grammar's symbols are registered in.
        /// </summary>
        public CharAlphabet DefaultAlphabet => m_defaultAlphabet;

        /// <summary>
        /// Defines the token expressions and builds both scanners.
        /// </summary>
        public JsonGrammar() {
            // the control characters 0x00..0x1f are registered in the alphabet first
            DefineAlphabet(Enumerable.Range(0, 0x20).Select(code => (char)code));

            // character classes
            var hex = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
            var nonZeroDigit = SymbolRangeToken('1', '9');
            var zeroDigit = SymbolToken('0');
            var anyDigit = zeroDigit.Or(nonZeroDigit);
            var point = SymbolToken('.');
            var negative = SymbolToken('-');
            var plusMinus = SymbolSetToken('-', '+');
            var expMark = SymbolSetToken('e', 'E');
            var lowerLetters = SymbolRangeToken('a', 'z');

            // parts of a number
            var intPart = zeroDigit.Or(nonZeroDigit.Cat(anyDigit.EClosure()));
            var fracPart = point.Cat(anyDigit.Closure());
            var expPart = expMark.Cat(plusMinus.Optional()).Cat(anyDigit.Closure());

            // parts of a string
            var dquote = SymbolToken('"');
            var escapeMark = SymbolToken('\\');
            var simpleEscapes = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
            var unicodeEscape = SymbolToken('u').Cat(hex.Repeat(4));

            // structural characters absorb the whitespace around them
            var blanks = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
            var objectOpen = blanks.Cat(SymbolToken('{')).Cat(blanks);
            var objectClose = blanks.Cat(SymbolToken('}')).Cat(blanks);
            var arrayOpen = blanks.Cat(SymbolToken('[')).Cat(blanks);
            var arrayClose = blanks.Cat(SymbolToken(']')).Cat(blanks);
            var colon = blanks.Cat(SymbolToken(':')).Cat(blanks);
            var comma = blanks.Cat(SymbolToken(',')).Cat(blanks);

            var numberToken = negative.Optional().Cat(intPart).Cat(fracPart.Optional()).Cat(expPart.Optional());
            var literalToken = lowerLetters.Closure();
            // any character except the control characters, the backslash and the double quote
            var plainChar = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(code => (char)code));

            // top level tokens; a string is only recognized by its opening quote here
            var documentExpression =
                numberToken.Tag(TokenType.Number)
                    .Or(literalToken.Tag(TokenType.Literal))
                    .Or(dquote.Tag(TokenType.StringBound))
                    .Or(objectOpen.Tag(TokenType.BeginObject))
                    .Or(objectClose.Tag(TokenType.EndObject))
                    .Or(arrayOpen.Tag(TokenType.BeginArray))
                    .Or(arrayClose.Tag(TokenType.EndArray))
                    .Or(colon.Tag(TokenType.NameSeparator))
                    .Or(comma.Tag(TokenType.ValueSeparator))
                    .Or(SymbolSetToken('\n', '\r', '\t', ' ').Closure().Tag(TokenType.Whitespace));

            // tokens inside of a string literal
            var stringBodyExpression =
                dquote.Tag(TokenType.StringBound)
                    .Or(escapeMark.Cat(simpleEscapes).Tag(TokenType.EscapedChar))
                    .Or(escapeMark.Cat(unicodeEscape).Tag(TokenType.EscapedUnicode))
                    .Or(plainChar.Closure().Tag(TokenType.UnescapedChar));

            m_jsonExpression = BuildScanner(documentExpression);
            m_stringExpression = BuildScanner(stringBodyExpression);
        }

        /// <summary>
        /// Returns a clone of the scanner for the tokens of a JSON document.
        /// </summary>
        public static InputScanner<TokenType> CreateJsonExpressionScanner() => Instance.m_jsonExpression.Clone();

        /// <summary>
        /// Returns a clone of the scanner for the contents of a string literal.
        /// </summary>
        public static InputScanner<TokenType> CreateStringExpressionScanner() => Instance.m_stringExpression.Clone();

        protected override IAlphabetBuilder<char> AlphabetBuilder => m_defaultAlphabet;

        /// <summary>
        /// Creates a token matching any single character of the inclusive range
        /// <paramref name="start"/>..<paramref name="stop"/>.
        /// </summary>
        Token SymbolRangeToken(char start, char stop) {
            var count = stop - start + 1;
            var symbols = Enumerable.Range(start, count).Select(code => (char)code);
            return SymbolToken(symbols);
        }

        /// <summary>
        /// Builds a scanner for the specified tagged regular expression.
        /// </summary>
        /// <param name="regexp">The expression to build the scanner for.</param>
        /// <returns>The scanner created from the optimized automaton.</returns>
        /// <exception cref="ApplicationException">The initial state of the automaton is final,
        /// i.e. the expression matches an empty token.</exception>
        public InputScanner<TokenType> BuildScanner(Token regexp) {
            var automaton = new RegularDFA<char, TokenType>(AlphabetBuilder);

            var builder = new RegularExpressionVisitor<TokenType>(automaton);
            regexp.Accept(builder);
            builder.BuildDFA();

            if (automaton.IsFinalState(automaton.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            // the optimized automaton gets an alphabet of its own
            var compactAlphabet = new CharAlphabet();
            var minimized = automaton.Optimize(compactAlphabet);

            var transitions = minimized.CreateTransitionTable();
            var finalStates = minimized.CreateFinalStateTable();
            var stateTags = NormalizeTags(minimized.CreateTagTable());

            return new InputScanner<TokenType>(
                transitions,
                finalStates,
                stateTags,
                minimized.InitialState,
                compactAlphabet.CreateCharMap()
            );
        }

        /// <summary>
        /// Reduces the per-state tag lists to a single tag per state. States without tags
        /// get the default tag; a state with more than one tag is an error.
        /// </summary>
        static TokenType[] NormalizeTags(TokenType[][] tags) {
            var normalized = new TokenType[tags.Length];
            for (var state = 0; state < tags.Length; state++) {
                var stateTags = tags[state];
                var count = stateTags == null ? 0 : stateTags.Length;

                if (count > 1)
                    throw new Exception($"Ambigous state tags {string.Join(", ", stateTags)}");

                normalized[state] = count == 1 ? stateTags[0] : default(TokenType);
            }
            return normalized;
        }

    }
}
// File: Implab/Formats/JSON/JsonParser.cs
using System;
using System.Diagnostics;
using System.IO;
using Implab.Automaton;
using Implab.Automaton.RegularExpressions;
using System.Linq;
using Implab.Components;
using System.Collections.Generic;

namespace Implab.Formats.Json {
    /// <summary>
    /// Pull parser for JSON data.
    /// </summary>
    /// <remarks>
    /// Note the particular meaning of the <see cref="Level"/> property: it is the current
    /// nesting depth of the containers, but the closing element of an object or an array
    /// is reported with a level one less than the container itself.
    /// <code>
    /// { // Level = 1
    ///     "name" : "Peter", // Level = 1
    ///     "address" : { // Level = 2
    ///         city : "Stern" // Level = 2
    ///     } // Level = 1
    /// } // Level = 0
    /// </code>
    /// </remarks>
    public class JsonParser : Disposable {

        // tells whether the next string token is a member name or a value
        enum MemberContext {
            MemberName,
            MemberValue
        }

        #region Parser rules
        /// <summary>
        /// State of the token automaton of one container (the document, an object or an array).
        /// This is a mutable struct: pushing it on the stack stores a copy of the current state.
        /// </summary>
        struct ParserContext {
            readonly int[,] m_dfa;
            int m_state;

            readonly JsonElementContext m_elementContext;

            public ParserContext(int[,] dfa, int state, JsonElementContext context) {
                m_dfa = dfa;
                m_state = state;
                m_elementContext = context;
            }

            /// <summary>
            /// Tries to advance the automaton with the specified token.
            /// </summary>
            /// <returns><c>false</c> if the token isn't allowed in the current state,
            /// the state is left unchanged in this case.</returns>
            public bool Move(JsonTokenType token) {
                var next = m_dfa[m_state, (int)token];
                if (next == AutomatonConst.UNREACHABLE_STATE)
                    return false;
                m_state = next;
                return true;
            }

            public JsonElementContext ElementContext {
                get { return m_elementContext; }
            }
        }

        static readonly ParserContext _jsonContext;
        static readonly ParserContext _objectContext;
        static readonly ParserContext _arrayContext;

        static JsonParser() {

            var valueExpression = MakeToken(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
            var memberExpression = MakeToken(JsonTokenType.String).Cat(MakeToken(JsonTokenType.NameSeparator)).Cat(valueExpression);

            // members separated by value separators (possibly none), then the end of the object
            var objectExpression = memberExpression
                .Cat(
                    MakeToken(JsonTokenType.ValueSeparator)
                    .Cat(memberExpression)
                    .EClosure()
                )
                .Optional()
                .Cat(MakeToken(JsonTokenType.EndObject))
                .End();

            // values separated by value separators (possibly none), then the end of the array
            var arrayExpression = valueExpression
                .Cat(
                    MakeToken(JsonTokenType.ValueSeparator)
                    .Cat(valueExpression)
                    .EClosure()
                )
                .Optional()
                .Cat(MakeToken(JsonTokenType.EndArray))
                .End();

            // the document is a single value
            var jsonExpression = valueExpression.End();

            _jsonContext = CreateParserContext(jsonExpression, JsonElementContext.None);
            _objectContext = CreateParserContext(objectExpression, JsonElementContext.Object);
            _arrayContext = CreateParserContext(arrayExpression, JsonElementContext.Array);
        }

        static Token MakeToken(params JsonTokenType[] input) {
            return Token.New( input.Select(t => (int)t).ToArray() );
        }

        static ParserContext CreateParserContext(Token expr, JsonElementContext context) {

            var dfa = new DFATable();
            var builder = new RegularExpressionVisitor(dfa);
            expr.Accept(builder);
            builder.BuildDFA();

            return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context);
        }

        #endregion

        readonly JsonScanner m_scanner;
        // json starts from the value context and may contain even a single literal
        MemberContext m_memberContext = MemberContext.MemberValue;

        JsonElementType m_elementType;
        object m_elementValue;
        string m_memberName = String.Empty;

        // contexts of the enclosing containers, the current one is kept in m_context
        readonly Stack<ParserContext> m_stack = new Stack<ParserContext>();
        ParserContext m_context = _jsonContext;

        /// <summary>
        /// Creates a new parser for the string containing JSON.
        /// </summary>
        /// <param name="text">The JSON text.</param>
        public JsonParser(string text) {
            Safe.ArgumentNotEmpty(text, nameof(text));
            m_scanner = JsonStringScanner.Create(text);
        }

        /// <summary>
        /// Creates a new parser for the specified text reader.
        /// </summary>
        /// <param name="reader">The text reader.</param>
        public JsonParser(TextReader reader) {
            Safe.ArgumentNotNull(reader, nameof(reader));
            m_scanner = JsonTextScanner.Create(reader);
        }

        /// <summary>
        /// Number of containers which are currently open, see the remarks of the class.
        /// </summary>
        public int Level {
            get { return m_stack.Count; }
        }

        /// <summary>
        /// Type of the element the parser is currently positioned on.
        /// </summary>
        public JsonElementType ElementType {
            get { return m_elementType; }
        }

        /// <summary>
        /// Name of the element, i.e. the name of the member of the parent object. Always an empty
        /// string for array items and for the root element.
        /// </summary>
        public string ElementName {
            get { return m_memberName; }
        }

        /// <summary>
        /// Value of the element. Only for elements of type <see cref="JsonElementType.Value"/>,
        /// <c>null</c> for the others.
        /// </summary>
        public object ElementValue {
            get { return m_elementValue; }
        }

        /// <summary>
        /// Reads the next element from the input.
        /// </summary>
        /// <returns><c>true</c> - the element has been read, <c>false</c> - the end of data.</returns>
        /// <exception cref="ParserException">A token isn't allowed at the current position or
        /// the data ends inside an object or an array.</exception>
        public bool Read() {
            object tokenValue;
            JsonTokenType tokenType;

            m_memberName = String.Empty;

            while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
                // validate the token against the rules of the current container
                if(!m_context.Move(tokenType))
                    UnexpectedToken(tokenValue, tokenType);

                switch (tokenType) {
                    case JsonTokenType.BeginObject:
                        m_stack.Push(m_context);
                        m_context = _objectContext;

                        m_elementValue = null;
                        m_memberContext = MemberContext.MemberName;
                        m_elementType = JsonElementType.BeginObject;
                        return true;
                    case JsonTokenType.EndObject:
                        if (m_stack.Count == 0)
                            UnexpectedToken(tokenValue, tokenType);
                        m_context = m_stack.Pop();

                        m_elementValue = null;
                        m_elementType = JsonElementType.EndObject;
                        return true;
                    case JsonTokenType.BeginArray:
                        m_stack.Push(m_context);
                        m_context = _arrayContext;

                        m_elementValue = null;
                        m_memberContext = MemberContext.MemberValue;
                        m_elementType = JsonElementType.BeginArray;
                        return true;
                    case JsonTokenType.EndArray:
                        if (m_stack.Count == 0)
                            UnexpectedToken(tokenValue, tokenType);
                        m_context = m_stack.Pop();

                        m_elementValue = null;
                        m_elementType = JsonElementType.EndArray;
                        return true;
                    case JsonTokenType.String:
                        if (m_memberContext == MemberContext.MemberName) {
                            // the name is remembered and reported together with the value
                            m_memberName = (string)tokenValue;
                            break;
                        }
                        m_elementType = JsonElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    case JsonTokenType.Number:
                        m_elementType = JsonElementType.Value;
                        m_elementValue = tokenValue;
                        return true;
                    case JsonTokenType.Literal:
                        m_elementType = JsonElementType.Value;
                        m_elementValue = ParseLiteral((string)tokenValue);
                        return true;
                    case JsonTokenType.NameSeparator:
                        m_memberContext = MemberContext.MemberValue;
                        break;
                    case JsonTokenType.ValueSeparator:
                        // inside an object the separator is followed by a member name
                        m_memberContext = m_context.ElementContext == JsonElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
                        break;
                    default:
                        UnexpectedToken(tokenValue, tokenType);
                        break;
                }
            }
            // the data may end only at the top level
            if (m_context.ElementContext != JsonElementContext.None)
                throw new ParserException("Unexpedted end of data");

            EOF = true;

            return false;
        }

        // converts the literals null, false and true to their values
        object ParseLiteral(string literal) {
            switch (literal) {
                case "null":
                    return null;
                case "false":
                    return false;
                case "true":
                    return true;
                default:
                    UnexpectedToken(literal, JsonTokenType.Literal);
                    return null; // avoid compiler error
            }
        }

        void UnexpectedToken(object value, JsonTokenType tokenType) {
            throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
        }


        /// <summary>
        /// Indicates that the end of the data has been reached.
        /// </summary>
        public bool EOF {
            get;
            private set;
        }

        protected override void Dispose(bool disposing) {
            if (disposing)
                m_scanner.Dispose();

            // let the base class do its part, the same way JsonTextScanner does
            base.Dispose(disposing);
        }

        /// <summary>
        /// Moves the parser to the end of the current object or array.
        /// </summary>
        /// <exception cref="InvalidOperationException">The parser isn't inside an object or
        /// an array.</exception>
        /// <exception cref="ParserException">The data is malformed or ends before the container
        /// is closed.</exception>
        public void SeekElementEnd() {
            var level = Level - 1;

            // at the top level there is nothing to seek to; without this check the loop
            // below would never reach the level -1 and would spin forever after the end of data
            if (level < 0)
                throw new InvalidOperationException("The parser isn't positioned inside an object or an array");

            while (Level != level) {
                // defensive: never loop once the input is exhausted
                if (!Read())
                    throw new ParserException("Unexpected end of data");
            }
        }
    }

}
// File: Implab/Formats/JSON/JsonScanner.cs
using System;
using System.Globalization;
using Implab.Automaton;
using System.Text;
using Implab.Components;
using System.IO;

namespace Implab.Formats.Json {
    /// <summary>
    /// Scanner (lexer) which splits a stream of characters into JSON tokens.
    /// </summary>
    public abstract class JsonScanner : Disposable {
        readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner();
        readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner();

        readonly char[] m_unescapeBuf = new char[4];
        readonly char[] m_buffer;
        // index just after the last valid character in m_buffer
        int m_length;
        // index of the next character to scan in m_buffer
        int m_pos;
        // text of the token being read, it may span several refills of the buffer
        readonly StringBuilder m_tokenBuilder = new StringBuilder();

        /// <summary>
        /// Creates the scanner over a buffer which may already contain data.
        /// </summary>
        /// <param name="buffer">The buffer used to read the input.</param>
        /// <param name="pos">Index of the first character to scan.</param>
        /// <param name="length">Index just after the last valid character in the buffer.</param>
        protected JsonScanner(char[] buffer, int pos, int length) {
            m_buffer = buffer;
            m_pos = pos;
            m_length = length;
        }

        /// <summary>
        /// Reads one token with the specified scanner and appends its text to the token builder.
        /// </summary>
        /// <returns><c>false</c> if there is no more data.</returns>
        /// <exception cref="ParserException">The input doesn't form a token.</exception>
        bool Read(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) {
            scanner.Reset();

            if (m_pos == m_length) {
                // the buffer is exhausted, refill it
                m_pos = 0;
                m_length = Read(m_buffer, 0, m_buffer.Length);
                if (m_length == 0) {
                    tokenType = JsonGrammar.TokenType.None;
                    return false; // EOF
                }
            }

            // Scan returns true when the whole chunk was consumed and the token may continue,
            // so keep the chunk and feed the next one
            while(scanner.Scan(m_buffer, m_pos, m_length - m_pos)) {
                m_tokenBuilder.Append(m_buffer, m_pos, m_length - m_pos);
                m_pos = 0;
                m_length = Read(m_buffer, 0, m_buffer.Length);
            }
            var scannerPos = scanner.Position;
            if (scannerPos != m_pos) {
                m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
                m_pos = scannerPos;
            }

            if (!scanner.IsFinal) {
                if (m_length == 0 || m_pos >= m_length) {
                    // unexpected EOF
                    throw new ParserException("Unexpected EOF");
                } else {
                    // unexpected character: m_pos now points to the first character the scanner
                    // didn't consume, this is the one to report
                    throw new ParserException($"Unexpected character '{m_buffer[m_pos]}'");
                }
            }
            tokenType = scanner.Tag;
            return true;
        }

        /// <summary>
        /// Reads the next portion of the input into the buffer.
        /// </summary>
        /// <returns>The number of characters read, <c>0</c> means the end of the input.</returns>
        protected abstract int Read(char[] buffer, int offset, int size);


        /// <summary>
        /// Reads the next lexical element from the input.
        /// </summary>
        /// <param name="tokenValue">Returns the value of the token.</param>
        /// <param name="tokenType">Returns the type of the token.</param>
        /// <returns><c>true</c> - the token has been read, <c>false</c> - the end of the input
        /// has been reached.</returns>
        /// <remarks>An exception is thrown if a token can't be recognized. The values of the tokens
        /// are processed, i.e. the escape sequences in strings are decoded and numbers are returned
        /// as <c>double</c>.</remarks>
        public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();
            while (Read(m_jsonContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // the opening quote, the rest is read with the string scanner
                        tokenValue = ReadString();
                        tokenType = JsonTokenType.String;
                        break;
                    case JsonGrammar.TokenType.Number:
                        tokenValue = Double.Parse(m_tokenBuilder.ToString(), CultureInfo.InvariantCulture);
                        tokenType = JsonTokenType.Number;
                        break;
                    case JsonGrammar.TokenType.Literal:
                        tokenType = JsonTokenType.Literal;
                        tokenValue = m_tokenBuilder.ToString();
                        break;
                    case JsonGrammar.TokenType.Whitespace:
                        // skip it
                        m_tokenBuilder.Clear();
                        continue;
                    default:
                        // NOTE(review): the cast assumes the remaining grammar tags have the same
                        // numeric values as the members of JsonTokenType - confirm
                        tokenType = (JsonTokenType)tag;
                        tokenValue = null;
                        break;
                }
                return true;
            }
            tokenValue = null;
            tokenType = JsonTokenType.None;
            return false;
        }

        /// <summary>
        /// Reads the contents of a string up to the closing quote and decodes the escape sequences.
        /// </summary>
        /// <exception cref="ParserException">The data ends before the closing quote.</exception>
        string ReadString() {
            JsonGrammar.TokenType tag;
            m_tokenBuilder.Clear();

            while (Read(m_stringContext, out tag)) {
                switch (tag) {
                    case JsonGrammar.TokenType.StringBound:
                        // drop the closing quote appended by Read
                        m_tokenBuilder.Length--;
                        return m_tokenBuilder.ToString();
                    case JsonGrammar.TokenType.UnescapedChar:
                        break;
                    case JsonGrammar.TokenType.EscapedUnicode: // \uXXXX - unicode escape sequence
                        // take the 4 hex digits and replace the 6 characters of the sequence
                        m_tokenBuilder.CopyTo(m_tokenBuilder.Length - 4, m_unescapeBuf, 0, 4);
                        m_tokenBuilder.Length -= 6;
                        m_tokenBuilder.Append(StringTranslator.TranslateHexUnicode(m_unescapeBuf, 0));
                        break;
                    case JsonGrammar.TokenType.EscapedChar:  // \t - escape sequence
                        // replace the 2 characters of the sequence with the decoded character
                        var ch = m_tokenBuilder[m_tokenBuilder.Length-1];
                        m_tokenBuilder.Length -= 2;
                        m_tokenBuilder.Append(StringTranslator.TranslateEscapedChar(ch));
                        break;
                }
            }

            throw new ParserException("Unexpected end of data");
        }
    }
}
// File: Implab/Formats/JSON/JsonStringScanner.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Implab.Formats.Json {
    /// <summary>
    /// JSON scanner which reads the data from a string or from an array of characters.
    /// </summary>
    public class JsonStringScanner : JsonScanner {
        const int _defaultBuffer = 64;

        // the source of the data for the refills, null if the whole data is already in the buffer
        readonly string m_data;
        // index in m_data just after the last character which may be read
        readonly int m_end;
        // index in m_data of the next character to read
        int m_offset;

        JsonStringScanner(string data, char[] buffer, int pos, int length, int offset, int end) : base(buffer, pos, length) {
            m_data = data;
            m_offset = offset;
            m_end = end;
        }

        /// <summary>
        /// Copies the next portion of the string to the buffer.
        /// </summary>
        /// <returns>The number of characters copied, <c>0</c> if there is no more data.</returns>
        protected override int Read(char[] buffer, int offset, int size) {
            if (m_data == null)
                return 0;
            if (m_offset >= m_end)
                return 0;

            // never read beyond the end of the region the scanner was created for
            var count = Math.Min(size, m_end - m_offset);

            m_data.CopyTo(m_offset, buffer, offset, count);
            m_offset += count;

            return count;
        }

        /// <summary>
        /// Creates the scanner for the whole string.
        /// </summary>
        /// <param name="data">The string with JSON data.</param>
        public static JsonStringScanner Create(string data) {
            Safe.ArgumentNotNull(data, nameof(data));

            // a short string is scanned directly from its copy, no refills are needed
            if (data.Length <= _defaultBuffer)
                return new JsonStringScanner(null, data.ToCharArray(), 0, data.Length, data.Length, data.Length);

            var buffer = new char[_defaultBuffer];
            data.CopyTo(0, buffer, 0, _defaultBuffer);
            return new JsonStringScanner(data, buffer, 0, _defaultBuffer, _defaultBuffer, data.Length);
        }

        /// <summary>
        /// Creates the scanner for the part of the string.
        /// </summary>
        /// <param name="data">The string with JSON data.</param>
        /// <param name="offset">Index of the first character to scan.</param>
        /// <param name="length">The number of characters to scan.</param>
        /// <exception cref="ArgumentOutOfRangeException">The specified region doesn't fit
        /// the string.</exception>
        public static JsonStringScanner Create(string data, int offset, int length) {
            Safe.ArgumentNotNull(data, nameof(data));
            // NOTE(review): the bound semantics of Safe.ArgumentGreaterThan isn't visible here,
            // if it is exclusive then offset 0 is rejected - confirm
            Safe.ArgumentGreaterThan(offset, 0, nameof(offset));
            Safe.ArgumentGreaterThan(length, 0, nameof(length));

            // written as a subtraction to avoid the overflow of offset + length
            if (length > data.Length - offset)
                throw new ArgumentOutOfRangeException(nameof(length), "Specified offset and length are out of the string bounds");

            if (length <= _defaultBuffer) {
                var buffer = new char[length];
                data.CopyTo(offset, buffer, 0, length);

                return new JsonStringScanner(null, buffer, 0, length, length, length);
            } else {
                var buffer = new char[_defaultBuffer];
                data.CopyTo(offset, buffer, 0, _defaultBuffer);
                // the refills continue after the first chunk and stop at the end of the region
                return new JsonStringScanner(data, buffer, 0, _defaultBuffer, offset + _defaultBuffer, offset + length);
            }
        }

        /// <summary>
        /// Creates the scanner for the part of the array. The array isn't copied, it is used
        /// as the buffer of the scanner.
        /// </summary>
        /// <param name="data">The array with JSON data.</param>
        /// <param name="offset">Index of the first character to scan.</param>
        /// <param name="length">The number of characters to scan.</param>
        /// <exception cref="ArgumentOutOfRangeException">The specified region doesn't fit
        /// the array.</exception>
        public static JsonStringScanner Create(char[] data, int offset, int length) {
            Safe.ArgumentNotNull(data, nameof(data));
            Safe.ArgumentGreaterThan(offset, 0, nameof(offset));
            Safe.ArgumentGreaterThan(length, 0, nameof(length));

            if (length > data.Length - offset)
                throw new ArgumentOutOfRangeException(nameof(length), "Specified offset and length are out of the array bounds");

            // the base class expects the index of the end of the data, not the number of characters
            var end = offset + length;
            return new JsonStringScanner(null, data, offset, end, end, end);

        }
    }
}
// File: Implab/Formats/JSON/JsonTextScanner.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Implab.Formats.Json {
    /// <summary>
    /// JSON scanner which pulls the characters from a <see cref="TextReader"/>.
    /// </summary>
    public class JsonTextScanner : JsonScanner {
        const int _chunkSize = 4096;
        readonly TextReader m_reader;

        // the buffer is handed to the base class empty, it is filled on the first read
        JsonTextScanner(TextReader source, char[] chunk) : base(chunk, 0, 0) {
            m_reader = source;
        }

        // every factory below ends up here: the reader gets a fresh buffer
        static JsonTextScanner Wrap(TextReader source) => new JsonTextScanner(source, new char[_chunkSize]);

        protected override int Read(char[] buffer, int offset, int size) => m_reader.Read(buffer, offset, size);

        /// <summary>
        /// Creates the scanner for the file, the file is read with the specified encoding.
        /// </summary>
        public static JsonTextScanner Create(string file, Encoding encoding) => Wrap(new StreamReader(file, encoding));

        /// <summary>
        /// Creates the scanner for the file.
        /// </summary>
        public static JsonTextScanner Create(string file) => Wrap(new StreamReader(file));

        /// <summary>
        /// Creates the scanner for the stream, the stream is read with the specified encoding.
        /// </summary>
        public static JsonTextScanner Create(Stream stream, Encoding encoding) => Wrap(new StreamReader(stream, encoding));

        /// <summary>
        /// Creates the scanner for the stream.
        /// </summary>
        public static JsonTextScanner Create(Stream stream) => Wrap(new StreamReader(stream));

        /// <summary>
        /// Creates the scanner for the text reader.
        /// </summary>
        /// <param name="reader">The reader to get the data from, must not be <c>null</c>.</param>
        public static JsonTextScanner Create(TextReader reader) {
            Safe.ArgumentNotNull(reader, nameof(reader));
            return Wrap(reader);
        }

        /// <summary>
        /// Passes the reader to <c>Safe.Dispose</c> when the scanner is disposed explicitly.
        /// </summary>
        protected override void Dispose(bool disposing) {
            if (disposing) {
                Safe.Dispose(m_reader);
            }

            base.Dispose(disposing);
        }
    }
}
--- a/Implab/Formats/JSON/JsonTokenType.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Formats/JSON/JsonTokenType.cs Tue Sep 12 01:19:12 2017 +0300 @@ -1,6 +1,6 @@ -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// <summary> - /// Тип токенов, возвращаемых <see cref="JSONScanner"/>. + /// Тип токенов, возвращаемых <see cref="JsonScanner"/>. /// </summary> public enum JsonTokenType : int { None = 0,
// Implab/Formats/JSON/JsonWriter.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Globalization;
using System.Diagnostics;

namespace Implab.Formats.Json {
    /// <summary>
    /// Writes a JSON document to a <see cref="TextWriter"/>. The writer tracks the
    /// current element (root, object or array) and throws
    /// <see cref="InvalidOperationException"/> when an operation doesn't fit it.
    /// </summary>
    public class JsonWriter {
        /// <summary>
        /// State of a single nesting level.
        /// </summary>
        struct Context {
            // true when the next item in this level must be preceded by ','
            public bool needComma;
            // kind of the element being written at this level
            public JsonElementContext element;
        }

        const int BUFFER_SIZE = 64;

        static readonly char[] _hex = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
        static readonly char[] _escapeBKS = "\\b".ToCharArray();
        static readonly char[] _escapeFWD = "\\f".ToCharArray();
        static readonly char[] _escapeCR = "\\r".ToCharArray();
        static readonly char[] _escapeNL = "\\n".ToCharArray();
        static readonly char[] _escapeTAB = "\\t".ToCharArray();
        static readonly char[] _escapeBSLASH = "\\\\".ToCharArray();
        static readonly char[] _escapeQ = "\\\"".ToCharArray();

        readonly Stack<Context> m_contextStack = new Stack<Context>();
        Context m_context;

        readonly TextWriter m_writer;
        readonly bool m_indent = true;
        readonly int m_indentSize = 4;

        // scratch buffer used by Write(string) to batch the escaped output
        readonly char[] m_buffer = new char[BUFFER_SIZE];
        int m_bufferPos;

        /// <summary>
        /// Creates a writer with indentation enabled.
        /// </summary>
        /// <param name="writer">The destination; must not be <c>null</c>.</param>
        public JsonWriter(TextWriter writer) : this(writer, true) {
        }

        /// <summary>
        /// Creates a writer.
        /// </summary>
        /// <param name="writer">The destination; must not be <c>null</c>.</param>
        /// <param name="indent">
        /// When <c>true</c> every item starts on a new line indented by its nesting depth,
        /// otherwise items are separated with a single space.
        /// </param>
        public JsonWriter(TextWriter writer, bool indent) {
            Safe.ArgumentNotNull(writer, nameof(writer));

            m_writer = writer;
            m_indent = indent;
        }

        /// <summary>
        /// Emits the separator placed before an item: a line feed followed by
        /// (depth * indent size) spaces, or a single space when indentation is off.
        /// The depth is the current size of the context stack.
        /// </summary>
        void WriteIndent() {
            if (m_indent) {
                m_writer.Write('\n');
                m_writer.Write(new string(' ', m_contextStack.Count * m_indentSize));
            } else {
                m_writer.Write(' ');
            }
        }

        /// <summary>
        /// Writes the separator, the quoted member name and " : ".
        /// Applicable only while an object is being written.
        /// </summary>
        void WriteMemberName(string name) {
            Safe.ArgumentNotEmpty(name, nameof(name));
            if (m_context.element != JsonElementContext.Object)
                OperationNotApplicable("WriteMember");
            if (m_context.needComma)
                m_writer.Write(",");

            WriteIndent();
            m_context.needComma = true;
            Write(name);
            m_writer.Write(" : ");
        }

        /// <summary>
        /// Prepares the output for a nameless value: inside an array the separator is
        /// written, at the document root nothing is written, any other context is an error.
        /// </summary>
        void BeginValue() {
            if (m_context.element == JsonElementContext.Array) {
                if (m_context.needComma)
                    m_writer.Write(",");
                WriteIndent();
                m_context.needComma = true;
            } else if (m_context.element != JsonElementContext.None) {
                OperationNotApplicable("WriteValue");
            }
        }

        /// <summary>
        /// Completes a nameless value: a value written at the document root closes the document.
        /// </summary>
        void EndValue() {
            if (m_context.element == JsonElementContext.None)
                m_context.element = JsonElementContext.Closed;
        }

        /// <summary>
        /// Prepares the output for a nameless object or array, which is allowed
        /// at the document root or inside an array.
        /// </summary>
        /// <param name="opName">The name of the operation reported in the error message.</param>
        void BeginNamelessScope(string opName) {
            if (m_context.element != JsonElementContext.None && m_context.element != JsonElementContext.Array)
                OperationNotApplicable(opName);
            if (m_context.needComma)
                m_writer.Write(",");

            WriteIndent();
            m_context.needComma = true;
        }

        /// <summary>
        /// Saves the current context, enters a new nesting level and writes the opening bracket.
        /// </summary>
        void OpenScope(JsonElementContext element, string bracket) {
            m_contextStack.Push(m_context);
            m_context = new Context { element = element, needComma = false };
            m_writer.Write(bracket);
        }

        /// <summary>
        /// Leaves the current nesting level and writes the closing bracket. When the
        /// outermost level is left the document becomes closed.
        /// </summary>
        void CloseScope(JsonElementContext expected, string opName, string bracket) {
            if (m_context.element != expected)
                OperationNotApplicable(opName);

            m_context = m_contextStack.Pop();
            if (m_contextStack.Count == 0)
                m_context.element = JsonElementContext.Closed;
            WriteIndent();
            m_writer.Write(bracket);
        }

        /// <summary>Writes a string member of the current object.</summary>
        public void WriteValue(string name, string value) {
            WriteMemberName(name);
            Write(value);
        }

        /// <summary>Writes a boolean member of the current object.</summary>
        public void WriteValue(string name, bool value) {
            WriteMemberName(name);
            Write(value);
        }

        /// <summary>Writes a numeric member of the current object.</summary>
        public void WriteValue(string name, double value) {
            WriteMemberName(name);
            Write(value);
        }

        /// <summary>Writes a string as an array item or as the root value.</summary>
        public void WriteValue(string value) {
            BeginValue();
            Write(value);
            EndValue();
        }

        /// <summary>Writes a boolean as an array item or as the root value.</summary>
        public void WriteValue(bool value) {
            BeginValue();
            Write(value);
            EndValue();
        }

        /// <summary>Writes a number as an array item or as the root value.</summary>
        public void WriteValue(double value) {
            BeginValue();
            Write(value);
            EndValue();
        }

        /// <summary>Starts an object at the document root or as an array item.</summary>
        public void BeginObject() {
            BeginNamelessScope("BeginObject");
            OpenScope(JsonElementContext.Object, "{");
        }

        /// <summary>Starts an object which is a member of the current object.</summary>
        public void BeginObject(string name) {
            WriteMemberName(name);
            OpenScope(JsonElementContext.Object, "{");
        }

        /// <summary>Ends the current object.</summary>
        public void EndObject() {
            CloseScope(JsonElementContext.Object, "EndObject", "}");
        }

        /// <summary>Starts an array at the document root or as an array item.</summary>
        /// <exception cref="InvalidOperationException">The current context doesn't allow it.</exception>
        public void BeginArray() {
            // reports the failure the same way as the sibling operations do
            BeginNamelessScope("BeginArray");
            OpenScope(JsonElementContext.Array, "[");
        }

        /// <summary>Starts an array which is a member of the current object.</summary>
        public void BeginArray(string name) {
            WriteMemberName(name);
            OpenScope(JsonElementContext.Array, "[");
        }

        /// <summary>Ends the current array.</summary>
        public void EndArray() {
            CloseScope(JsonElementContext.Array, "EndArray", "]");
        }

        void Write(bool value) {
            m_writer.Write(value ? "true" : "false");
        }

        /// <summary>
        /// Sends the buffered characters to the underlying writer and empties the buffer.
        /// </summary>
        void FlushBuffer() {
            if (m_bufferPos > 0) {
                m_writer.Write(m_buffer, 0, m_bufferPos);
                m_bufferPos = 0;
            }
        }

        /// <summary>
        /// Writes a quoted and escaped string literal, <c>null</c> is written as the
        /// <c>null</c> literal. Control characters without a short escape are written
        /// as <c>\u00XX</c>.
        /// </summary>
        void Write(string value) {
            if (value == null) {
                m_writer.Write("null");
                return;
            }

            Debug.Assert(m_bufferPos == 0);

            m_buffer[m_bufferPos++] = '"';

            foreach (var ch in value) {
                char[] escapeSeq;

                switch (ch) {
                    case '\b':
                        escapeSeq = _escapeBKS;
                        break;
                    case '\f':
                        escapeSeq = _escapeFWD;
                        break;
                    case '\r':
                        escapeSeq = _escapeCR;
                        break;
                    case '\n':
                        escapeSeq = _escapeNL;
                        break;
                    case '\t':
                        escapeSeq = _escapeTAB;
                        break;
                    case '\\':
                        escapeSeq = _escapeBSLASH;
                        break;
                    case '"':
                        escapeSeq = _escapeQ;
                        break;
                    default:
                        if (ch < 0x20) {
                            // the \u00XX sequence takes 6 chars, make sure it fits
                            if (m_bufferPos + 6 > BUFFER_SIZE)
                                FlushBuffer();

                            m_buffer[m_bufferPos++] = '\\';
                            m_buffer[m_bufferPos++] = 'u';
                            m_buffer[m_bufferPos++] = '0';
                            m_buffer[m_bufferPos++] = '0';
                            m_buffer[m_bufferPos++] = _hex[ch >> 4 & 0xf];
                            m_buffer[m_bufferPos++] = _hex[ch & 0xf];
                        } else {
                            if (m_bufferPos >= BUFFER_SIZE)
                                FlushBuffer();
                            m_buffer[m_bufferPos++] = ch;
                        }
                        continue;
                }

                if (m_bufferPos + escapeSeq.Length > BUFFER_SIZE)
                    FlushBuffer();

                Array.Copy(escapeSeq, 0, m_buffer, m_bufferPos, escapeSeq.Length);
                m_bufferPos += escapeSeq.Length;
            }

            if (m_bufferPos >= BUFFER_SIZE)
                FlushBuffer();

            m_buffer[m_bufferPos++] = '"';

            FlushBuffer();
        }

        /// <summary>
        /// Writes a number using the invariant culture. NaN and the infinities go through
        /// <see cref="Write(string)"/> and therefore appear as quoted strings.
        /// </summary>
        void Write(double value) {
            // NOTE(review): the default format may not round-trip every double on
            // older runtimes - confirm whether "R"/"G17" is wanted here.
            if (double.IsNaN(value))
                Write("NaN");
            else if (double.IsNegativeInfinity(value))
                Write("-Infinity");
            else if (double.IsPositiveInfinity(value))
                Write("Infinity");
            else
                m_writer.Write(value.ToString(CultureInfo.InvariantCulture));
        }

        /// <summary>
        /// Throws <see cref="InvalidOperationException"/> naming the operation and the current context.
        /// </summary>
        void OperationNotApplicable(string opName) {
            throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of '{1}'", opName, m_context.element));
        }

    }
}
--- a/Implab/Formats/JSON/StringTranslator.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Formats/JSON/StringTranslator.cs Tue Sep 12 01:19:12 2017 +0300 @@ -7,7 +7,7 @@ using System.Text; using System.Threading.Tasks; -namespace Implab.Formats.JSON { +namespace Implab.Formats.Json { /// <summary> /// Класс для преобразования экранированной строки JSON /// </summary>
// Implab/Formats/ReaderScanner.cs (removed by this changeset, shown as it was before the removal)
using System;
using System.IO;

namespace Implab.Formats {
    /// <summary>
    /// A <see cref="TextScanner"/> which takes its input from a <see cref="TextReader"/>.
    /// </summary>
    public class ReaderScanner : TextScanner {
        // Defaults used by the single-argument constructor.
        const int DefaultChunkSize = 1024 * 4;
        const int DefaultBufferLimit = DefaultChunkSize * 1024;

        readonly TextReader m_reader;

        /// <summary>
        /// Creates a scanner over the reader; <paramref name="limit"/> and
        /// <paramref name="chunk"/> are handed over to the base class constructor.
        /// </summary>
        /// <param name="reader">The source of the text; must not be <c>null</c>.</param>
        /// <param name="limit">The first argument of the base constructor.</param>
        /// <param name="chunk">The second argument of the base constructor.</param>
        public ReaderScanner(TextReader reader, int limit, int chunk) : base(limit, chunk) {
            Safe.ArgumentNotNull(reader, nameof(reader));
            m_reader = reader;
        }

        /// <summary>
        /// Creates a scanner over the reader using the default limit and chunk size.
        /// </summary>
        public ReaderScanner(TextReader reader) : this(reader, DefaultBufferLimit, DefaultChunkSize) {
        }

        /// <summary>
        /// Forwards the read request to the underlying reader.
        /// </summary>
        protected override int Read(char[] buffer, int offset, int size) => m_reader.Read(buffer, offset, size);

        /// <summary>
        /// Disposes the underlying reader when <paramref name="disposing"/> is set,
        /// then runs the base class cleanup.
        /// </summary>
        protected override void Dispose(bool disposing) {
            if (disposing) {
                Safe.Dispose(m_reader);
            }

            base.Dispose(disposing);
        }
    }
}
// Implab/Formats/ScannerContext.cs (removed by this changeset, shown as it was before the removal)
namespace Implab.Formats {
    /// <summary>
    /// Holds the DFA tables which a <see cref="TextScanner"/> needs to recognize a token:
    /// the transition table, the final states, the tags of the states, the initial state
    /// and the alphabet.
    /// </summary>
    public class ScannerContext<TTag> {

        /// <summary>The transition table passed to the scanner.</summary>
        public int[,] Dfa { get; }

        /// <summary>The final state flags passed to the scanner.</summary>
        public bool[] Final { get; }

        /// <summary>The tags passed to the scanner.</summary>
        public TTag[][] Tags { get; }

        /// <summary>The initial state passed to the scanner.</summary>
        public int State { get; }

        /// <summary>The alphabet passed to the scanner.</summary>
        public int[] Alphabet { get; }

        /// <summary>
        /// Stores the supplied tables as they are, no copies are made.
        /// </summary>
        public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
            Dfa = dfa;
            Final = final;
            Tags = tags;
            State = state;
            Alphabet = alphabet;
        }

        /// <summary>
        /// Asks the scanner to read the next token using the tables of this context.
        /// </summary>
        /// <param name="scanner">The scanner to read from.</param>
        /// <param name="tag">Receives the tags produced by <c>ReadToken</c>.</param>
        /// <returns>The result of <c>TextScanner.ReadToken</c>.</returns>
        public bool Execute(TextScanner scanner, out TTag[] tag) =>
            scanner.ReadToken(Dfa, Final, Tags, State, Alphabet, out tag);
    }
}
// Implab/Formats/StringScanner.cs (removed by this changeset, shown as it was before the removal)
using System;

namespace Implab.Formats {
    /// <summary>
    /// A <see cref="TextScanner"/> over an in-memory string. The whole text is fed to the
    /// base class at construction time and <see cref="Read"/> never supplies more data.
    /// </summary>
    public class StringScanner : TextScanner {
        // Not referenced anywhere inside this class.
        const int CHUNK_SIZE = 1024;

        /// <summary>
        /// Creates a scanner over the specified text.
        /// </summary>
        /// <param name="text">The text to scan; must not be <c>null</c>.</param>
        public StringScanner(string text) : base(null) {
            Safe.ArgumentNotNull(text, nameof(text));

            char[] chars = text.ToCharArray();
            Feed(chars, 0, chars.Length);
        }

        /// <summary>
        /// Always reports that no characters were read.
        /// </summary>
        protected override int Read(char[] buffer, int offset, int size) => 0;
    }
}
// Implab/Formats/TextScanner.cs (removed by this changeset, shown as it was before the removal)
using System;
using Implab.Components;
using System.Diagnostics;
using Implab.Automaton;
using System.Text;

namespace Implab.Formats {
    /// <summary>
    /// Base class for the scanners which split a character stream into tokens using a DFA.
    /// The input is kept in a buffer which is refilled through <see cref="Read"/>.
    /// </summary>
    public abstract class TextScanner : Disposable {
        readonly int m_bufferMax;
        readonly int m_chunkSize;

        char[] m_buffer;
        // position of the first unconsumed char in the buffer
        int m_bufferOffset;
        // end of the valid data in the buffer (exclusive)
        int m_bufferSize;
        int m_tokenOffset;
        int m_tokenLength;

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
        /// </summary>
        /// <param name="bufferMax">The limit of the buffer size checked when the buffer grows.</param>
        /// <param name="chunkSize">The number of chars requested from <see cref="Read"/> at once.</param>
        protected TextScanner(int bufferMax, int chunkSize) {
            // check the arguments, the fields aren't assigned yet
            Debug.Assert(chunkSize <= bufferMax);

            m_bufferMax = bufferMax;
            m_chunkSize = chunkSize;
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class
        /// over a ready buffer. The chunk size stays zero, so <see cref="Feed()"/> never
        /// requests more data.
        /// </summary>
        /// <param name="buffer">The buffer with the data, may be <c>null</c>.</param>
        protected TextScanner(char[] buffer) {
            if (buffer != null) {
                m_buffer = buffer;
                m_bufferSize = buffer.Length;
            }
        }

        /// <summary>
        /// (hungry) Reads the next token.
        /// </summary>
        /// <returns><c>true</c>, if token internal was read, <c>false</c> if there is no more tokens in the stream.</returns>
        /// <param name="dfa">The transition map for the automaton</param>
        /// <param name="final">Final states of the automaton.</param>
        /// <param name="tags">Tags.</param>
        /// <param name="state">The initial state for the automaton.</param>
        /// <param name="alphabet">Maps a char code to the input symbol of the automaton.</param>
        /// <param name="tag">Receives the tags of the final state the token ended in.</param>
        /// <exception cref="ParserException">
        /// The input ended inside a token or contains a symbol the automaton can't accept.
        /// </exception>
        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
            m_tokenLength = 0;
            tag = null;

            var maxSymbol = alphabet.Length - 1;
            int next;
            do {
                // after the next chunk is read the offset in the buffer may change
                int pos = m_bufferOffset + m_tokenLength;
                next = state;
                while (pos < m_bufferSize) {
                    var ch = m_buffer[pos];

                    next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];

                    if (next == AutomatonConst.UNREACHABLE_STATE)
                        break;

                    state = next;
                    pos++;
                }
                m_tokenLength = pos - m_bufferOffset;
            } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());

            m_tokenOffset = m_bufferOffset;
            m_bufferOffset += m_tokenLength;

            if (final[state]) {
                tag = tags[state];
                return true;
            }

            if (m_bufferOffset == m_bufferSize) {
                if (m_tokenLength == 0) //EOF
                    return false;

                throw new ParserException();
            }

            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));

        }

        /// <summary>
        /// Replaces the buffer with the specified range of the array.
        /// </summary>
        protected void Feed(char[] buffer, int offset, int length) {
            m_buffer = buffer;
            m_bufferOffset = offset;
            m_bufferSize = offset + length;
        }

        /// <summary>
        /// Reads the next chunk of data into the buffer, the buffer is reallocated when
        /// there is not enough room for the chunk; the consumed data is dropped then.
        /// </summary>
        /// <returns><c>true</c> if some data was read, <c>false</c> otherwise.</returns>
        /// <exception cref="ParserException">The buffer would exceed the limit.</exception>
        protected bool Feed() {
            if (m_chunkSize <= 0)
                return false;

            if (m_buffer != null) {
                var free = m_buffer.Length - m_bufferSize;

                if (free < m_chunkSize) {
                    free += m_chunkSize;
                    var used = m_bufferSize - m_bufferOffset;
                    var size = used + free;

                    if (size > m_bufferMax)
                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax / 1024));

                    var temp = new char[size];

                    var read = Read(temp, used, m_chunkSize);
                    if (read == 0)
                        return false;

                    Array.Copy(m_buffer, m_bufferOffset, temp, 0, used);

                    m_bufferOffset = 0;
                    m_bufferSize = used + read;
                    m_buffer = temp;
                } else {
                    var read = Read(m_buffer, m_bufferSize, m_chunkSize);
                    if (read == 0)
                        return false;
                    // a read may return less than the chunk, count only the chars actually read
                    m_bufferSize += read;
                }
                return true;
            } else {
                Debug.Assert(m_bufferOffset == 0);
                m_buffer = new char[m_chunkSize];
                m_bufferSize = Read(m_buffer, 0, m_chunkSize);
                return (m_bufferSize != 0);
            }
        }

        /// <summary>
        /// Reads up to <paramref name="size"/> chars into the buffer starting at <paramref name="offset"/>.
        /// </summary>
        /// <returns>The number of chars read, zero means there is no more data.</returns>
        protected abstract int Read(char[] buffer, int offset, int size);

        /// <summary>
        /// Returns the text of the last token.
        /// </summary>
        public string GetTokenValue() {
            return new String(m_buffer, m_tokenOffset, m_tokenLength);
        }

        /// <summary>
        /// Copies the text of the last token to the specified array.
        /// </summary>
        public void CopyTokenTo(char[] buffer, int offset) {
            Array.Copy(m_buffer, m_tokenOffset, buffer, offset, m_tokenLength);
        }

        /// <summary>
        /// Appends the text of the last token to the specified builder.
        /// </summary>
        public void CopyTokenTo(StringBuilder sb) {
            sb.Append(m_buffer, m_tokenOffset, m_tokenLength);
        }

    }
}
--- a/Implab/Implab.csproj Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Implab.csproj Tue Sep 12 01:19:12 2017 +0300 @@ -83,6 +83,10 @@ <Compile Include="Diagnostics\TraceEvent.cs" /> <Compile Include="Diagnostics\TraceEventType.cs" /> <Compile Include="Diagnostics\TraceSourceAttribute.cs" /> + <Compile Include="Formats\CharMap.cs" /> + <Compile Include="Formats\InputScanner.cs" /> + <Compile Include="Formats\Json\JsonStringScanner.cs" /> + <Compile Include="Formats\Json\JsonTextScanner.cs" /> <Compile Include="ICancellable.cs" /> <Compile Include="IProgressHandler.cs" /> <Compile Include="IProgressNotifier.cs" /> @@ -164,16 +168,14 @@ <Compile Include="Automaton\RegularExpressions\Token.cs" /> <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> <Compile Include="Automaton\AutomatonTransition.cs" /> - <Compile Include="Formats\JSON\JSONElementContext.cs" /> - <Compile Include="Formats\JSON\JSONElementType.cs" /> - <Compile Include="Formats\JSON\JSONGrammar.cs" /> - <Compile Include="Formats\JSON\JSONParser.cs" /> - <Compile Include="Formats\JSON\JSONScanner.cs" /> - <Compile Include="Formats\JSON\JsonTokenType.cs" /> - <Compile Include="Formats\JSON\JSONWriter.cs" /> - <Compile Include="Formats\JSON\JSONXmlReader.cs" /> - <Compile Include="Formats\JSON\JSONXmlReaderOptions.cs" /> - <Compile Include="Formats\JSON\StringTranslator.cs" /> + <Compile Include="Formats\Json\JsonElementContext.cs" /> + <Compile Include="Formats\Json\JsonElementType.cs" /> + <Compile Include="Formats\Json\JsonGrammar.cs" /> + <Compile Include="Formats\Json\JsonParser.cs" /> + <Compile Include="Formats\Json\JsonScanner.cs" /> + <Compile Include="Formats\Json\JsonTokenType.cs" /> + <Compile Include="Formats\Json\JsonWriter.cs" /> + <Compile Include="Formats\Json\StringTranslator.cs" /> <Compile Include="Automaton\MapAlphabet.cs" /> <Compile Include="Formats\CharAlphabet.cs" /> <Compile Include="Formats\ByteAlphabet.cs" /> @@ -182,10 +184,6 @@ <Compile 
Include="Automaton\DFATable.cs" /> <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> - <Compile Include="Formats\TextScanner.cs" /> - <Compile Include="Formats\StringScanner.cs" /> - <Compile Include="Formats\ReaderScanner.cs" /> - <Compile Include="Formats\ScannerContext.cs" /> <Compile Include="Formats\Grammar.cs" /> <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
--- a/Implab/Safe.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Safe.cs Tue Sep 12 01:19:12 2017 +0300 @@ -5,6 +5,7 @@ using System.Text.RegularExpressions; using System.Diagnostics; using System.Collections; +using System.Runtime.CompilerServices; #if NET_4_5 using System.Threading.Tasks; @@ -14,11 +15,13 @@ { public static class Safe { + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentAssert(bool condition, string paramName) { if (!condition) throw new ArgumentException("The parameter is invalid", paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentMatch(string value, string paramName, Regex rx) { if (rx == null) throw new ArgumentNullException("rx"); @@ -26,26 +29,37 @@ throw new ArgumentException(String.Format("The prameter value must match {0}", rx), paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentNotEmpty(string value, string paramName) { if (String.IsNullOrEmpty(value)) throw new ArgumentException("The parameter can't be empty", paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentNotEmpty<T>(T[] value, string paramName) { if (value == null || value.Length == 0) throw new ArgumentException("The array must be not emty", paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentNotNull(object value, string paramName) { if (value == null) throw new ArgumentNullException(paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static void ArgumentGreaterThan(int value, int min, string paramName) { + if (value < min) + throw new ArgumentOutOfRangeException(paramName); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void ArgumentInRange(int value, int min, int max, string paramName) { if (value < min || value > max) throw new ArgumentOutOfRangeException(paramName); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public 
static void ArgumentOfType(object value, Type type, string paramName) { if (!type.IsInstanceOfType(value)) throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName);
--- a/Implab/Xml/JsonXmlReader.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Xml/JsonXmlReader.cs Tue Sep 12 01:19:12 2017 +0300 @@ -1,10 +1,8 @@ -using Implab.Formats.JSON; +using Implab.Formats.Json; using System; using System.Collections.Generic; using System.Globalization; using System.Linq; -using System.Text; -using System.Threading.Tasks; using System.Xml; namespace Implab.Xml { @@ -14,7 +12,7 @@ public bool skip; } - JSONParser m_parser; + JsonParser m_parser; JsonXmlReaderOptions m_options; JsonXmlReaderPosition m_position = JsonXmlReaderPosition.Initial; XmlNameTable m_nameTable; @@ -52,7 +50,6 @@ XmlNameContext m_context; - int m_nextPrefix = 1; readonly string m_xmlnsPrefix; readonly string m_xmlnsNamespace; @@ -60,7 +57,7 @@ readonly string m_xsiNamespace; - public JsonXmlReader(JSONParser parser, JsonXmlReaderOptions options) { + public JsonXmlReader(JsonParser parser, JsonXmlReaderOptions options) { Safe.ArgumentNotNull(parser, nameof(parser)); m_parser = parser; @@ -480,35 +477,35 @@ var jsonName = m_nameTable.Add(m_parser.ElementName); switch (m_parser.ElementType) { - case JSONElementType.BeginObject: + case JsonElementType.BeginObject: if (!EnterJsonObject(jsonName, out elementName)) continue; m_position = JsonXmlReaderPosition.BeginObject; ElementNode(elementName, m_jsonNamespace, elementAttrs, false); break; - case JSONElementType.EndObject: + case JsonElementType.EndObject: if (!LeaveJsonScope(out elementName)) continue; m_position = JsonXmlReaderPosition.EndObject; EndElementNode(elementName, m_jsonNamespace); break; - case JSONElementType.BeginArray: + case JsonElementType.BeginArray: if (!EnterJsonArray(jsonName, out elementName)) continue; m_position = JsonXmlReaderPosition.BeginArray; ElementNode(elementName, m_jsonNamespace, elementAttrs, false); break; - case JSONElementType.EndArray: + case JsonElementType.EndArray: if (!LeaveJsonScope(out elementName)) continue; m_position = JsonXmlReaderPosition.EndArray; EndElementNode(elementName, 
m_jsonNamespace); break; - case JSONElementType.Value: + case JsonElementType.Value: if (!VisitJsonValue(jsonName, out m_jsonValueName)) continue;
--- a/Implab/Xml/JsonXmlReaderOptions.cs Sat Sep 09 03:53:13 2017 +0300 +++ b/Implab/Xml/JsonXmlReaderOptions.cs Tue Sep 12 01:19:12 2017 +0300 @@ -2,16 +2,16 @@ using System; using System.Xml; -namespace Implab.Formats.JSON { +namespace Implab.Xml { /// <summary> - /// Набор необязательных параметров для <see cref="JSONXmlReader"/>, позволяющий управлять процессом + /// Набор необязательных параметров для <see cref="JsonXmlReader"/>, позволяющий управлять процессом /// интерпретации <c>JSON</c> документа. /// </summary> - public class JSONXmlReaderOptions : ICloneable { + public class JsonXmlReaderOptions : ICloneable { /// <summary> /// Пространство имен в котором будут располагаться читаемые элементы документа /// </summary> - public string NamespaceURI { + public string NamespaceUri { get; set; }