Mercurial > pub > ImplabNet
changeset 236:302ca905c19e v2
JsonReader optimizations
author | cin |
---|---|
date | Tue, 21 Nov 2017 14:57:58 +0300 |
parents | b49969a7043c |
children | f2150c16b476 |
files | Implab.Format.Test/JsonTests.cs Implab.Playground/Program.cs Implab/Automaton/AutomatonConst.cs Implab/Automaton/DFATable.cs Implab/Automaton/MapAlphabet.cs Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Implab/Formats/CharAlphabet.cs Implab/Formats/CharMap.cs Implab/Formats/Grammar.cs Implab/Formats/InputScanner.cs Implab/Formats/Json/JsonGrammar.cs Implab/Formats/Json/JsonReader.cs Implab/Formats/Json/JsonScanner.cs Implab/Implab.csproj Implab/Xml/JsonXmlReader.cs Implab/Xml/XmlSimpleAttribute.cs |
diffstat | 16 files changed, 85 insertions(+), 164 deletions(-) [+] |
line wrap: on
line diff
--- a/Implab.Format.Test/JsonTests.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab.Format.Test/JsonTests.cs Tue Nov 21 14:57:58 2017 +0300 @@ -9,8 +9,8 @@ namespace Implab.Format.Test { [TestFixture] - public class JsonTests { - + public class JsonTests { + [Test] public void TestScannerValidTokens() { using (var scanner = JsonStringScanner.Create(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:")) { @@ -114,10 +114,36 @@ DumpJsonParse("[{\"info\": [7,8,9]}]"); DumpJsonFlatParse("[1,2,\"\",[3,4],{\"info\": [5,6]},{\"num\": [7,8,null]}, null,[null]]"); } - + + [Test] + public void JsonBenchmark() { + var t = Environment.TickCount; + using (var reader = new JsonXmlReader(JsonReader.Create("e:\\citylots.json"), new JsonXmlReaderOptions { NamespaceUri = "XmlReaderSimpleTest", RootName = "data" })) { + while (reader.Read()) { + } + } + Console.WriteLine($"JsonXmlReader: {Environment.TickCount - t} ms"); + + t = Environment.TickCount; + using(var reader = JsonReader.Create("e:\\citylots.json")) { + while(reader.Read()) { + } + } + + Console.WriteLine($"JsonReader: {Environment.TickCount - t} ms"); + + t = Environment.TickCount; + using (var reader = XmlReader.Create("file:///e:\\citylots.xml")) { + while (reader.Read()) { + } + } + + Console.WriteLine($"XmlReader: {Environment.TickCount - t} ms"); + } + void AssertRead(XmlReader reader, XmlNodeType expected) { Assert.IsTrue(reader.Read()); - Console.WriteLine($"{new string(' ', reader.Depth*2)}{reader}"); + Console.WriteLine($"{new string(' ', reader.Depth * 2)}{reader}"); Assert.AreEqual(expected, reader.NodeType); }
--- a/Implab.Playground/Program.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab.Playground/Program.cs Tue Nov 21 14:57:58 2017 +0300 @@ -78,103 +78,13 @@ static void Main(string[] args) { - //var queue = new ConcurrentQueue<int>(); - var queue = new AsyncQueue<int>(); - //var queue = new SimpleAsyncQueue<int>(); - - const int wBatch = 32; - const long wCount = 1000000; - const long total = wBatch * wCount * 3; - - long r1 = 0, r2 = 0, r3 = 0; - const int rBatch = 1000; - long read = 0; - - var t1 = Environment.TickCount; - - AsyncPool.RunThread( - () => { - var buffer = new int[wBatch]; - for (int i = 0; i < wBatch; i++) - buffer[i] = 1; - - for (int i = 0; i < wCount; i++) - EnqueueRange(queue, buffer, 0, wBatch); - Console.WriteLine("done writer #1: {0} ms", Environment.TickCount - t1); - }, - () => { - var buffer = new int[wBatch]; - for (int i = 0; i < wBatch; i++) - buffer[i] = 1; - - for (int i = 0; i < wCount; i++) - EnqueueRange(queue, buffer, 0, wBatch); - Console.WriteLine("done writer #2: {0} ms", Environment.TickCount - t1); - }, - () => { - var buffer = new int[wBatch]; - for (int i = 0; i < wBatch; i++) - buffer[i] = 1; - - for (int i = 0; i < wCount; i++) - EnqueueRange(queue, buffer, 0, wBatch); - Console.WriteLine("done writer #3: {0} ms", Environment.TickCount - t1); - }, - () => { - var buffer = new int[rBatch]; - - while (read < total) { - int actual; - if (TryDequeueRange(queue, buffer, 0, rBatch, out actual)) { - for (int i = 0; i < actual; i++) - r1 += buffer[i]; - Interlocked.Add(ref read, actual); - } - } - - Console.WriteLine("done reader #1: {0} ms", Environment.TickCount - t1); - }/*, - () => { - var buffer = new int[rBatch]; - - while (read < total) { - int actual; - if (TryDequeueRange(queue, buffer, 0, rBatch, out actual)) { - for (int i = 0; i < actual; i++) - r2 += buffer[i]; - Interlocked.Add(ref read, actual); - } - } - - Console.WriteLine("done reader #2: {0} ms", Environment.TickCount - t1); - }*//*, - () => { - var buffer = new int[rBatch]; - - while (read < total) { - int actual; - if (TryDequeueRange(queue, buffer, 0, rBatch, out actual)) { - for (int i = 0; i < actual; i++) - r3 += buffer[i]; - Interlocked.Add(ref read, actual); - } - } - - Console.WriteLine("done reader #3: {0} ms", Environment.TickCount - t1); - }*/ - ) - .PromiseAll() - .Join(); - - - Console.WriteLine( - "done: {0} ms, summ#1: {1}, summ#2: {2}, total: {3}, count: {4}", - Environment.TickCount - t1, - r1, - r2, - r1 + r2 + r3, - total - ); + var t = Environment.TickCount; + using (var reader = JsonReader.Create("e:\\citylots.json")) { + while (reader.Read()) { + } + } + + Console.WriteLine($"JsonReader: {Environment.TickCount - t} ms"); Console.WriteLine("done"); }
--- a/Implab/Automaton/AutomatonConst.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Automaton/AutomatonConst.cs Tue Nov 21 14:57:58 2017 +0300 @@ -1,9 +1,9 @@ namespace Implab.Automaton { public static class AutomatonConst { - public const int UNREACHABLE_STATE = -1; + public const int UnreachableState = -1; - public const int UNCLASSIFIED_INPUT = 0; + public const int UnclassifiedInput = 0; } }
--- a/Implab/Automaton/DFATable.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Automaton/DFATable.cs Tue Nov 21 14:57:58 2017 +0300 @@ -116,7 +116,7 @@ for (int i = 0; i < StateCount; i++) for (int j = 0; j < AlphabetSize; j++) - table[i, j] = AutomatonConst.UNREACHABLE_STATE; + table[i, j] = AutomatonConst.UnreachableState; foreach (var t in this) table[t.s1,t.edge] = (byte)t.s2; @@ -290,11 +290,11 @@ var nextCls = 0; foreach (var item in minClasses) { - if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) + if (nextCls == AutomatonConst.UnclassifiedInput) nextCls++; // сохраняем DFAConst.UNCLASSIFIED_INPUT - var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++; + var cls = item.Contains(AutomatonConst.UnclassifiedInput) ? AutomatonConst.UnclassifiedInput : nextCls++; optimalDFA.AddSymbol(cls); foreach (var a in item) @@ -326,7 +326,7 @@ data.Add(String.Format( "{0} -> {2} [label={1}];", String.Join("", stateAlphabet.GetSymbols(t.s1)), - ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), + ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UnclassifiedInput ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), String.Join("", stateAlphabet.GetSymbols(t.s2)) )); data.Add("}");
--- a/Implab/Automaton/MapAlphabet.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Automaton/MapAlphabet.cs Tue Nov 21 14:57:58 2017 +0300 @@ -54,7 +54,7 @@ return cls; if (!m_supportUnclassified) throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); - return AutomatonConst.UNCLASSIFIED_INPUT; + return AutomatonConst.UnclassifiedInput; } public int Count {
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Tue Nov 21 14:57:58 2017 +0300 @@ -129,7 +129,7 @@ if (m_root == null) m_root = token; m_idx++; - m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; + m_indexes[m_idx] = AutomatonConst.UnclassifiedInput; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); Followpos(m_idx);
--- a/Implab/Formats/CharAlphabet.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/CharAlphabet.cs Tue Nov 21 14:57:58 2017 +0300 @@ -4,7 +4,7 @@ using System; namespace Implab.Formats { - public class CharAlphabet: IndexedAlphabetBase<char> { + public class CharAlphabet : IndexedAlphabetBase<char> { public override int GetSymbolIndex(char symbol) { return symbol;
--- a/Implab/Formats/CharMap.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/CharMap.cs Tue Nov 21 14:57:58 2017 +0300 @@ -25,7 +25,7 @@ } public bool Contains(char symbol) { - return symbol >= m_min && symbol <= m_max && m_map[symbol-m_min] != AutomatonConst.UNCLASSIFIED_INPUT; + return symbol >= m_min && symbol <= m_max && m_map[symbol-m_min] != AutomatonConst.UnclassifiedInput; } public IEnumerable<char> GetSymbols(int cls) { @@ -36,7 +36,7 @@ [MethodImpl(MethodImplOptions.AggressiveInlining)] public int Translate(char symbol) { - return symbol >= m_min && symbol <= m_max ? m_map[symbol-m_min] : AutomatonConst.UNCLASSIFIED_INPUT; + return symbol >= m_min && symbol <= m_max ? m_map[symbol-m_min] : AutomatonConst.UnclassifiedInput; } } }
--- a/Implab/Formats/Grammar.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/Grammar.cs Tue Nov 21 14:57:58 2017 +0300 @@ -16,7 +16,7 @@ } protected SymbolToken UnclassifiedToken() { - return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT); + return new SymbolToken(AutomatonConst.UnclassifiedInput); } protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { @@ -42,7 +42,7 @@ int TranslateOrAdd(TSymbol ch) { var t = AlphabetBuilder.Translate(ch); - if (t == AutomatonConst.UNCLASSIFIED_INPUT) + if (t == AutomatonConst.UnclassifiedInput) t = AlphabetBuilder.DefineSymbol(ch); return t; } @@ -53,7 +53,7 @@ int TranslateOrDie(TSymbol ch) { var t = AlphabetBuilder.Translate(ch); - if (t == AutomatonConst.UNCLASSIFIED_INPUT) + if (t == AutomatonConst.UnclassifiedInput) throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); return t; }
--- a/Implab/Formats/InputScanner.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/InputScanner.cs Tue Nov 21 14:57:58 2017 +0300 @@ -69,7 +69,7 @@ while(offset < max) { next = m_dfa[next, m_alphabet.Translate(data[offset])]; - if (next == AutomatonConst.UNREACHABLE_STATE) { + if (next == AutomatonConst.UnreachableState) { // scanner stops on the next position after last recognized symbol m_position = offset; return false;
--- a/Implab/Formats/Json/JsonGrammar.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/Json/JsonGrammar.cs Tue Nov 21 14:57:58 2017 +0300 @@ -31,8 +31,8 @@ get { return _instance.Value; } } - readonly InputScanner<TokenType> m_jsonExpression; - readonly InputScanner<TokenType> m_stringExpression; + readonly FastInputScanner<TokenType> m_jsonExpression; + readonly FastInputScanner<TokenType> m_stringExpression; readonly CharAlphabet m_defaultAlphabet = new CharAlphabet(); public CharAlphabet DefaultAlphabet { get { return m_defaultAlphabet; } } @@ -87,15 +87,15 @@ .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - m_jsonExpression = BuildScanner(jsonExpression); - m_stringExpression = BuildScanner(jsonStringExpression); + m_jsonExpression = BuildFastScanner(jsonExpression); + m_stringExpression = BuildFastScanner(jsonStringExpression); } - public static InputScanner<TokenType> CreateJsonExpressionScanner() { + public static FastInputScanner<TokenType> CreateJsonExpressionScanner() { return Instance.m_jsonExpression.Clone(); } - public static InputScanner<TokenType> CreateStringExpressionScanner() { + public static FastInputScanner<TokenType> CreateStringExpressionScanner() { return Instance.m_stringExpression.Clone(); } @@ -109,7 +109,7 @@ return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); } - public InputScanner<TokenType> BuildScanner(Token regexp) { + public FastInputScanner<TokenType> BuildFastScanner(Token regexp) { var dfa = new RegularDFA<char, TokenType>(AlphabetBuilder); var visitor = new RegularExpressionVisitor<TokenType>(dfa); @@ -122,12 +122,12 @@ var ab = new CharAlphabet(); var optimal = dfa.Optimize(ab); - return new InputScanner<TokenType>( + return new FastInputScanner<TokenType>( optimal.CreateTransitionTable(), optimal.CreateFinalStateTable(), NormalizeTags(optimal.CreateTagTable()), optimal.InitialState, - ab.CreateCharMap() + ab.GetTranslationMap() ); }
--- a/Implab/Formats/Json/JsonReader.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/Json/JsonReader.cs Tue Nov 21 14:57:58 2017 +0300 @@ -48,7 +48,7 @@ public bool Move(JsonTokenType token) { var next = m_dfa[m_state, (int)token]; - if (next == AutomatonConst.UNREACHABLE_STATE) + if (next == AutomatonConst.UnreachableState) return false; m_state = next; return true; @@ -116,7 +116,7 @@ MemberContext m_memberContext = MemberContext.MemberValue; JsonElementType m_elementType; - object m_elementValue; + string m_elementValue; string m_memberName = String.Empty; Stack<ParserContext> m_stack = new Stack<ParserContext>(); @@ -152,7 +152,7 @@ /// <summary> /// Значение элемента. Только для элементов типа <see cref="JsonElementType.Value"/>, для остальных <c>null</c> /// </summary> - public object ElementValue { + public string ElementValue { get { return m_elementValue; } } @@ -213,11 +213,11 @@ return true; case JsonTokenType.Number: m_elementType = JsonElementType.Value; - m_elementValue = double.Parse(tokenValue, CultureInfo.InvariantCulture); + m_elementValue = tokenValue; return true; case JsonTokenType.Literal: m_elementType = JsonElementType.Value; - m_elementValue = ParseLiteral(tokenValue); + m_elementValue = tokenValue == "null" ? null : tokenValue; return true; case JsonTokenType.NameSeparator: m_memberContext = MemberContext.MemberValue;
--- a/Implab/Formats/Json/JsonScanner.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Formats/Json/JsonScanner.cs Tue Nov 21 14:57:58 2017 +0300 @@ -10,8 +10,8 @@ /// Сканнер (лексер), разбивающий поток символов на токены JSON. /// </summary> public abstract class JsonScanner : Disposable { - readonly InputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner(); - readonly InputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner(); + readonly FastInputScanner<JsonGrammar.TokenType> m_jsonContext = JsonGrammar.CreateJsonExpressionScanner(); + readonly FastInputScanner<JsonGrammar.TokenType> m_stringContext = JsonGrammar.CreateStringExpressionScanner(); readonly char[] m_unescapeBuf = new char[4]; readonly char[] m_buffer; @@ -25,7 +25,7 @@ m_length = length; } - bool ReadChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { + bool ReadChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { scanner.ResetState(); while(scanner.Scan(m_buffer, m_pos, m_length)) { @@ -71,7 +71,7 @@ return true; } - bool ReadStringChunk(InputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { + bool ReadStringChunk(FastInputScanner<JsonGrammar.TokenType> scanner, out JsonGrammar.TokenType tokenType) { scanner.ResetState(); while (scanner.Scan(m_buffer, m_pos, m_length)) { @@ -107,7 +107,7 @@ // scanner stops as scannerPos if (!scanner.IsFinal) - throw new ParserException($"Unexpected character '{m_buffer[scannerPos + 1]}'"); + throw new ParserException($"Unexpected character '{m_buffer[scannerPos]}'"); if (scannerPos != m_pos) { m_tokenBuilder.Append(m_buffer, m_pos, scannerPos - m_pos);
--- a/Implab/Implab.csproj Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Implab.csproj Tue Nov 21 14:57:58 2017 +0300 @@ -55,6 +55,7 @@ <Compile Include="Diagnostics\TraceEventType.cs" /> <Compile Include="Diagnostics\TraceSourceAttribute.cs" /> <Compile Include="Formats\CharMap.cs" /> + <Compile Include="Formats\FastInpurScanner.cs" /> <Compile Include="Formats\InputScanner.cs" /> <Compile Include="Formats\Json\JsonStringScanner.cs" /> <Compile Include="Formats\Json\JsonTextScanner.cs" />
--- a/Implab/Xml/JsonXmlReader.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Xml/JsonXmlReader.cs Tue Nov 21 14:57:58 2017 +0300 @@ -39,7 +39,7 @@ int m_xmlDepth; XmlSimpleAttribute[] m_attributes; - object m_value; + string m_value; bool m_isEmpty; XmlNodeType m_nodeType = XmlNodeType.None; @@ -158,29 +158,13 @@ public override string Value { get { - return ConvertValueToString(m_value); + return m_value; } } - - static string ConvertValueToString(object value) { - if (value == null) - return string.Empty; - - switch (Convert.GetTypeCode(value)) { - case TypeCode.Double: - return ((double)value).ToString(CultureInfo.InvariantCulture); - case TypeCode.String: - return (string)value; - case TypeCode.Boolean: - return (bool)value ? "true" : "false"; - default: - return value.ToString(); - } - } - + public override string GetAttribute(int i) { Safe.ArgumentInRange(i, 0, AttributeCount - 1, nameof(i)); - return ConvertValueToString(m_attributes[i].Value); + return m_attributes[i].Value; } public override string GetAttribute(string name) { @@ -188,7 +172,7 @@ return null; var qName = m_context.Resolve(name); var attr = Array.Find(m_attributes, x => x.QName == qName); - var value = ConvertValueToString(attr?.Value); + var value = attr?.Value; return value == string.Empty ? null : value; } @@ -197,7 +181,7 @@ return null; var qName = new XmlQualifiedName(name, namespaceURI); var attr = Array.Find(m_attributes, x => x.QName == qName); - var value = ConvertValueToString(attr?.Value); + var value = attr?.Value; return value == string.Empty ? null : value; } @@ -319,7 +303,7 @@ } } - void ValueNode(object value) { + void ValueNode(string value) { if (!IsSibling()) // the node is nested m_xmlDepth++; @@ -344,11 +328,11 @@ if (attr.QName.Name == "xmlns") { if (context == m_context) context = new XmlNameContext(m_context, m_xmlDepth); - context.DefinePrefix(ConvertValueToString(attr.Value), string.Empty); + context.DefinePrefix(attr.Value, string.Empty); } else if (attr.Prefix == m_xmlnsPrefix) { if (context == m_context) context = new XmlNameContext(m_context, m_xmlDepth); - context.DefinePrefix(ConvertValueToString(attr.Value), attr.QName.Name); + context.DefinePrefix(attr.Value, attr.QName.Name); } else { string attrPrefix; if (string.IsNullOrEmpty(attr.QName.Namespace)) @@ -516,7 +500,7 @@ m_jsonValueName, m_jsonNamespace, new[] { - new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, true) + new XmlSimpleAttribute("nil", m_xsiNamespace, m_xsiPrefix, "true") }, true ); @@ -607,7 +591,7 @@ public override string ToString() { switch (NodeType) { case XmlNodeType.Element: - return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{ConvertValueToString(x.Value)}'"))} {(IsEmptyElement ? "/" : "")}>"; + return $"<{Name} {string.Join(" ", (m_attributes ?? new XmlSimpleAttribute[0]).Select(x => $"{x.Prefix}{(string.IsNullOrEmpty(x.Prefix) ? "" : ":")}{x.QName.Name}='{x.Value}'"))} {(IsEmptyElement ? "/" : "")}>"; case XmlNodeType.Attribute: return $"@{Name}"; case XmlNodeType.Text:
--- a/Implab/Xml/XmlSimpleAttribute.cs Thu Oct 05 09:24:49 2017 +0300 +++ b/Implab/Xml/XmlSimpleAttribute.cs Tue Nov 21 14:57:58 2017 +0300 @@ -7,7 +7,7 @@ namespace Implab.Xml { public class XmlSimpleAttribute { - public XmlSimpleAttribute(string name, string ns, string prefix, object value) { + public XmlSimpleAttribute(string name, string ns, string prefix, string value) { QName = new XmlQualifiedName(name, ns); Prefix = prefix; Value = value; @@ -17,6 +17,6 @@ public string Prefix { get; set; } - public object Value { get; set; } + public string Value { get; set; } } }