# HG changeset patch # User cin # Date 1458834730 -10800 # Node ID 76e8f2ba12b82dd5295d3ce794314e9922b39e99 # Parent b2b6a6640aa339983e3799229fb9716931581663 pretty print DFA, the minimization is still buggy diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj --- a/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Thu Mar 24 18:52:10 2016 +0300 @@ -10,6 +10,7 @@ Implab.Format.Test Implab.Format.Test v4.5 + 0.2 true @@ -32,7 +33,7 @@ - ..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll + ..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll @@ -40,6 +41,12 @@ + + {F550F1F8-8746-4AD0-9614-855F4C4B7F05} + Implab + + + \ No newline at end of file diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab.Test/Implab.Format.Test/JsonTests.cs --- a/Implab.Test/Implab.Format.Test/JsonTests.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab.Test/Implab.Format.Test/JsonTests.cs Thu Mar 24 18:52:10 2016 +0300 @@ -1,11 +1,49 @@ using NUnit.Framework; using System; +using Implab.Formats.JSON; namespace Implab.Format.Test { - [TestFixture()] + [TestFixture] public class JsonTests { - [Test()] - public void TestCase() { + [Test] + public void TestScannerValidTokens() { + var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:"); + + Tuple[] expexted = new [] { + new Tuple(JsonTokenType.Number, 9123d), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Number, -123d ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Number, 0d ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Number, 0.1d ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Number, -0.2d ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Number, -0.1e3d ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Number, 1.3E-3d ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.String, "some \t\n text" ), + new Tuple(JsonTokenType.ValueSeparator, ", " ), + new Tuple(JsonTokenType.Literal, "literal" ), + new Tuple(JsonTokenType.BeginArray, " [" ), + new Tuple(JsonTokenType.EndArray, "]" ), + new Tuple(JsonTokenType.BeginObject, "{" ), + new Tuple(JsonTokenType.EndObject, "}" ), + new Tuple(JsonTokenType.NameSeparator, ":" ) + }; + + object value; + JsonTokenType tokenType; + for (var i = 0; i < expexted.Length; i++) { + + Assert.IsTrue(scanner.ReadToken(out value, out tokenType)); + Assert.AreEqual(expexted[i].Item1, tokenType); + Assert.AreEqual(expexted[i].Item2, value); + } + + Assert.IsFalse(scanner.ReadToken(out value, out tokenType)); } } } diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab.Test/Implab.Format.Test/packages.config --- a/Implab.Test/Implab.Format.Test/packages.config Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab.Test/Implab.Format.Test/packages.config Thu Mar 24 18:52:10 2016 +0300 @@ -1,4 +1,4 @@  - + \ No newline at end of file diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Automaton/DFATable.cs --- a/Implab/Automaton/DFATable.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Automaton/DFATable.cs Thu Mar 24 18:52:10 2016 +0300 @@ -3,6 +3,9 @@ using System.Collections.Generic; using System.Linq; using System.Diagnostics; +using System.IO; +using System.CodeDom.Compiler; +using System.CodeDom; namespace Implab.Automaton { public class DFATable : IDFATableBuilder { @@ -103,6 +106,11 @@ return GetEnumerator(); } + public void AddSymbol(int symbol) { + Safe.ArgumentAssert(symbol >= 0, "symbol"); + m_symbolCount = Math.Max(symbol + 1, m_symbolCount); + } + public int[,] CreateTransitionTable() { var table = new int[StateCount,AlphabetSize]; @@ -162,7 +170,7 @@ var state = new HashSet( Enumerable - .Range(0, m_stateCount - 1) + .Range(0, m_stateCount) .Where(i => !m_finalStates.Contains(i)) ); @@ -182,10 +190,13 @@ for (int c = 0; c < m_symbolCount; c++) { var stateX = new HashSet(); - foreach(var a in stateA.Where(rmap.ContainsKey)) - stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' + //foreach(var a in stateA.Where(rmap.ContainsKey)) + // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' - foreach (var stateY in optimalStates.ToArray()) { + stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1)); + + var tmp = optimalStates.ToArray(); + foreach (var stateY in tmp) { if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { var stateR1 = new HashSet(stateY); var stateR2 = new HashSet(stateY); @@ -245,12 +256,8 @@ foreach (var term in A) { // ищем все переходы класса по символу term - var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray(); + var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First(); - Debug.Assert(res.Length <= 1); - - var s2 = res.Length > 0 ? res[0] : -1; - HashSet a2; if (!classes.TryGetValue(s2, out a2)) { a2 = new HashSet(); @@ -283,6 +290,7 @@ // сохраняем DFAConst.UNCLASSIFIED_INPUT var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++; + optimalDFA.AddSymbol(cls); foreach (var a in item) alphabetMap[a] = cls; @@ -298,19 +306,38 @@ optimalDFA.Add(t); } - protected void PrintDFA(IAlphabet inputAlphabet, IAlphabet stateAlphabet) { + protected string PrintDFA(IAlphabet inputAlphabet, IAlphabet stateAlphabet) { Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); - foreach(var t in m_transitions) - Console.WriteLine( - "[{0}] -{{{1}}}-> [{2}]{3}", - String.Join(",", stateAlphabet.GetSymbols(t.s1)), - String.Join("", inputAlphabet.GetSymbols(t.edge)), - String.Join(",", stateAlphabet.GetSymbols(t.s2)), - m_finalStates.Contains(t.s2) ? "$" : "" - ); + var data = new List(); + + data.Add("digraph dfa {"); + + foreach (var final in m_finalStates) + data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final)))); + + foreach (var t in m_transitions) + data.Add(String.Format( + "{0} -> {2} [label={1}];", + String.Join("", stateAlphabet.GetSymbols(t.s1)), + ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), + String.Join("", stateAlphabet.GetSymbols(t.s2)) + )); + data.Add("}"); + return String.Join("\n", data); } + static string ToLiteral(string input) + { + using (var writer = new StringWriter()) + { + using (var provider = CodeDomProvider.CreateProvider("CSharp")) + { + provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null); + return writer.ToString(); + } + } + } } } diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Automaton/IDFATableBuilder.cs --- a/Implab/Automaton/IDFATableBuilder.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Automaton/IDFATableBuilder.cs Thu Mar 24 18:52:10 2016 +0300 @@ -10,6 +10,17 @@ void MarkFinalState(int state); void SetInitialState(int s); + + /// + /// Increases if needed the input alphabet size to hold the specified symbol. + /// + /// + /// + /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1) + /// + /// + /// Symbol. + void AddSymbol(int symbol); } } diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Automaton/RegularExpressions/RegularDFA.cs --- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Thu Mar 24 18:52:10 2016 +0300 @@ -66,6 +66,9 @@ // skip all unclassified symbols foreach (var pair in alphaMap.Where(x => x.Value != 0)) alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); + + var orig = ToString(); + var opt = dfa.ToString(); return dfa; } @@ -78,6 +81,15 @@ return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g)); } + public override string ToString() { + var states = new MapAlphabet(false, null); + + for (int i = 0; i < StateCount; i++) + states.DefineSymbol(string.Format("s{0}", i), i); + + return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize); + } + } } diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Components/LazyAndWeak.cs --- a/Implab/Components/LazyAndWeak.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Components/LazyAndWeak.cs Thu Mar 24 18:52:10 2016 +0300 @@ -7,7 +7,7 @@ /// /// /// Usefull when dealing with memory-intensive objects which are frequently used. - /// This class is similar to except is a singleton. + /// This class is similar to except it is a singleton. /// public class LazyAndWeak where T : class { @@ -44,6 +44,7 @@ } else { lock (m_lock) { // double check + weak = m_reference; if (weak != null) { value = weak.Target as T; if (value != null) diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Formats/JSON/JSONGrammar.cs --- a/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 24 18:52:10 2016 +0300 @@ -108,7 +108,7 @@ } Token SymbolRangeToken(char start, char stop) { - return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x)); + return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); } protected override IndexedAlphabetBase CreateAlphabet() { diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Formats/StringScanner.cs --- a/Implab/Formats/StringScanner.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Formats/StringScanner.cs Thu Mar 24 18:52:10 2016 +0300 @@ -4,22 +4,14 @@ public class StringScanner: TextScanner { const int CHUNK_SIZE = 1024; - readonly string m_text; - int m_pos; - - public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) { - m_text = text; - Feed(); + public StringScanner(string text) : base(null) { + Safe.ArgumentNotNull(text, "text"); + var data = text.ToCharArray(); + Feed(data, 0, data.Length); } protected override int Read(char[] buffer, int offset, int size) { - var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size; - - m_text.CopyTo(m_pos,buffer,offset, actual); - - m_pos += actual; - - return actual; + return 0; } } } diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Formats/TextScanner.cs --- a/Implab/Formats/TextScanner.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Formats/TextScanner.cs Thu Mar 24 18:52:10 2016 +0300 @@ -53,29 +53,24 @@ tag = null; var maxSymbol = alphabet.Length - 1; - + int next; do { // after the next chunk is read the offset in the buffer may change int pos = m_bufferOffset + m_tokenLength; - + next = state; while (pos < m_bufferSize) { var ch = m_buffer[pos]; - try { - var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; + next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; if (next == AutomatonConst.UNREACHABLE_STATE) break; - + state = next; - }catch { - throw; - } pos++; } - m_tokenLength = pos - m_bufferOffset; - } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); + } while (next != AutomatonConst.UNREACHABLE_STATE && Feed()); m_tokenOffset = m_bufferOffset; m_bufferOffset += m_tokenLength; @@ -150,7 +145,7 @@ } public void CopyTokenTo(char[] buffer, int offset) { - m_buffer.CopyTo(buffer, offset); + Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength); } public void CopyTokenTo(StringBuilder sb) {