changeset 182:76e8f2ba12b8 ref20160224

pretty print DFA, the minimization is still buggy
author cin
date Thu, 24 Mar 2016 18:52:10 +0300
parents b2b6a6640aa3
children 4f82e0f161c3
files Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Implab.Test/Implab.Format.Test/JsonTests.cs Implab.Test/Implab.Format.Test/packages.config Implab/Automaton/DFATable.cs Implab/Automaton/IDFATableBuilder.cs Implab/Automaton/RegularExpressions/RegularDFA.cs Implab/Components/LazyAndWeak.cs Implab/Formats/JSON/JSONGrammar.cs Implab/Formats/StringScanner.cs Implab/Formats/TextScanner.cs
diffstat 10 files changed, 132 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj	Thu Mar 24 18:52:10 2016 +0300
@@ -10,6 +10,7 @@
     <RootNamespace>Implab.Format.Test</RootNamespace>
     <AssemblyName>Implab.Format.Test</AssemblyName>
     <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
+    <ReleaseVersion>0.2</ReleaseVersion>
   </PropertyGroup>
   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
     <DebugSymbols>true</DebugSymbols>
@@ -32,7 +33,7 @@
   <ItemGroup>
     <Reference Include="System" />
     <Reference Include="nunit.framework">
-      <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath>
+      <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath>
     </Reference>
   </ItemGroup>
   <ItemGroup>
@@ -40,6 +41,12 @@
   </ItemGroup>
   <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
   <ItemGroup>
+    <ProjectReference Include="..\..\Implab\Implab.csproj">
+      <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project>
+      <Name>Implab</Name>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
     <None Include="packages.config" />
   </ItemGroup>
 </Project>
\ No newline at end of file
--- a/Implab.Test/Implab.Format.Test/JsonTests.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab.Test/Implab.Format.Test/JsonTests.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -1,11 +1,49 @@
 using NUnit.Framework;
 using System;
+using Implab.Formats.JSON;
 
 namespace Implab.Format.Test {
-    [TestFixture()]
+    [TestFixture]
     public class JsonTests {
-        [Test()]
-        public void TestCase() {
+        [Test]
+        public void TestScannerValidTokens() {
+            var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:");
+
+            Tuple<JsonTokenType,object>[] expexted =  new [] {
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n  text" ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ),
+                new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" )
+            };
+
+            object value;
+            JsonTokenType tokenType;
+            for (var i = 0; i < expexted.Length; i++) {
+                
+                Assert.IsTrue(scanner.ReadToken(out value, out tokenType));
+                Assert.AreEqual(expexted[i].Item1, tokenType);
+                Assert.AreEqual(expexted[i].Item2, value);
+            }
+
+            Assert.IsFalse(scanner.ReadToken(out value, out tokenType));
         }
     }
 }
--- a/Implab.Test/Implab.Format.Test/packages.config	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab.Test/Implab.Format.Test/packages.config	Thu Mar 24 18:52:10 2016 +0300
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="utf-8"?>
 <packages>
-  <package id="NUnit" version="3.0.1" targetFramework="net45" />
+  <package id="NUnit" version="2.6.4" targetFramework="net45" />
 </packages>
\ No newline at end of file
--- a/Implab/Automaton/DFATable.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Automaton/DFATable.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -3,6 +3,9 @@
 using System.Collections.Generic;
 using System.Linq;
 using System.Diagnostics;
+using System.IO;
+using System.CodeDom.Compiler;
+using System.CodeDom;
 
 namespace Implab.Automaton {
     public class DFATable : IDFATableBuilder {
@@ -103,6 +106,11 @@
             return GetEnumerator();
         }
 
+        public void AddSymbol(int symbol) {
+            Safe.ArgumentAssert(symbol >= 0, "symbol");
+            m_symbolCount = Math.Max(symbol + 1, m_symbolCount);
+        }
+
         public int[,] CreateTransitionTable() {
             var table = new int[StateCount,AlphabetSize];
 
@@ -162,7 +170,7 @@
 
             var state = new HashSet<int>(
                 Enumerable
-                .Range(0, m_stateCount - 1)
+                .Range(0, m_stateCount)
                 .Where(i => !m_finalStates.Contains(i))
             );
 
@@ -182,10 +190,13 @@
 
                 for (int c = 0; c < m_symbolCount; c++) {
                     var stateX = new HashSet<int>();
-                    foreach(var a in stateA.Where(rmap.ContainsKey))
-                        stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
+                    //foreach(var a in stateA.Where(rmap.ContainsKey))
+                    //    stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
 
-                    foreach (var stateY in optimalStates.ToArray()) {
+                    stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
+
+                    var tmp = optimalStates.ToArray();
+                    foreach (var stateY in tmp) {
                         if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
                             var stateR1 = new HashSet<int>(stateY);
                             var stateR2 = new HashSet<int>(stateY);
@@ -245,12 +256,8 @@
 
                     foreach (var term in A) {
                         // ищем все переходы класса по символу term
-                        var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
+                        var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
 
-                        Debug.Assert(res.Length <= 1);
-
-                        var s2 = res.Length > 0 ? res[0] : -1;
-                            
                         HashSet<int> a2;
                         if (!classes.TryGetValue(s2, out a2)) {
                             a2 = new HashSet<int>();
@@ -283,6 +290,7 @@
 
                 // сохраняем DFAConst.UNCLASSIFIED_INPUT
                 var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
+                optimalDFA.AddSymbol(cls);
 
                 foreach (var a in item)
                     alphabetMap[a] = cls;
@@ -298,19 +306,38 @@
                 optimalDFA.Add(t);
         }
 
-        protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
+        protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) {
             Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
             Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
 
-            foreach(var t in m_transitions)
-                Console.WriteLine(
-                    "[{0}] -{{{1}}}-> [{2}]{3}",
-                    String.Join(",", stateAlphabet.GetSymbols(t.s1)),
-                    String.Join("", inputAlphabet.GetSymbols(t.edge)),
-                    String.Join(",", stateAlphabet.GetSymbols(t.s2)),
-                    m_finalStates.Contains(t.s2) ? "$" : ""
-                );
+            var data = new List<string>();
+
+            data.Add("digraph dfa {");
+
+            foreach (var final in m_finalStates)
+                data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
+
+            foreach (var t in m_transitions)
+                data.Add(String.Format(
+                    "{0} -> {2} [label={1}];",
+                    String.Join("", stateAlphabet.GetSymbols(t.s1)),
+                    ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))),
+                    String.Join("", stateAlphabet.GetSymbols(t.s2))
+                ));
+            data.Add("}");
+            return String.Join("\n", data);
         }
 
+        static string ToLiteral(string input)
+        {
+            using (var writer = new StringWriter())
+            {
+                using (var provider = CodeDomProvider.CreateProvider("CSharp"))
+                {
+                    provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null);
+                    return writer.ToString();
+                }
+            }
+        }
     }
 }
--- a/Implab/Automaton/IDFATableBuilder.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Automaton/IDFATableBuilder.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -10,6 +10,17 @@
         void MarkFinalState(int state);
 
         void SetInitialState(int s);
+
+        /// <summary>
+        /// Increases if needed the input alphabet size to hold the specified symbol.
+        /// </summary>
+        /// <remarks>
+        /// <code>
+        /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1)
+        /// </code>
+        /// </remarks>
+        /// <param name="symbol">Symbol.</param>
+        void AddSymbol(int symbol);
     }
 }
 
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -66,6 +66,9 @@
             // skip all unclassified symbols
             foreach (var pair in alphaMap.Where(x => x.Value != 0))
                 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
+
+            var orig = ToString();
+            var opt = dfa.ToString();
             
             return dfa;
         }
@@ -78,6 +81,15 @@
             return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
         }
 
+        public override string ToString() {
+            var states = new MapAlphabet<string>(false, null);
+
+            for (int i = 0; i < StateCount; i++)
+                states.DefineSymbol(string.Format("s{0}", i), i);
+
+            return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
+        }
+
     }
 }
 
--- a/Implab/Components/LazyAndWeak.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Components/LazyAndWeak.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -7,7 +7,7 @@
     /// </summary>
     /// <remarks>
     /// Usefull when dealing with memory-intensive objects which are frequently used.
-    /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton.
+    /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton.
     /// </remarks>
     public class LazyAndWeak<T> where T : class {
 
@@ -44,6 +44,7 @@
                     } else {
                         lock (m_lock) {
                             // double check
+                            weak = m_reference;
                             if (weak != null) {
                                 value = weak.Target as T;
                                 if (value != null)
--- a/Implab/Formats/JSON/JSONGrammar.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Formats/JSON/JSONGrammar.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -108,7 +108,7 @@
         }
 
         Token SymbolRangeToken(char start, char stop) {
-            return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
+            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x));
         }
 
         protected override IndexedAlphabetBase<char> CreateAlphabet() {
--- a/Implab/Formats/StringScanner.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Formats/StringScanner.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -4,22 +4,14 @@
     public class StringScanner: TextScanner {
         const int CHUNK_SIZE = 1024;
 
-        readonly string m_text;
-        int m_pos;
-
-        public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) {
-            m_text = text;
-            Feed();
+        public StringScanner(string text) : base(null) {
+            Safe.ArgumentNotNull(text, "text");
+            var data = text.ToCharArray();
+            Feed(data, 0, data.Length);
         }
 
         protected override int Read(char[] buffer, int offset, int size) {
-            var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size;
-
-            m_text.CopyTo(m_pos,buffer,offset, actual);
-
-            m_pos += actual;
-
-            return actual;
+            return 0;
         }
     }
 }
--- a/Implab/Formats/TextScanner.cs	Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Formats/TextScanner.cs	Thu Mar 24 18:52:10 2016 +0300
@@ -53,29 +53,24 @@
             tag = null;
 
             var maxSymbol = alphabet.Length - 1;
-
+            int next;
             do {
                 // after the next chunk is read the offset in the buffer may change
                 int pos = m_bufferOffset + m_tokenLength;
-
+                next = state;
                 while (pos < m_bufferSize) {
                     var ch = m_buffer[pos];
 
-                    try {
-                    var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
+                    next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
                     
                     if (next == AutomatonConst.UNREACHABLE_STATE)
                         break;
-
+                 
                     state = next;
-                    }catch {
-                        throw;
-                    }
                     pos++;
                 }
-
                 m_tokenLength = pos - m_bufferOffset;
-            } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
+            } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
 
             m_tokenOffset = m_bufferOffset;
             m_bufferOffset += m_tokenLength;
@@ -150,7 +145,7 @@
         }
 
         public void CopyTokenTo(char[] buffer, int offset) {
-            m_buffer.CopyTo(buffer, offset);
+            Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength);
         }
 
         public void CopyTokenTo(StringBuilder sb) {