# HG changeset patch
# User cin
# Date 1458834730 -10800
# Node ID 76e8f2ba12b82dd5295d3ce794314e9922b39e99
# Parent b2b6a6640aa339983e3799229fb9716931581663
pretty print DFA, the minimization is still buggy
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj
--- a/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Thu Mar 24 18:52:10 2016 +0300
@@ -10,6 +10,7 @@
Implab.Format.Test
Implab.Format.Test
v4.5
+ 0.2
true
@@ -32,7 +33,7 @@
- ..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll
+ ..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll
@@ -40,6 +41,12 @@
+
+ {F550F1F8-8746-4AD0-9614-855F4C4B7F05}
+ Implab
+
+
+
\ No newline at end of file
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab.Test/Implab.Format.Test/JsonTests.cs
--- a/Implab.Test/Implab.Format.Test/JsonTests.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab.Test/Implab.Format.Test/JsonTests.cs Thu Mar 24 18:52:10 2016 +0300
@@ -1,11 +1,49 @@
using NUnit.Framework;
using System;
+using Implab.Formats.JSON;
namespace Implab.Format.Test {
- [TestFixture()]
+ [TestFixture]
public class JsonTests {
- [Test()]
- public void TestCase() {
+ [Test]
+ public void TestScannerValidTokens() { // Runs one input string through JSONScanner and checks the type and value of every token, in order.
+ var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:"); // verbatim string: the scanner receives the backslash escapes undecoded
+
+ Tuple[] expexted = new [] { // expected (token type, value) pairs in input order
+ new Tuple(JsonTokenType.Number, 9123d),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ), // the separator value is expected to include the whitespace after the comma
+ new Tuple(JsonTokenType.Number, -123d ),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.Number, 0d ),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.Number, 0.1d ),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.Number, -0.2d ),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.Number, -0.1e3d ),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.Number, 1.3E-3d ),
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.String, "some \t\n text" ), // string tokens are expected unquoted and with escape sequences decoded
+ new Tuple(JsonTokenType.ValueSeparator, ", " ),
+ new Tuple(JsonTokenType.Literal, "literal" ),
+ new Tuple(JsonTokenType.BeginArray, " [" ), // the whitespace before '[' is expected to be part of the token value
+ new Tuple(JsonTokenType.EndArray, "]" ),
+ new Tuple(JsonTokenType.BeginObject, "{" ),
+ new Tuple(JsonTokenType.EndObject, "}" ),
+ new Tuple(JsonTokenType.NameSeparator, ":" )
+ };
+
+ object value;
+ JsonTokenType tokenType;
+ for (var i = 0; i < expexted.Length; i++) { // one ReadToken call per expected entry
+
+ Assert.IsTrue(scanner.ReadToken(out value, out tokenType)); // a token must be available
+ Assert.AreEqual(expexted[i].Item1, tokenType);
+ Assert.AreEqual(expexted[i].Item2, value);
+ }
+
+ Assert.IsFalse(scanner.ReadToken(out value, out tokenType)); // nothing may be left once all expected tokens were read
}
}
}
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab.Test/Implab.Format.Test/packages.config
--- a/Implab.Test/Implab.Format.Test/packages.config Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab.Test/Implab.Format.Test/packages.config Thu Mar 24 18:52:10 2016 +0300
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Automaton/DFATable.cs
--- a/Implab/Automaton/DFATable.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Automaton/DFATable.cs Thu Mar 24 18:52:10 2016 +0300
@@ -3,6 +3,9 @@
using System.Collections.Generic;
using System.Linq;
using System.Diagnostics;
+using System.IO;
+using System.CodeDom.Compiler;
+using System.CodeDom;
namespace Implab.Automaton {
public class DFATable : IDFATableBuilder {
@@ -103,6 +106,11 @@
return GetEnumerator();
}
+ public void AddSymbol(int symbol) { // Grows the recorded symbol count, if needed, so that it covers the given symbol index.
+ Safe.ArgumentAssert(symbol >= 0, "symbol"); // rejects negative symbols (presumably by throwing - confirm in Safe.ArgumentAssert)
+ m_symbolCount = Math.Max(symbol + 1, m_symbolCount); // never shrinks: keeps the larger of the current count and symbol + 1
+ }
+
public int[,] CreateTransitionTable() {
var table = new int[StateCount,AlphabetSize];
@@ -162,7 +170,7 @@
var state = new HashSet(
Enumerable
- .Range(0, m_stateCount - 1)
+ .Range(0, m_stateCount)
.Where(i => !m_finalStates.Contains(i))
);
@@ -182,10 +190,13 @@
for (int c = 0; c < m_symbolCount; c++) {
var stateX = new HashSet();
- foreach(var a in stateA.Where(rmap.ContainsKey))
- stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
+ //foreach(var a in stateA.Where(rmap.ContainsKey))
+ // stateX.UnionWith(rmap[a][c]); // all states from which the symbol 'c' leads to the state 'a'
- foreach (var stateY in optimalStates.ToArray()) {
+ stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1));
+
+ var tmp = optimalStates.ToArray();
+ foreach (var stateY in tmp) {
if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
var stateR1 = new HashSet(stateY);
var stateR2 = new HashSet(stateY);
@@ -245,12 +256,8 @@
foreach (var term in A) {
// ищем все переходы класса по символу term
- var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
+ var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First();
- Debug.Assert(res.Length <= 1);
-
- var s2 = res.Length > 0 ? res[0] : -1;
-
HashSet a2;
if (!classes.TryGetValue(s2, out a2)) {
a2 = new HashSet();
@@ -283,6 +290,7 @@
// сохраняем DFAConst.UNCLASSIFIED_INPUT
var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
+ optimalDFA.AddSymbol(cls);
foreach (var a in item)
alphabetMap[a] = cls;
@@ -298,19 +306,38 @@
optimalDFA.Add(t);
}
- protected void PrintDFA(IAlphabet inputAlphabet, IAlphabet stateAlphabet) {
+ protected string PrintDFA(IAlphabet inputAlphabet, IAlphabet stateAlphabet) { // Builds and returns a Graphviz "digraph" description of this automaton instead of writing to the console.
Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet");
Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet");
- foreach(var t in m_transitions)
- Console.WriteLine(
- "[{0}] -{{{1}}}-> [{2}]{3}",
- String.Join(",", stateAlphabet.GetSymbols(t.s1)),
- String.Join("", inputAlphabet.GetSymbols(t.edge)),
- String.Join(",", stateAlphabet.GetSymbols(t.s2)),
- m_finalStates.Contains(t.s2) ? "$" : ""
- );
+ var data = new List(); // output lines; joined with "\n" before returning
+
+ data.Add("digraph dfa {");
+
+ foreach (var final in m_finalStates) // final states are declared first, drawn as boxes
+ data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final))));
+
+ foreach (var t in m_transitions) // one edge statement per transition
+ data.Add(String.Format(
+ "{0} -> {2} [label={1}];", // placeholders are out of order on purpose: {0} = source, {2} = target, {1} = label
+ String.Join("", stateAlphabet.GetSymbols(t.s1)),
+ ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), // "@" stands for UNCLASSIFIED_INPUT; ToLiteral is applied twice, presumably so the label is a quoted string showing the escaped form - confirm
+ String.Join("", stateAlphabet.GetSymbols(t.s2))
+ ));
+ data.Add("}");
+ return String.Join("\n", data);
}
+ static string ToLiteral(string input) // Returns the input rendered as a C# source-code primitive expression, generated through CodeDOM.
+ {
+ using (var writer = new StringWriter())
+ {
+ using (var provider = CodeDomProvider.CreateProvider("CSharp")) // a provider is created and disposed on every call
+ {
+ provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null); // writes the generated expression text into writer; no generator options are passed
+ return writer.ToString();
+ }
+ }
+ }
}
}
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Automaton/IDFATableBuilder.cs
--- a/Implab/Automaton/IDFATableBuilder.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Automaton/IDFATableBuilder.cs Thu Mar 24 18:52:10 2016 +0300
@@ -10,6 +10,17 @@
void MarkFinalState(int state);
void SetInitialState(int s);
+
+ ///
/// Increases the input alphabet size, if needed, to hold the specified symbol.
+ ///
+ ///
+ ///
+ /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1)
+ ///
+ ///
+ /// Symbol.
+ void AddSymbol(int symbol);
}
}
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Automaton/RegularExpressions/RegularDFA.cs
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Thu Mar 24 18:52:10 2016 +0300
@@ -66,6 +66,9 @@
// skip all unclassified symbols
foreach (var pair in alphaMap.Where(x => x.Value != 0))
alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
+
+ var orig = ToString();
+ var opt = dfa.ToString();
return dfa;
}
@@ -78,6 +81,15 @@
return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g));
}
+ public override string ToString() { // Returns a "//[RegularDFA ...]" header line followed by the text produced by PrintDFA.
+ var states = new MapAlphabet(false, null); // alphabet built here only to give the states printable names
+
+ for (int i = 0; i < StateCount; i++)
+ states.DefineSymbol(string.Format("s{0}", i), i); // state i is named "s<i>"
+
+ return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize); // {1} x {2} = StateCount x AlphabetSize, {0} = PrintDFA output
+ }
+
}
}
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Components/LazyAndWeak.cs
--- a/Implab/Components/LazyAndWeak.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Components/LazyAndWeak.cs Thu Mar 24 18:52:10 2016 +0300
@@ -7,7 +7,7 @@
///
///
/// Usefull when dealing with memory-intensive objects which are frequently used.
- /// This class is similar to except is a singleton.
+ /// This class is similar to except it is a singleton.
///
public class LazyAndWeak where T : class {
@@ -44,6 +44,7 @@
} else {
lock (m_lock) {
// double check
+ weak = m_reference;
if (weak != null) {
value = weak.Target as T;
if (value != null)
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Formats/JSON/JSONGrammar.cs
--- a/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 24 18:52:10 2016 +0300
@@ -108,7 +108,7 @@
}
Token SymbolRangeToken(char start, char stop) {
- return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x));
+ return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); // the second argument of Range is a count: stop - start + 1 makes the range include 'stop' itself
}
protected override IndexedAlphabetBase CreateAlphabet() {
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Formats/StringScanner.cs
--- a/Implab/Formats/StringScanner.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Formats/StringScanner.cs Thu Mar 24 18:52:10 2016 +0300
@@ -4,22 +4,14 @@
public class StringScanner: TextScanner {
const int CHUNK_SIZE = 1024;
- readonly string m_text;
- int m_pos;
-
- public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) {
- m_text = text;
- Feed();
+ public StringScanner(string text) : base(null) { // passes null to the base constructor; the text itself is handed over below through Feed
+ Safe.ArgumentNotNull(text, "text"); // runs after base(null), since the base constructor executes before this body
+ var data = text.ToCharArray();
+ Feed(data, 0, data.Length); // the whole text is fed in a single call
}
protected override int Read(char[] buffer, int offset, int size) {
- var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size;
-
- m_text.CopyTo(m_pos,buffer,offset, actual);
-
- m_pos += actual;
-
- return actual;
+ return 0; // always reports zero characters read; all data was supplied in the constructor (presumably signals end of input - confirm against TextScanner)
}
}
}
diff -r b2b6a6640aa3 -r 76e8f2ba12b8 Implab/Formats/TextScanner.cs
--- a/Implab/Formats/TextScanner.cs Thu Mar 24 03:54:46 2016 +0300
+++ b/Implab/Formats/TextScanner.cs Thu Mar 24 18:52:10 2016 +0300
@@ -53,29 +53,24 @@
tag = null;
var maxSymbol = alphabet.Length - 1;
-
+ int next;
do {
// after the next chunk is read the offset in the buffer may change
int pos = m_bufferOffset + m_tokenLength;
-
+ next = state;
while (pos < m_bufferSize) {
var ch = m_buffer[pos];
- try {
- var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
+ next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
if (next == AutomatonConst.UNREACHABLE_STATE)
break;
-
+
state = next;
- }catch {
- throw;
- }
pos++;
}
-
m_tokenLength = pos - m_bufferOffset;
- } while (state != AutomatonConst.UNREACHABLE_STATE && Feed());
+ } while (next != AutomatonConst.UNREACHABLE_STATE && Feed());
m_tokenOffset = m_bufferOffset;
m_bufferOffset += m_tokenLength;
@@ -150,7 +145,7 @@
}
public void CopyTokenTo(char[] buffer, int offset) {
- m_buffer.CopyTo(buffer, offset);
+ Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength); // copies only m_tokenLength chars starting at m_tokenOffset, not the whole internal buffer
}
public void CopyTokenTo(StringBuilder sb) {