Mercurial > pub > ImplabNet
changeset 182:76e8f2ba12b8 ref20160224
pretty print DFA, the minimization is still buggy
author | cin |
---|---|
date | Thu, 24 Mar 2016 18:52:10 +0300 (2016-03-24) |
parents | b2b6a6640aa3 |
children | 4f82e0f161c3 |
files | Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Implab.Test/Implab.Format.Test/JsonTests.cs Implab.Test/Implab.Format.Test/packages.config Implab/Automaton/DFATable.cs Implab/Automaton/IDFATableBuilder.cs Implab/Automaton/RegularExpressions/RegularDFA.cs Implab/Components/LazyAndWeak.cs Implab/Formats/JSON/JSONGrammar.cs Implab/Formats/StringScanner.cs Implab/Formats/TextScanner.cs |
diffstat | 10 files changed, 132 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab.Test/Implab.Format.Test/Implab.Format.Test.csproj Thu Mar 24 18:52:10 2016 +0300 @@ -10,6 +10,7 @@ <RootNamespace>Implab.Format.Test</RootNamespace> <AssemblyName>Implab.Format.Test</AssemblyName> <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> + <ReleaseVersion>0.2</ReleaseVersion> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <DebugSymbols>true</DebugSymbols> @@ -32,7 +33,7 @@ <ItemGroup> <Reference Include="System" /> <Reference Include="nunit.framework"> - <HintPath>..\..\packages\NUnit.3.0.1\lib\net45\nunit.framework.dll</HintPath> + <HintPath>..\..\packages\NUnit.2.6.4\lib\nunit.framework.dll</HintPath> </Reference> </ItemGroup> <ItemGroup> @@ -40,6 +41,12 @@ </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <ItemGroup> + <ProjectReference Include="..\..\Implab\Implab.csproj"> + <Project>{F550F1F8-8746-4AD0-9614-855F4C4B7F05}</Project> + <Name>Implab</Name> + </ProjectReference> + </ItemGroup> + <ItemGroup> <None Include="packages.config" /> </ItemGroup> </Project> \ No newline at end of file
--- a/Implab.Test/Implab.Format.Test/JsonTests.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab.Test/Implab.Format.Test/JsonTests.cs Thu Mar 24 18:52:10 2016 +0300 @@ -1,11 +1,49 @@ using NUnit.Framework; using System; +using Implab.Formats.JSON; namespace Implab.Format.Test { - [TestFixture()] + [TestFixture] public class JsonTests { - [Test()] - public void TestCase() { + [Test] + public void TestScannerValidTokens() { + var scanner = new JSONScanner(@"9123, -123, 0, 0.1, -0.2, -0.1e3, 1.3E-3, ""some \t\n\u0020 text"", literal []{}:"); + + Tuple<JsonTokenType,object>[] expexted = new [] { + new Tuple<JsonTokenType,object>(JsonTokenType.Number, 9123d), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Number, -123d ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0d ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Number, 0.1d ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.2d ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Number, -0.1e3d ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Number, 1.3E-3d ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.String, "some \t\n text" ), + new Tuple<JsonTokenType,object>(JsonTokenType.ValueSeparator, ", " ), + new Tuple<JsonTokenType,object>(JsonTokenType.Literal, "literal" ), + new Tuple<JsonTokenType,object>(JsonTokenType.BeginArray, " [" ), + new Tuple<JsonTokenType,object>(JsonTokenType.EndArray, "]" ), + new Tuple<JsonTokenType,object>(JsonTokenType.BeginObject, "{" ), + new Tuple<JsonTokenType,object>(JsonTokenType.EndObject, "}" ), + new Tuple<JsonTokenType,object>(JsonTokenType.NameSeparator, ":" ) + }; + + object value; + JsonTokenType tokenType; + for (var i = 0; i < expexted.Length; i++) { + + Assert.IsTrue(scanner.ReadToken(out value, out tokenType)); + Assert.AreEqual(expexted[i].Item1, tokenType); + Assert.AreEqual(expexted[i].Item2, value); + } + + Assert.IsFalse(scanner.ReadToken(out value, out tokenType)); } } }
--- a/Implab.Test/Implab.Format.Test/packages.config Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab.Test/Implab.Format.Test/packages.config Thu Mar 24 18:52:10 2016 +0300 @@ -1,4 +1,4 @@ <?xml version="1.0" encoding="utf-8"?> <packages> - <package id="NUnit" version="3.0.1" targetFramework="net45" /> + <package id="NUnit" version="2.6.4" targetFramework="net45" /> </packages> \ No newline at end of file
--- a/Implab/Automaton/DFATable.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Automaton/DFATable.cs Thu Mar 24 18:52:10 2016 +0300 @@ -3,6 +3,9 @@ using System.Collections.Generic; using System.Linq; using System.Diagnostics; +using System.IO; +using System.CodeDom.Compiler; +using System.CodeDom; namespace Implab.Automaton { public class DFATable : IDFATableBuilder { @@ -103,6 +106,11 @@ return GetEnumerator(); } + public void AddSymbol(int symbol) { + Safe.ArgumentAssert(symbol >= 0, "symbol"); + m_symbolCount = Math.Max(symbol + 1, m_symbolCount); + } + public int[,] CreateTransitionTable() { var table = new int[StateCount,AlphabetSize]; @@ -162,7 +170,7 @@ var state = new HashSet<int>( Enumerable - .Range(0, m_stateCount - 1) + .Range(0, m_stateCount) .Where(i => !m_finalStates.Contains(i)) ); @@ -182,10 +190,13 @@ for (int c = 0; c < m_symbolCount; c++) { var stateX = new HashSet<int>(); - foreach(var a in stateA.Where(rmap.ContainsKey)) - stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' + //foreach(var a in stateA.Where(rmap.ContainsKey)) + // stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a' - foreach (var stateY in optimalStates.ToArray()) { + stateX.UnionWith(m_transitions.Where(t => stateA.Contains(t.s2) && t.edge == c).Select(t => t.s1)); + + var tmp = optimalStates.ToArray(); + foreach (var stateY in tmp) { if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { var stateR1 = new HashSet<int>(stateY); var stateR2 = new HashSet<int>(stateY); @@ -245,12 +256,8 @@ foreach (var term in A) { // ищем все переходы класса по символу term - var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray(); + var s2 = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).DefaultIfEmpty(-1).First(); - Debug.Assert(res.Length <= 1); - - var s2 = res.Length > 0 ? res[0] : -1; - HashSet<int> a2; if (!classes.TryGetValue(s2, out a2)) { a2 = new HashSet<int>(); @@ -283,6 +290,7 @@ // сохраняем DFAConst.UNCLASSIFIED_INPUT var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++; + optimalDFA.AddSymbol(cls); foreach (var a in item) alphabetMap[a] = cls; @@ -298,19 +306,38 @@ optimalDFA.Add(t); } - protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { + protected string PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); - foreach(var t in m_transitions) - Console.WriteLine( - "[{0}] -{{{1}}}-> [{2}]{3}", - String.Join(",", stateAlphabet.GetSymbols(t.s1)), - String.Join("", inputAlphabet.GetSymbols(t.edge)), - String.Join(",", stateAlphabet.GetSymbols(t.s2)), - m_finalStates.Contains(t.s2) ? "$" : "" - ); + var data = new List<string>(); + + data.Add("digraph dfa {"); + + foreach (var final in m_finalStates) + data.Add(String.Format("{0} [shape=box];",String.Join("", stateAlphabet.GetSymbols(final)))); + + foreach (var t in m_transitions) + data.Add(String.Format( + "{0} -> {2} [label={1}];", + String.Join("", stateAlphabet.GetSymbols(t.s1)), + ToLiteral(ToLiteral(String.Join("", t.edge == AutomatonConst.UNCLASSIFIED_INPUT ? new [] { "@" } : inputAlphabet.GetSymbols(t.edge).Select(x => x.ToString())))), + String.Join("", stateAlphabet.GetSymbols(t.s2)) + )); + data.Add("}"); + return String.Join("\n", data); } + static string ToLiteral(string input) + { + using (var writer = new StringWriter()) + { + using (var provider = CodeDomProvider.CreateProvider("CSharp")) + { + provider.GenerateCodeFromExpression(new CodePrimitiveExpression(input), writer, null); + return writer.ToString(); + } + } + } } }
--- a/Implab/Automaton/IDFATableBuilder.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Automaton/IDFATableBuilder.cs Thu Mar 24 18:52:10 2016 +0300 @@ -10,6 +10,17 @@ void MarkFinalState(int state); void SetInitialState(int s); + + /// <summary> + /// Increases if needed the input alphabet size to hold the specified symbol. + /// </summary> + /// <remarks> + /// <code> + /// AlphabetSize = Math.Max(AlphabetSize, symbol + 1) + /// </code> + /// </remarks> + /// <param name="symbol">Symbol.</param> + void AddSymbol(int symbol); } }
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Thu Mar 24 18:52:10 2016 +0300 @@ -66,6 +66,9 @@ // skip all unclassified symbols foreach (var pair in alphaMap.Where(x => x.Value != 0)) alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); + + var orig = ToString(); + var opt = dfa.ToString(); return dfa; } @@ -78,6 +81,15 @@ return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); } + public override string ToString() { + var states = new MapAlphabet<string>(false, null); + + for (int i = 0; i < StateCount; i++) + states.DefineSymbol(string.Format("s{0}", i), i); + + return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize); + } + } }
--- a/Implab/Components/LazyAndWeak.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Components/LazyAndWeak.cs Thu Mar 24 18:52:10 2016 +0300 @@ -7,7 +7,7 @@ /// </summary> /// <remarks> /// Usefull when dealing with memory-intensive objects which are frequently used. - /// This class is similar to <see cref="ObjectPool{T}"/> except is a singleton. + /// This class is similar to <see cref="ObjectPool{T}"/> except it is a singleton. /// </remarks> public class LazyAndWeak<T> where T : class { @@ -44,6 +44,7 @@ } else { lock (m_lock) { // double check + weak = m_reference; if (weak != null) { value = weak.Target as T; if (value != null)
--- a/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 24 18:52:10 2016 +0300 @@ -108,7 +108,7 @@ } Token SymbolRangeToken(char start, char stop) { - return SymbolToken(Enumerable.Range(start,stop - start).Select(x => (char)x)); + return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); } protected override IndexedAlphabetBase<char> CreateAlphabet() {
--- a/Implab/Formats/StringScanner.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Formats/StringScanner.cs Thu Mar 24 18:52:10 2016 +0300 @@ -4,22 +4,14 @@ public class StringScanner: TextScanner { const int CHUNK_SIZE = 1024; - readonly string m_text; - int m_pos; - - public StringScanner(string text) : base(text.Length, text.Length < CHUNK_SIZE ? text.Length : CHUNK_SIZE) { - m_text = text; - Feed(); + public StringScanner(string text) : base(null) { + Safe.ArgumentNotNull(text, "text"); + var data = text.ToCharArray(); + Feed(data, 0, data.Length); } protected override int Read(char[] buffer, int offset, int size) { - var actual = size + m_pos > m_text.Length ? m_text.Length - m_pos : size; - - m_text.CopyTo(m_pos,buffer,offset, actual); - - m_pos += actual; - - return actual; + return 0; } } }
--- a/Implab/Formats/TextScanner.cs Thu Mar 24 03:54:46 2016 +0300 +++ b/Implab/Formats/TextScanner.cs Thu Mar 24 18:52:10 2016 +0300 @@ -53,29 +53,24 @@ tag = null; var maxSymbol = alphabet.Length - 1; - + int next; do { // after the next chunk is read the offset in the buffer may change int pos = m_bufferOffset + m_tokenLength; - + next = state; while (pos < m_bufferSize) { var ch = m_buffer[pos]; - try { - var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; + next = dfa[next, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; if (next == AutomatonConst.UNREACHABLE_STATE) break; - + state = next; - }catch { - throw; - } pos++; } - m_tokenLength = pos - m_bufferOffset; - } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); + } while (next != AutomatonConst.UNREACHABLE_STATE && Feed()); m_tokenOffset = m_bufferOffset; m_bufferOffset += m_tokenLength; @@ -150,7 +145,7 @@ } public void CopyTokenTo(char[] buffer, int offset) { - m_buffer.CopyTo(buffer, offset); + Array.Copy(m_buffer, m_tokenOffset,buffer, offset, m_tokenLength); } public void CopyTokenTo(StringBuilder sb) {