changeset 181:b2b6a6640aa3 ref20160224

minor fixes and debug
author cin
date Thu, 24 Mar 2016 03:54:46 +0300
parents c32688129f14
children 76e8f2ba12b8
files Implab/Automaton/DFATable.cs Implab/Automaton/MapAlphabet.cs Implab/Automaton/RegularExpressions/RegularDFA.cs Implab/Formats/JSON/JSONGrammar.cs Implab/Formats/TextScanner.cs
diffstat 5 files changed, 23 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/Implab/Automaton/DFATable.cs	Thu Mar 24 02:30:46 2016 +0300
+++ b/Implab/Automaton/DFATable.cs	Thu Mar 24 03:54:46 2016 +0300
@@ -2,6 +2,7 @@
 using System;
 using System.Collections.Generic;
 using System.Linq;
+using System.Diagnostics;
 
 namespace Implab.Automaton {
     public class DFATable : IDFATableBuilder {
@@ -43,10 +44,12 @@
 
         public void SetInitialState(int s) {
             Safe.ArgumentAssert(s >= 0, "s");
+            m_stateCount = Math.Max(m_stateCount, s + 1); // grow the state count so that it always includes the initial state index
             m_initialState = s;
         }
 
         public void MarkFinalState(int state) {
+            m_stateCount = Math.Max(m_stateCount, state + 1); // keep the state count large enough to include 'state'; NOTE(review): 'state' is not range-checked here, unlike 's' in SetInitialState -- confirm this is intended
             m_finalStates.Add(state);
         }
 
@@ -56,7 +59,7 @@
             Safe.ArgumentAssert(item.edge >= 0, "item");
 
             m_stateCount = Math.Max(m_stateCount, Math.Max(item.s1, item.s2) + 1);
-            m_symbolCount = Math.Max(m_symbolCount, item.edge);
+            m_symbolCount = Math.Max(m_symbolCount, item.edge + 1);
 
             m_transitions.Add(item);
         }
@@ -104,7 +107,7 @@
             var table = new int[StateCount,AlphabetSize];
 
             for (int i = 0; i < StateCount; i++)
-                for (int j = 0; i < AlphabetSize; j++)
+                for (int j = 0; j < AlphabetSize; j++)
                     table[i, j] = AutomatonConst.UNREACHABLE_STATE;
 
             foreach (var t in this)
@@ -170,7 +173,7 @@
                 .GroupBy(t => t.s2)
                 .ToDictionary(
                     g => g.Key, // s2
-                    g => g.GroupBy(t => t.edge, t => t.s1).ToDictionary(p => p.Key)
+                    g => g.ToLookup(t => t.edge, t => t.s1)//.ToDictionary(p => p.Key)
                 );
 
             while (queue.Count > 0) {
@@ -179,7 +182,7 @@
 
                 for (int c = 0; c < m_symbolCount; c++) {
                     var stateX = new HashSet<int>();
-                    foreach(var a in stateA)
+                    foreach(var a in stateA.Where(rmap.ContainsKey))
                         stateX.UnionWith(rmap[a][c]); // all states from wich the symbol 'c' leads to the state 'a'
 
                     foreach (var stateY in optimalStates.ToArray()) {
@@ -244,6 +247,8 @@
                         // ищем все переходы класса по символу term
                         var res = m_transitions.Where(t => stateMap[t.s1] == s && t.edge == term).Select(t => stateMap[t.s2]).ToArray();
 
+                        Debug.Assert(res.Length <= 1);
+
                         var s2 = res.Length > 0 ? res[0] : -1;
                             
                         HashSet<int> a2;
@@ -277,12 +282,10 @@
                     nextCls++;
 
                 // сохраняем DFAConst.UNCLASSIFIED_INPUT
-                var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls;
+                var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls++;
 
                 foreach (var a in item)
                     alphabetMap[a] = cls;
-
-                nextCls++;
             }
 
             // построение автомата
--- a/Implab/Automaton/MapAlphabet.cs	Thu Mar 24 02:30:46 2016 +0300
+++ b/Implab/Automaton/MapAlphabet.cs	Thu Mar 24 03:54:46 2016 +0300
@@ -69,7 +69,7 @@
 
 
         public IEnumerable<T> GetSymbols(int cls) {
-            Safe.ArgumentAssert(cls > 0, "cls");
+            Safe.ArgumentAssert(!m_supportUnclassified || cls > 0, "cls"); // cls must be positive only when m_supportUnclassified is set; otherwise any value (including negative) passes this check
             return m_map.Where(p => p.Value == cls).Select(p => p.Key);
         }
         #endregion
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs	Thu Mar 24 02:30:46 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs	Thu Mar 24 03:54:46 2016 +0300
@@ -63,7 +63,8 @@
                 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
 
             // make the alphabet for the new DFA
-            foreach (var pair in alphaMap)
+            // skip all unclassified symbols
+            foreach (var pair in alphaMap.Where(x => x.Value != 0))
                 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
             
             return dfa;
--- a/Implab/Formats/JSON/JSONGrammar.cs	Thu Mar 24 02:30:46 2016 +0300
+++ b/Implab/Formats/JSON/JSONGrammar.cs	Thu Mar 24 03:54:46 2016 +0300
@@ -108,7 +108,7 @@
         }
 
         Token SymbolRangeToken(char start, char stop) {
-            return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>());
+            return SymbolToken(Enumerable.Range(start, stop - start + 1).Select(x => (char)x)); // Enumerable.Range takes a count, so +1 makes the range include 'stop' itself
         }
 
         protected override IndexedAlphabetBase<char> CreateAlphabet() {
--- a/Implab/Formats/TextScanner.cs	Thu Mar 24 02:30:46 2016 +0300
+++ b/Implab/Formats/TextScanner.cs	Thu Mar 24 03:54:46 2016 +0300
@@ -61,10 +61,16 @@
                 while (pos < m_bufferSize) {
                     var ch = m_buffer[pos];
 
-                    state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
-                    if (state == AutomatonConst.UNREACHABLE_STATE)
+                    try {
+                    var next = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]];
+                    
+                    if (next == AutomatonConst.UNREACHABLE_STATE)
                         break;
-                    
+
+                    state = next;
+                    }catch {
+                        throw;
+                    }
                     pos++;
                 }