# HG changeset patch
# User cin
# Date 1457907578 -10800
# Node ID 92d5278d1b107bd6648fbe66f7d4e202196bb92e
# Parent 0f70905b46522ae15bf2de1c4b30123912eeab9e
Working on text scanner
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/DFAStateDescriptor.cs
--- a/Implab/Automaton/DFAStateDescriptor.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/DFAStateDescriptor.cs Mon Mar 14 01:19:38 2016 +0300
@@ -1,18 +1,18 @@
namespace Implab.Automaton {
- public struct DFAStateDescriptior {
+ public struct DFAStateDescriptor {
public readonly bool final;
public readonly int[] transitions;
- public DFAStateDescriptior(int[] transitions, bool final) {
+ public DFAStateDescriptor(int[] transitions, bool final) {
this.transitions = transitions;
this.final = final;
}
- public DFAStateDescriptior(int[] transitions) : this(transitions, false) {
+ public DFAStateDescriptor(int[] transitions) : this(transitions, false) {
}
- public DFAStateDescriptior(int size, bool final) {
+ public DFAStateDescriptor(int size, bool final) {
Safe.ArgumentInRange(size, 0, int.MaxValue, "size");
this.final = final;
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/DFATable.cs
--- a/Implab/Automaton/DFATable.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/DFATable.cs Mon Mar 14 01:19:38 2016 +0300
@@ -100,6 +100,20 @@
return GetEnumerator();
}
+ /// <summary>
+ /// Builds an array with one <see cref="DFAStateDescriptor"/> per state of this
+ /// automaton, indexed by the state number.
+ /// </summary>
+ /// <returns>
+ /// An array of <c>StateCount</c> descriptors. Every descriptor is created with
+ /// <c>AlphabetSize</c> transition slots and the final flag of its state; the slots
+ /// named by the transitions of this automaton hold the target state.
+ /// </returns>
+ public DFAStateDescriptor[] CreateTransitionTable() {
+     var table = new DFAStateDescriptor[StateCount];
+
+     // Create all descriptors up front: a state which takes part in no transition
+     // would otherwise stay default(DFAStateDescriptor), i.e. with a null
+     // transitions array and without its final flag.
+     for (var s = 0; s < table.Length; s++)
+         table[s] = new DFAStateDescriptor(AlphabetSize, IsFinalState(s));
+
+     foreach (var t in this)
+         table[t.s1].transitions[t.edge] = t.s2;
+
+     return table;
+ }
+
/// Формирует множества конечных состояний перед началом работы алгоритма минимизации.
///
/// В процессе построения минимального автомата требуется разделить множество состояний,
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/IndexedAlphabetBase.cs
--- a/Implab/Automaton/IndexedAlphabetBase.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/IndexedAlphabetBase.cs Mon Mar 14 01:19:38 2016 +0300
@@ -71,8 +71,16 @@
return true;
}
+ /// <summary>
+ /// Enumerates the symbols whose entry in the class map equals the specified class.
+ /// </summary>
+ /// <param name="cls">The class number to look for.</param>
+ /// <returns>
+ /// A lazily evaluated sequence: for every index <c>i</c> with <c>m_map[i] == cls</c>
+ /// it yields <c>GetSymbolByIndex(i)</c>, in ascending index order.
+ /// </returns>
+ public IEnumerable GetSymbols(int cls) {
+ for (var i = 0; i < m_map.Length; i++)
+ if (m_map[i] == cls)
+ yield return GetSymbolByIndex(i);
+ }
+
public abstract int GetSymbolIndex(T symbol);
+ /// <summary>
+ /// Returns the symbol which corresponds to the specified index of the class map.
+ /// NOTE(review): presumably the inverse of GetSymbolIndex - confirm in the implementations.
+ /// </summary>
+ /// <param name="index">An index into the class map.</param>
+ public abstract T GetSymbolByIndex(int index);
+
public abstract IEnumerable InputSymbols { get; }
///
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/MapAlphabet.cs
--- a/Implab/Automaton/MapAlphabet.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/MapAlphabet.cs Mon Mar 14 01:19:38 2016 +0300
@@ -38,7 +38,6 @@
Safe.ArgumentNotNull(symbols, "symbols");
m_nextCls = Math.Max(cls + 1, m_nextCls);
- symbols = symbols.Distinct();
foreach (var symbol in symbols)
m_map[symbol] = cls;
@@ -68,6 +67,10 @@
return m_supportUnclassified || m_map.ContainsKey(symbol);
}
+
+ /// <summary>
+ /// Returns the symbols which are mapped to the specified class.
+ /// </summary>
+ /// <param name="cls">The class number to look for.</param>
+ /// <returns>
+ /// A deferred LINQ query over <c>m_map</c> selecting the keys whose value equals
+ /// <paramref name="cls"/>; it is re-evaluated on every enumeration.
+ /// </returns>
+ public IEnumerable GetSymbols(int cls) {
+ return m_map.Where(p => p.Value == cls).Select(p => p.Key);
+ }
#endregion
}
}
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs
--- a/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Mon Mar 14 01:19:38 2016 +0300
@@ -1,14 +1,14 @@
using System;
namespace Implab.Automaton.RegularExpressions {
- public struct DFAStateDescriptorT {
+ public struct DFAStateDescriptor {
public readonly bool final;
public readonly int[] transitions;
public readonly T[] tags;
- public DFAStateDescriptorT(int size, bool final, T[] tags) {
+ public DFAStateDescriptor(int size, bool final, T[] tags) {
Safe.ArgumentAssert(size >= 0, "size");
this.final = final;
this.tags = tags;
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/Grammar.cs
--- a/Implab/Automaton/RegularExpressions/Grammar.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/Grammar.cs Mon Mar 14 01:19:38 2016 +0300
@@ -66,23 +66,21 @@
return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
}
- protected void BuildDFA(Token lang, IDFATableBuilder dfaTable, IAlphabetBuilder dfaAlphabet) {
- Safe.ArgumentNotNull(lang, "lang");
- Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet");
-
- var dfa = new RegularDFADefinition(AlphabetBuilder);
+ protected abstract IAlphabetBuilder CreateAlphabet();
- var builder = new RegularDFABuilder();
+ protected RegularDFA BuildDFA(Token regexp) {
+
+ var dfa = new RegularDFA(AlphabetBuilder);
- lang.Accept( builder );
+ var visitor = new RegularExpressionVisitor();
+ regexp.Accept( visitor );
- builder.BuildDFA(dfa);
+ visitor.BuildDFA(dfa);
if (dfa.IsFinalState(dfa.InitialState))
throw new ApplicationException("The specified language contains empty token");
- dfa.Optimize(dfaTable, dfaAlphabet);
-
+ return dfa.Optimize(CreateAlphabet());
}
}
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Mon Mar 14 01:19:38 2016 +0300
@@ -0,0 +1,8 @@
+using System;
+
+namespace Implab.Automaton.RegularExpressions {
+ /// <summary>
+ /// A DFA table builder which can additionally attach an array of tags to a state.
+ /// </summary>
+ public interface ITaggedDFABuilder : IDFATableBuilder {
+ /// <summary>
+ /// Associates the specified tags with the state <paramref name="s"/>.
+ /// </summary>
+ /// <param name="s">The state number.</param>
+ /// <param name="tags">The tags for the state.</param>
+ void SetStateTag(int s, TTag[] tags);
+ }
+}
+
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularDFA.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Mon Mar 14 01:19:38 2016 +0300
@@ -0,0 +1,89 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Implab.Automaton.RegularExpressions {
+ public class RegularDFA : DFATable, ITaggedDFABuilder {
+
+ readonly Dictionary m_tags = new Dictionary();
+ readonly IAlphabet m_alphabet;
+
+ /// <summary>
+ /// Creates an empty automaton over the specified input alphabet.
+ /// </summary>
+ /// <param name="alphabet">The input alphabet; checked with <c>Safe.ArgumentNotNull</c>.</param>
+ public RegularDFA(IAlphabet alphabet) {
+     // The name reported by the guard has to match the parameter (it was misspelled "aplhabet").
+     Safe.ArgumentNotNull(alphabet, "alphabet");
+
+     m_alphabet = alphabet;
+ }
+
+
+ /// <summary>
+ /// Gets the input alphabet this automaton was constructed with.
+ /// </summary>
+ public IAlphabet InputAlphabet {
+ get {
+ return m_alphabet;
+ }
+ }
+
+ /// <summary>
+ /// Marks the state as final and stores the tags for it.
+ /// </summary>
+ /// <param name="s">The state number.</param>
+ /// <param name="tags">The tags of the state; validated by <c>SetStateTag</c>.</param>
+ public void MarkFinalState(int s, TTag[] tags) {
+ MarkFinalState(s);
+ SetStateTag(s, tags);
+ }
+
+ /// <summary>
+ /// Stores the tags for the state, replacing the tags stored for it before.
+ /// </summary>
+ /// <param name="s">The state number.</param>
+ /// <param name="tags">The tags of the state; checked with <c>Safe.ArgumentNotNull</c>.</param>
+ public void SetStateTag(int s, TTag[] tags) {
+ Safe.ArgumentNotNull(tags, "tags");
+ m_tags[s] = tags;
+ }
+
+ /// <summary>
+ /// Gets the tags stored for the state.
+ /// </summary>
+ /// <param name="s">The state number.</param>
+ /// <returns>The stored array, or a new empty array when nothing is stored for the state.</returns>
+ public TTag[] GetStateTag(int s) {
+     TTag[] found;
+     if (m_tags.TryGetValue(s, out found))
+         return found;
+
+     return new TTag[0];
+ }
+
+ /// <summary>
+ /// Builds an array with one tagged state descriptor per state of this automaton,
+ /// indexed by the state number. Hides the untagged version of the base class.
+ /// </summary>
+ /// <returns>
+ /// An array of <c>StateCount</c> descriptors. Every descriptor is created with
+ /// <c>AlphabetSize</c> transition slots, the final flag and the tags of its state;
+ /// the slots named by the transitions of this automaton hold the target state.
+ /// </returns>
+ public new DFAStateDescriptor<TTag>[] CreateTransitionTable() {
+     var table = new DFAStateDescriptor<TTag>[StateCount];
+
+     // Create all descriptors up front: a state which takes part in no transition
+     // would otherwise stay a default descriptor with null transitions and tags.
+     for (var s = 0; s < table.Length; s++)
+         table[s] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(s), GetStateTag(s));
+
+     foreach (var t in this)
+         table[t.s1].transitions[t.edge] = t.s2;
+
+     return table;
+ }
+
+ /// <summary>
+ /// Creates a new automaton over the supplied alphabet by running the inherited
+ /// <c>Optimize</c> overload, then carries the state tags and the symbol classes
+ /// over to the result.
+ /// </summary>
+ /// <param name="alphabet">An empty alphabet which will be filled during the optimization.</param>
+ /// <returns>The new automaton created over <paramref name="alphabet"/>.</returns>
+ public RegularDFA Optimize(IAlphabetBuilder alphabet) {
+ Safe.ArgumentNotNull(alphabet, "alphabet");
+
+ var dfa = new RegularDFA(alphabet);
+
+ // NOTE(review): 'states' is never read in this method.
+ var states = new DummyAlphabet(StateCount);
+ var alphaMap = new Dictionary();
+ var stateMap = new Dictionary();
+
+ // the inherited overload (not shown here) receives the new automaton and both maps
+ Optimize(dfa, alphaMap, stateMap);
+
+ // mark tags in the new DFA: the tags of all source states which stateMap sends
+ // to the same new state are concatenated into one array
+ foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
+ dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
+
+ // make the alphabet for the new DFA: the symbols of the class pair.Key of this
+ // automaton's alphabet are defined as the class pair.Value of the new alphabet
+ foreach (var pair in alphaMap)
+ alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
+
+ return dfa;
+ }
+
+ /// <summary>
+ /// Splits the final states into groups of states which carry the same tags.
+ /// </summary>
+ /// <remarks>
+ /// Two tag arrays are treated as equal when they have the same length and every
+ /// element of the first one is contained in the second one, regardless of order.
+ /// Final states without stored tags are grouped together under an empty array.
+ /// </remarks>
+ /// <returns>One set of state numbers per distinct tag array.</returns>
+ protected override IEnumerable<HashSet<int>> GroupFinalStates() {
+     var arrayComparer = new CustomEqualityComparer<TTag[]>(
+         (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
+         // The hash has to be built from the elements (not from the array reference)
+         // and must not depend on their order, otherwise arrays which compare equal
+         // land in different buckets. Plain addition is used instead of
+         // Enumerable.Sum because Sum is checked and throws on overflow.
+         x => x.Aggregate(0, (h, it) => unchecked(h + (it == null ? 0 : it.GetHashCode())))
+     );
+     // GetStateTag is used instead of m_tags[x]: a state marked final without tags
+     // has no entry in m_tags and the indexer would throw KeyNotFoundException.
+     return FinalStates.GroupBy(x => GetStateTag(x), arrayComparer).Select(g => new HashSet<int>(g));
+ }
+
+ }
+}
+
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularDFABuilder.cs
--- a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs Thu Mar 10 01:19:33 2016 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,180 +0,0 @@
-using Implab;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-
-namespace Implab.Automaton.RegularExpressions {
- ///
- /// Используется для построения ДКА по регулярному выражению, сначала обходит
- /// регулярное выражение и вычисляет followpos, затем используется метод
- /// для построения автомата.
- ///
- public class RegularDFABuilder : IVisitor {
- int m_idx;
- Token m_root;
- HashSet m_firstpos;
- HashSet m_lastpos;
-
- readonly Dictionary> m_followpos = new Dictionary>();
- readonly Dictionary m_indexes = new Dictionary();
- readonly Dictionary m_ends = new Dictionary();
-
- public Dictionary> FollowposMap {
- get { return m_followpos; }
- }
-
- public HashSet Followpos(int pos) {
- HashSet set;
- return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet();
- }
-
- bool Nullable(object n) {
- if (n is EmptyToken || n is StarToken)
- return true;
- var altToken = n as AltToken;
- if (altToken != null)
- return Nullable(altToken.Left) || Nullable(altToken.Right);
- var catToken = n as CatToken;
- if (catToken != null)
- return Nullable(catToken.Left) && Nullable(catToken.Right);
- return false;
- }
-
-
- public void Visit(AltToken token) {
- if (m_root == null)
- m_root = token;
- var firtspos = new HashSet();
- var lastpos = new HashSet();
-
- token.Left.Accept(this);
- firtspos.UnionWith(m_firstpos);
- lastpos.UnionWith(m_lastpos);
-
- token.Right.Accept(this);
- firtspos.UnionWith(m_firstpos);
- lastpos.UnionWith(m_lastpos);
-
- m_firstpos = firtspos;
- m_lastpos = lastpos;
- }
-
- public void Visit(StarToken token) {
- if (m_root == null)
- m_root = token;
- token.Token.Accept(this);
-
- foreach (var i in m_lastpos)
- Followpos(i).UnionWith(m_firstpos);
- }
-
- public void Visit(CatToken token) {
- if (m_root == null)
- m_root = token;
-
- var firtspos = new HashSet();
- var lastpos = new HashSet();
- token.Left.Accept(this);
- firtspos.UnionWith(m_firstpos);
- var leftLastpos = m_lastpos;
-
- token.Right.Accept(this);
- lastpos.UnionWith(m_lastpos);
- var rightFirstpos = m_firstpos;
-
- if (Nullable(token.Left))
- firtspos.UnionWith(rightFirstpos);
-
- if (Nullable(token.Right))
- lastpos.UnionWith(leftLastpos);
-
- m_firstpos = firtspos;
- m_lastpos = lastpos;
-
- foreach (var i in leftLastpos)
- Followpos(i).UnionWith(rightFirstpos);
-
- }
-
- public void Visit(EmptyToken token) {
- if (m_root == null)
- m_root = token;
- }
-
- public void Visit(SymbolToken token) {
- if (m_root == null)
- m_root = token;
- m_idx++;
- m_indexes[m_idx] = token.Value;
- m_firstpos = new HashSet(new[] { m_idx });
- m_lastpos = new HashSet(new[] { m_idx });
- }
-
- public void Visit(EndToken token) {
- if (m_root == null)
- m_root = token;
- m_idx++;
- m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
- m_firstpos = new HashSet(new[] { m_idx });
- m_lastpos = new HashSet(new[] { m_idx });
- Followpos(m_idx);
- m_ends.Add(m_idx, token.Tag);
- }
-
- public void BuildDFA(IDFATableBuilder dfa) {
- Safe.ArgumentNotNull(dfa,"dfa");
-
- var states = new MapAlphabet>(new CustomEqualityComparer>(
- (x, y) => x.SetEquals(y),
- x => x.Sum(n => n.GetHashCode())
- ));
-
- var initialState = states.DefineSymbol(m_firstpos);
- dfa.SetInitialState(initialState);
-
- var tags = GetStateTags(m_firstpos);
- if (tags != null && tags.Length > 0)
- dfa.MarkFinalState(initialState, tags);
-
- var inputMax = m_indexes.Values.Max();
- var queue = new Queue>();
-
- queue.Enqueue(m_firstpos);
-
- while (queue.Count > 0) {
- var state = queue.Dequeue();
- var s1 = states.Translate(state);
- Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
-
- for (int a = 0; a <= inputMax; a++) {
- var next = new HashSet();
- foreach (var p in state) {
- if (m_indexes[p] == a) {
- next.UnionWith(Followpos(p));
- }
- }
- if (next.Count > 0) {
- int s2 = states.Translate(next);
- if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
- s2 = states.DefineSymbol(next);
-
- tags = GetStateTags(next);
- if (tags != null && tags.Length > 0)
- dfa.MarkFinalState(s2, tags);
-
- queue.Enqueue(next);
- }
- dfa.Add(new AutomatonTransition(s1, s2, a));
- }
- }
- }
- }
-
- TTag[] GetStateTags(IEnumerable state) {
- Debug.Assert(state != null);
- return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
- }
-
- }
-}
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularDFADefinition.cs
--- a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Thu Mar 10 01:19:33 2016 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Implab.Automaton.RegularExpressions {
- public class RegularDFADefinition : DFATable {
-
- readonly Dictionary m_tags = new Dictionary();
- readonly IAlphabet m_alphabet;
-
- public RegularDFADefinition(IAlphabet alphabet) {
- Safe.ArgumentNotNull(alphabet, "aplhabet");
-
- m_alphabet = alphabet;
- }
-
-
- public IAlphabet InputAlphabet {
- get {
- return m_alphabet;
- }
- }
-
- public void MarkFinalState(int s, TTag[] tags) {
- MarkFinalState(s);
- SetStateTag(s, tags);
- }
-
- public void SetStateTag(int s, TTag[] tags) {
- Safe.ArgumentNotNull(tags, "tags");
- m_tags[s] = tags;
- }
-
- public TTag[] GetStateTag(int s) {
- TTag[] tags;
- return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
- }
-
- ///
- /// Optimize the specified alphabet.
- ///
- /// Пустой алфавит, который будет зполнен в процессе оптимизации.
- public RegularDFADefinition Optimize(IAlphabetBuilder alphabet) {
- Safe.ArgumentNotNull(alphabet, "alphabet");
-
- var dfaTable = new RegularDFADefinition(alphabet);
-
- var states = new DummyAlphabet(StateCount);
- var alphaMap = new Dictionary();
- var stateMap = new Dictionary();
- Optimize(dfaTable, alphaMap, stateMap);
-
- foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
- dfaTable.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
-
- return dfaTable;
- }
-
- protected override IEnumerable> GroupFinalStates() {
- var arrayComparer = new CustomEqualityComparer(
- (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
- x => x.Sum(it => x.GetHashCode())
- );
- return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g));
- }
-
- }
-}
-
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Mon Mar 14 01:19:38 2016 +0300
@@ -0,0 +1,184 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Implab.Automaton.RegularExpressions {
+ /// <summary>
+ /// Used to build a DFA from a regular expression: first it walks the regular
+ /// expression and computes followpos, then the <see cref="BuildDFA"/> method
+ /// is used to build the automaton.
+ /// </summary>
+ public class RegularExpressionVisitor : IVisitor {
+ int m_idx;
+ Token m_root;
+ HashSet m_firstpos;
+ HashSet m_lastpos;
+
+ readonly Dictionary> m_followpos = new Dictionary>();
+ readonly Dictionary m_indexes = new Dictionary();
+ readonly Dictionary m_ends = new Dictionary();
+
+ /// <summary>
+ /// Gets the followpos table which is filled while the expression is visited;
+ /// the live dictionary is returned, not a copy.
+ /// </summary>
+ public Dictionary> FollowposMap {
+ get { return m_followpos; }
+ }
+
+ /// <summary>
+ /// Returns the followpos set of the position, creating and storing an empty
+ /// set when the position has none yet.
+ /// </summary>
+ /// <param name="pos">The position number.</param>
+ public HashSet Followpos(int pos) {
+ HashSet set;
+ return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet();
+ }
+
+ /// <summary>
+ /// Computes the nullable property of an expression node.
+ /// </summary>
+ /// <param name="n">The node to examine.</param>
+ /// <returns>
+ /// True for an EmptyToken or a StarToken; for an AltToken when either branch is
+ /// nullable; for a CatToken when both parts are nullable; false for any other node.
+ /// </returns>
+ bool Nullable(object n) {
+ if (n is EmptyToken || n is StarToken)
+ return true;
+ var altToken = n as AltToken;
+ if (altToken != null)
+ return Nullable(altToken.Left) || Nullable(altToken.Right);
+ var catToken = n as CatToken;
+ if (catToken != null)
+ return Nullable(catToken.Left) && Nullable(catToken.Right);
+ return false;
+ }
+
+
+ /// <summary>
+ /// Visits an alternation: both branches are visited and the resulting firstpos
+ /// and lastpos are the unions of the sets produced by the branches.
+ /// </summary>
+ /// <param name="token">The alternation node.</param>
+ public void Visit(AltToken token) {
+ // the first visited node is remembered as the root
+ if (m_root == null)
+ m_root = token;
+ var firtspos = new HashSet();
+ var lastpos = new HashSet();
+
+ // every Accept call leaves the sets of the visited child in m_firstpos/m_lastpos
+ token.Left.Accept(this);
+ firtspos.UnionWith(m_firstpos);
+ lastpos.UnionWith(m_lastpos);
+
+ token.Right.Accept(this);
+ firtspos.UnionWith(m_firstpos);
+ lastpos.UnionWith(m_lastpos);
+
+ m_firstpos = firtspos;
+ m_lastpos = lastpos;
+ }
+
+ /// <summary>
+ /// Visits a closure: the inner expression is visited, then its firstpos is added
+ /// to the followpos of every position in its lastpos. The firstpos and lastpos
+ /// of the inner expression are left in place as the result for this node.
+ /// </summary>
+ /// <param name="token">The closure node.</param>
+ public void Visit(StarToken token) {
+ if (m_root == null)
+ m_root = token;
+ token.Token.Accept(this);
+
+ // the expression may repeat: its last positions can be followed by its first positions
+ foreach (var i in m_lastpos)
+ Followpos(i).UnionWith(m_firstpos);
+ }
+
+ /// <summary>
+ /// Visits a concatenation: computes firstpos and lastpos of the node from its
+ /// parts and adds the firstpos of the right part to the followpos of every
+ /// position in the lastpos of the left part.
+ /// </summary>
+ /// <param name="token">The concatenation node.</param>
+ public void Visit(CatToken token) {
+ if (m_root == null)
+ m_root = token;
+
+ var firtspos = new HashSet();
+ var lastpos = new HashSet();
+ token.Left.Accept(this);
+ firtspos.UnionWith(m_firstpos);
+ var leftLastpos = m_lastpos;
+
+ token.Right.Accept(this);
+ lastpos.UnionWith(m_lastpos);
+ var rightFirstpos = m_firstpos;
+
+ // a nullable left part lets the node start with the right part
+ if (Nullable(token.Left))
+ firtspos.UnionWith(rightFirstpos);
+
+ // a nullable right part lets the node end with the left part
+ if (Nullable(token.Right))
+ lastpos.UnionWith(leftLastpos);
+
+ m_firstpos = firtspos;
+ m_lastpos = lastpos;
+
+ foreach (var i in leftLastpos)
+ Followpos(i).UnionWith(rightFirstpos);
+
+ }
+
+ /// <summary>
+ /// Visits an empty expression: it has no positions, so both firstpos and
+ /// lastpos of the node are empty sets.
+ /// </summary>
+ /// <param name="token">The empty node.</param>
+ public void Visit(EmptyToken token) {
+     if (m_root == null)
+         m_root = token;
+
+     // Reset the result sets. Without this the parent node reads the sets left by
+     // the previously visited sibling (which adds followpos edges that do not
+     // exist), or null when the empty token is the first node visited.
+     m_firstpos = new HashSet<int>();
+     m_lastpos = new HashSet<int>();
+ }
+
+ /// <summary>
+ /// Visits a symbol: allocates the next position number, remembers the value of
+ /// the token for it and makes the position the only member of firstpos and lastpos.
+ /// </summary>
+ /// <param name="token">The symbol node.</param>
+ public void Visit(SymbolToken token) {
+ if (m_root == null)
+ m_root = token;
+ m_idx++;
+ m_indexes[m_idx] = token.Value;
+ m_firstpos = new HashSet(new[] { m_idx });
+ m_lastpos = new HashSet(new[] { m_idx });
+ }
+
+ /// <summary>
+ /// Visits an end marker: allocates the next position number, records
+ /// <c>DFAConst.UNCLASSIFIED_INPUT</c> as its input, makes the position the only
+ /// member of firstpos and lastpos and remembers the tag of the token for it.
+ /// </summary>
+ /// <param name="token">The end marker node.</param>
+ public void Visit(EndToken token) {
+ if (m_root == null)
+ m_root = token;
+ m_idx++;
+ m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
+ m_firstpos = new HashSet(new[] { m_idx });
+ m_lastpos = new HashSet(new[] { m_idx });
+ // creates the (empty) followpos entry of the position
+ Followpos(m_idx);
+ m_ends.Add(m_idx, token.Tag);
+ }
+
+ /// <summary>
+ /// Builds the automaton from the firstpos and followpos data collected while the
+ /// expression was visited.
+ /// </summary>
+ /// <remarks>
+ /// Every state of the automaton is a set of positions; the initial state is the
+ /// firstpos of the visited expression. A state which contains positions of end
+ /// markers is marked final and receives the tags of those markers.
+ /// </remarks>
+ /// <param name="dfa">The builder which receives the initial state, the final states with their tags and the transitions.</param>
+ public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
+     Safe.ArgumentNotNull(dfa,"dfa");
+
+     // numbers the sets of positions; sets with the same members get the same number
+     var states = new MapAlphabet<HashSet<int>>(
+         false,
+         new CustomEqualityComparer<HashSet<int>>(
+             (x, y) => x.SetEquals(y),
+             x => x.Sum(n => n.GetHashCode())
+         ));
+
+     var initialState = states.DefineSymbol(m_firstpos);
+     dfa.SetInitialState(initialState);
+
+     var tags = GetStateTags(m_firstpos);
+     if (tags != null && tags.Length > 0) {
+         // Same two calls as for the states discovered in the loop below: the
+         // builder interface declares SetStateTag, the two-argument MarkFinalState
+         // exists only on the RegularDFA class.
+         dfa.MarkFinalState(initialState);
+         dfa.SetStateTag(initialState, tags);
+     }
+
+     var inputMax = m_indexes.Values.Max();
+     var queue = new Queue<HashSet<int>>();
+
+     queue.Enqueue(m_firstpos);
+
+     // breadth-first walk over the reachable sets of positions
+     while (queue.Count > 0) {
+         var state = queue.Dequeue();
+         var s1 = states.Translate(state);
+         Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
+
+         for (int a = 0; a <= inputMax; a++) {
+             // the target for the input 'a' is the union of followpos of all
+             // positions of the state which are labeled with 'a'
+             var next = new HashSet<int>();
+             foreach (var p in state) {
+                 if (m_indexes[p] == a) {
+                     next.UnionWith(Followpos(p));
+                 }
+             }
+             if (next.Count > 0) {
+                 int s2 = states.Translate(next);
+                 if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
+                     // a set which was not seen before becomes a new state
+                     s2 = states.DefineSymbol(next);
+
+                     tags = GetStateTags(next);
+                     if (tags != null && tags.Length > 0) {
+                         dfa.MarkFinalState(s2);
+                         dfa.SetStateTag(s2, tags);
+                     }
+
+                     queue.Enqueue(next);
+                 }
+                 dfa.Add(new AutomatonTransition(s1, s2, a));
+             }
+         }
+     }
+ }
+
+ /// <summary>
+ /// Collects the tags of the end markers whose positions belong to the state.
+ /// </summary>
+ /// <param name="state">The positions which make up the state.</param>
+ /// <returns>The tags recorded in <c>m_ends</c> for those positions; an empty array when there are none.</returns>
+ TTag[] GetStateTags(IEnumerable state) {
+ Debug.Assert(state != null);
+ return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
+ }
+
+ }
+}
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/Scanner.cs
--- a/Implab/Automaton/Scanner.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Automaton/Scanner.cs Mon Mar 14 01:19:38 2016 +0300
@@ -3,6 +3,7 @@
using System.Collections.Generic;
using System.IO;
using Implab.Components;
+using Implab.Automaton.RegularExpressions;
namespace Implab.Automaton {
///
@@ -14,19 +15,23 @@
/// конца токена и допустимости текущего символа.
///
public abstract class Scanner : Disposable {
- struct ScannerConfig {
- public DFAStateDescriptior[] states;
- public int[] alphabetMap;
- public int initialState;
+ protected struct ScannerConfig {
+ public readonly DFAStateDescriptor[] states;
+ public readonly int[] alphabet;
+ public readonly int initialState;
+
+ public ScannerConfig(DFAStateDescriptor[] states, int[] alphabet, int initialState) {
+ this.initialState = initialState;
+ this.alphabet = alphabet;
+ this.states = states;
+ }
}
Stack m_defs = new Stack();
- DFAStateDescriptior[] m_states;
- int[] m_alphabetMap;
- int m_initialState;
+ ScannerConfig m_config;
- protected DFAStateDescriptior m_currentState;
+ protected DFAStateDescriptor m_currentState;
int m_previewCode;
protected int m_tokenLen;
@@ -41,15 +46,11 @@
int m_chunkSize = 1024; // 1k
int m_limit = 10 * 1024 * 1024; // 10Mb
- protected Scanner(DFAStateDescriptior[] states, int[] alphabet, int initialState) {
- Safe.ArgumentNotEmpty(states, "states");
- Safe.ArgumentNotNull(alphabet, "alphabet");
+ protected Scanner(ScannerConfig config) {
+ Safe.ArgumentNotEmpty(config.states, "config.states");
+ Safe.ArgumentNotNull(config.alphabet, "config.alphabet");
- m_states = states;
- m_alphabetMap = alphabet;
- m_initialState = initialState;
-
- Feed(new char[0]);
+ m_config = config;
}
///
@@ -110,7 +111,7 @@
///
protected TTag[] TokenTags {
get {
- return m_currentState.tag;
+ return m_currentState.tags;
}
}
@@ -133,7 +134,7 @@
if (m_pointer >= m_bufferSize)
return false;
- m_currentState = m_states[m_initialState];
+ m_currentState = m_config.states[m_config.initialState];
m_tokenLen = 0;
m_tokenOffset = m_pointer;
int nextState;
@@ -151,7 +152,7 @@
)
);
}
- m_currentState = m_states[nextState];
+ m_currentState = m_config.states[nextState];
m_tokenLen++;
} while (Shift());
@@ -172,7 +173,7 @@
return false;
}
- m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+ m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
return true;
}
@@ -217,23 +218,14 @@
/// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей
/// группировки.
///
- /// Таблица состояний нового ДКА
- /// Таблица входных символов для нового ДКА
- ///
- protected void Switch(DFAStateDescriptior[] states, int[] alphabet, int initialState) {
- Safe.ArgumentNotNull(states, "dfa");
+ ///
+ protected void Switch(ScannerConfig config) {
+ Safe.ArgumentNotNull(config.states, "config.states");
- m_defs.Push(new ScannerConfig {
- states = m_states,
- alphabetMap = m_alphabetMap,
- initialState = m_initialState
- });
+ m_defs.Push(m_config);
+ m_config = config;
- m_states = states;
- m_alphabetMap = alphabet;
- m_initialState = initialState;
-
- m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+ m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
}
///
@@ -242,11 +234,9 @@
protected void Restore() {
if (m_defs.Count == 0)
throw new InvalidOperationException();
- var prev = m_defs.Pop();
- m_states = prev.states;
- m_alphabetMap = prev.alphabetMap;
- m_initialState = prev.initialState;
- m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+ m_config = m_defs.Pop();
+
+ m_previewCode = m_config.alphabet[m_buffer[m_pointer]];
}
protected override void Dispose(bool disposing) {
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/ByteAlphabet.cs
--- a/Implab/Formats/ByteAlphabet.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Formats/ByteAlphabet.cs Mon Mar 14 01:19:38 2016 +0300
@@ -13,6 +13,10 @@
return (int)symbol;
}
+ /// <summary>
+ /// Converts the index to the byte symbol with a plain cast.
+ /// NOTE(review): the index is not range-checked here; confirm callers only pass 0..255.
+ /// </summary>
+ /// <param name="index">The index of the symbol.</param>
+ public override byte GetSymbolByIndex(int index) {
+ return (byte)index;
+ }
+
public IEnumerable InputSymbols {
get {
return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast();
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/CharAlphabet.cs
--- a/Implab/Formats/CharAlphabet.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Formats/CharAlphabet.cs Mon Mar 14 01:19:38 2016 +0300
@@ -13,6 +13,10 @@
return symbol;
}
+ /// <summary>
+ /// Converts the index to the char symbol with a plain cast.
+ /// NOTE(review): the index is not range-checked here; confirm callers only pass valid char codes.
+ /// </summary>
+ /// <param name="index">The index of the symbol.</param>
+ public override char GetSymbolByIndex(int index) {
+ return (char)index;
+ }
+
public override IEnumerable InputSymbols {
get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast(); }
}
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/JSON/JSONGrammar.cs
--- a/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Formats/JSON/JSONGrammar.cs Mon Mar 14 01:19:38 2016 +0300
@@ -1,6 +1,7 @@
using System.Linq;
using Implab.Automaton.RegularExpressions;
using System;
+using Implab.Automaton;
namespace Implab.Formats.JSON {
class JSONGrammar : Grammar {
@@ -35,8 +36,8 @@
get { return _instance.Value; }
}
- readonly RegularCharDFADefinition m_jsonDFA;
- readonly RegularCharDFADefinition m_stringDFA;
+ readonly RegularDFA m_jsonDFA;
+ readonly RegularDFA m_stringDFA;
public JSONGrammar() {
DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
@@ -87,21 +88,17 @@
.Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
- m_jsonDFA = new RegularCharDFADefinition(new CharAlphabet());
- BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);
-
-
- m_stringDFA = new RegularCharDFADefinition(new CharAlphabet());
- BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet);
+ m_jsonDFA = BuildDFA(jsonExpression);
+ m_stringDFA = BuildDFA(jsonStringExpression);
}
- public RegularCharDFADefinition JsonDFA {
+ public RegularDFA JsonDFA {
get {
return m_jsonDFA;
}
}
- public RegularDFADefinition JsonStringDFA {
+ public RegularDFA JsonStringDFA {
get {
return m_stringDFA;
}
@@ -110,6 +107,10 @@
Token SymbolRangeToken(char start, char stop) {
return SymbolToken(Enumerable.Range(start,stop - start).Cast());
}
+
+ /// <summary>
+ /// Creates the alphabet builder for this grammar: a new <c>CharAlphabet</c> on every call.
+ /// </summary>
+ protected override IAlphabetBuilder CreateAlphabet() {
+ return new CharAlphabet();
+ }
}
}
diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/JSON/JSONParser.cs
--- a/Implab/Formats/JSON/JSONParser.cs Thu Mar 10 01:19:33 2016 +0300
+++ b/Implab/Formats/JSON/JSONParser.cs Mon Mar 14 01:19:38 2016 +0300
@@ -86,9 +86,9 @@
return Token