# HG changeset patch # User cin # Date 1457907578 -10800 # Node ID 92d5278d1b107bd6648fbe66f7d4e202196bb92e # Parent 0f70905b46522ae15bf2de1c4b30123912eeab9e Working on text scanner diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/DFAStateDescriptor.cs --- a/Implab/Automaton/DFAStateDescriptor.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/DFAStateDescriptor.cs Mon Mar 14 01:19:38 2016 +0300 @@ -1,18 +1,18 @@ namespace Implab.Automaton { - public struct DFAStateDescriptior { + public struct DFAStateDescriptor { public readonly bool final; public readonly int[] transitions; - public DFAStateDescriptior(int[] transitions, bool final) { + public DFAStateDescriptor(int[] transitions, bool final) { this.transitions = transitions; this.final = final; } - public DFAStateDescriptior(int[] transitions) : this(transitions, false) { + public DFAStateDescriptor(int[] transitions) : this(transitions, false) { } - public DFAStateDescriptior(int size, bool final) { + public DFAStateDescriptor(int size, bool final) { Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); this.final = final; diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/DFATable.cs --- a/Implab/Automaton/DFATable.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/DFATable.cs Mon Mar 14 01:19:38 2016 +0300 @@ -100,6 +100,20 @@ return GetEnumerator(); } + public DFAStateDescriptor[] CreateTransitionTable() { + var table = new DFAStateDescriptor[StateCount]; + + foreach (var t in this) { + if (table[t.s1].transitions == null) + table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1)); + if (table[t.s2].transitions == null) + table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2)); + table[t.s1].transitions[t.edge] = t.s2; + } + + return table; + } + /// Формирует множества конечных состояний перед началом работы алгоритма минимизации. /// /// В процессе построения минимального автомата требуется разделить множество состояний, diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/IndexedAlphabetBase.cs --- a/Implab/Automaton/IndexedAlphabetBase.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/IndexedAlphabetBase.cs Mon Mar 14 01:19:38 2016 +0300 @@ -71,8 +71,16 @@ return true; } + public IEnumerable GetSymbols(int cls) { + for (var i = 0; i < m_map.Length; i++) + if (m_map[i] == cls) + yield return GetSymbolByIndex(i); + } + public abstract int GetSymbolIndex(T symbol); + public abstract T GetSymbolByIndex(int index); + public abstract IEnumerable InputSymbols { get; } /// diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/MapAlphabet.cs --- a/Implab/Automaton/MapAlphabet.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/MapAlphabet.cs Mon Mar 14 01:19:38 2016 +0300 @@ -38,7 +38,6 @@ Safe.ArgumentNotNull(symbols, "symbols"); m_nextCls = Math.Max(cls + 1, m_nextCls); - symbols = symbols.Distinct(); foreach (var symbol in symbols) m_map[symbol] = cls; @@ -68,6 +67,10 @@ return m_supportUnclassified || m_map.ContainsKey(symbol); } + + public IEnumerable GetSymbols(int cls) { + return m_map.Where(p => p.Value == cls).Select(p => p.Key); + } #endregion } } diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs --- a/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Mon Mar 14 01:19:38 2016 +0300 @@ -1,14 +1,14 @@ using System; namespace Implab.Automaton.RegularExpressions { - public struct DFAStateDescriptorT { + public struct DFAStateDescriptor { public readonly bool final; public readonly int[] transitions; public readonly T[] tags; - public DFAStateDescriptorT(int size, bool final, T[] tags) { + public DFAStateDescriptor(int size, bool final, T[] tags) { Safe.ArgumentAssert(size >= 0, "size"); this.final = final; this.tags = tags; diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/Grammar.cs --- a/Implab/Automaton/RegularExpressions/Grammar.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/Grammar.cs Mon Mar 14 01:19:38 2016 +0300 @@ -66,23 +66,21 @@ return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); } - protected void BuildDFA(Token lang, IDFATableBuilder dfaTable, IAlphabetBuilder dfaAlphabet) { - Safe.ArgumentNotNull(lang, "lang"); - Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet"); - - var dfa = new RegularDFADefinition(AlphabetBuilder); + protected abstract IAlphabetBuilder CreateAlphabet(); - var builder = new RegularDFABuilder(); + protected RegularDFA BuildDFA(Token regexp) { + + var dfa = new RegularDFA(AlphabetBuilder); - lang.Accept( builder ); + var visitor = new RegularExpressionVisitor(); + regexp.Accept( visitor ); - builder.BuildDFA(dfa); + visitor.BuildDFA(dfa); if (dfa.IsFinalState(dfa.InitialState)) throw new ApplicationException("The specified language contains empty token"); - dfa.Optimize(dfaTable, dfaAlphabet); - + return dfa.Optimize(CreateAlphabet()); } } diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Mon Mar 14 01:19:38 2016 +0300 @@ -0,0 +1,8 @@ +using System; + +namespace Implab.Automaton.RegularExpressions { + public interface ITaggedDFABuilder : IDFATableBuilder { + void SetStateTag(int s, TTag[] tags); + } +} + diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularDFA.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Mon Mar 14 01:19:38 2016 +0300 @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton.RegularExpressions { + public class RegularDFA : DFATable, ITaggedDFABuilder { + + readonly Dictionary m_tags = new Dictionary(); + readonly IAlphabet m_alphabet; + + public RegularDFA(IAlphabet alphabet) { + Safe.ArgumentNotNull(alphabet, "aplhabet"); + + m_alphabet = alphabet; + } + + + public IAlphabet InputAlphabet { + get { + return m_alphabet; + } + } + + public void MarkFinalState(int s, TTag[] tags) { + MarkFinalState(s); + SetStateTag(s, tags); + } + + public void SetStateTag(int s, TTag[] tags) { + Safe.ArgumentNotNull(tags, "tags"); + m_tags[s] = tags; + } + + public TTag[] GetStateTag(int s) { + TTag[] tags; + return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; + } + + public new DFAStateDescriptor[] CreateTransitionTable() { + var table = new DFAStateDescriptor[StateCount]; + + foreach (var t in this) { + if (table[t.s1].transitions == null) + table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1)); + if (table[t.s2].transitions == null) + table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2)); + table[t.s1].transitions[t.edge] = t.s2; + } + + return table; + } + + /// + /// Optimize the specified alphabet. + /// + /// Пустой алфавит, который будет зполнен в процессе оптимизации. + public RegularDFA Optimize(IAlphabetBuilder alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + var dfa = new RegularDFA(alphabet); + + var states = new DummyAlphabet(StateCount); + var alphaMap = new Dictionary(); + var stateMap = new Dictionary(); + + Optimize(dfa, alphaMap, stateMap); + + // mark tags in the new DFA + foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) + dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); + + // make the alphabet for the new DFA + foreach (var pair in alphaMap) + alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); + + return dfa; + } + + protected override IEnumerable> GroupFinalStates() { + var arrayComparer = new CustomEqualityComparer( + (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), + x => x.Sum(it => x.GetHashCode()) + ); + return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g)); + } + + } +} + diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularDFABuilder.cs --- a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs Thu Mar 10 01:19:33 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,180 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - /// - /// Используется для построения ДКА по регулярному выражению, сначала обходит - /// регулярное выражение и вычисляет followpos, затем используется метод - /// для построения автомата. - /// - public class RegularDFABuilder : IVisitor { - int m_idx; - Token m_root; - HashSet m_firstpos; - HashSet m_lastpos; - - readonly Dictionary> m_followpos = new Dictionary>(); - readonly Dictionary m_indexes = new Dictionary(); - readonly Dictionary m_ends = new Dictionary(); - - public Dictionary> FollowposMap { - get { return m_followpos; } - } - - public HashSet Followpos(int pos) { - HashSet set; - return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet(); - } - - bool Nullable(object n) { - if (n is EmptyToken || n is StarToken) - return true; - var altToken = n as AltToken; - if (altToken != null) - return Nullable(altToken.Left) || Nullable(altToken.Right); - var catToken = n as CatToken; - if (catToken != null) - return Nullable(catToken.Left) && Nullable(catToken.Right); - return false; - } - - - public void Visit(AltToken token) { - if (m_root == null) - m_root = token; - var firtspos = new HashSet(); - var lastpos = new HashSet(); - - token.Left.Accept(this); - firtspos.UnionWith(m_firstpos); - lastpos.UnionWith(m_lastpos); - - token.Right.Accept(this); - firtspos.UnionWith(m_firstpos); - lastpos.UnionWith(m_lastpos); - - m_firstpos = firtspos; - m_lastpos = lastpos; - } - - public void Visit(StarToken token) { - if (m_root == null) - m_root = token; - token.Token.Accept(this); - - foreach (var i in m_lastpos) - Followpos(i).UnionWith(m_firstpos); - } - - public void Visit(CatToken token) { - if (m_root == null) - m_root = token; - - var firtspos = new HashSet(); - var lastpos = new HashSet(); - token.Left.Accept(this); - firtspos.UnionWith(m_firstpos); - var leftLastpos = m_lastpos; - - token.Right.Accept(this); - lastpos.UnionWith(m_lastpos); - var rightFirstpos = m_firstpos; - - if (Nullable(token.Left)) - firtspos.UnionWith(rightFirstpos); - - if (Nullable(token.Right)) - lastpos.UnionWith(leftLastpos); - - m_firstpos = firtspos; - m_lastpos = lastpos; - - foreach (var i in leftLastpos) - Followpos(i).UnionWith(rightFirstpos); - - } - - public void Visit(EmptyToken token) { - if (m_root == null) - m_root = token; - } - - public void Visit(SymbolToken token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = token.Value; - m_firstpos = new HashSet(new[] { m_idx }); - m_lastpos = new HashSet(new[] { m_idx }); - } - - public void Visit(EndToken token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; - m_firstpos = new HashSet(new[] { m_idx }); - m_lastpos = new HashSet(new[] { m_idx }); - Followpos(m_idx); - m_ends.Add(m_idx, token.Tag); - } - - public void BuildDFA(IDFATableBuilder dfa) { - Safe.ArgumentNotNull(dfa,"dfa"); - - var states = new MapAlphabet>(new CustomEqualityComparer>( - (x, y) => x.SetEquals(y), - x => x.Sum(n => n.GetHashCode()) - )); - - var initialState = states.DefineSymbol(m_firstpos); - dfa.SetInitialState(initialState); - - var tags = GetStateTags(m_firstpos); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(initialState, tags); - - var inputMax = m_indexes.Values.Max(); - var queue = new Queue>(); - - queue.Enqueue(m_firstpos); - - while (queue.Count > 0) { - var state = queue.Dequeue(); - var s1 = states.Translate(state); - Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); - - for (int a = 0; a <= inputMax; a++) { - var next = new HashSet(); - foreach (var p in state) { - if (m_indexes[p] == a) { - next.UnionWith(Followpos(p)); - } - } - if (next.Count > 0) { - int s2 = states.Translate(next); - if (s2 == DFAConst.UNCLASSIFIED_INPUT) { - s2 = states.DefineSymbol(next); - - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(s2, tags); - - queue.Enqueue(next); - } - dfa.Add(new AutomatonTransition(s1, s2, a)); - } - } - } - } - - TTag[] GetStateTags(IEnumerable state) { - Debug.Assert(state != null); - return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); - } - - } -} diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularDFADefinition.cs --- a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Thu Mar 10 01:19:33 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - public class RegularDFADefinition : DFATable { - - readonly Dictionary m_tags = new Dictionary(); - readonly IAlphabet m_alphabet; - - public RegularDFADefinition(IAlphabet alphabet) { - Safe.ArgumentNotNull(alphabet, "aplhabet"); - - m_alphabet = alphabet; - } - - - public IAlphabet InputAlphabet { - get { - return m_alphabet; - } - } - - public void MarkFinalState(int s, TTag[] tags) { - MarkFinalState(s); - SetStateTag(s, tags); - } - - public void SetStateTag(int s, TTag[] tags) { - Safe.ArgumentNotNull(tags, "tags"); - m_tags[s] = tags; - } - - public TTag[] GetStateTag(int s) { - TTag[] tags; - return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; - } - - /// - /// Optimize the specified alphabet. - /// - /// Пустой алфавит, который будет зполнен в процессе оптимизации. - public RegularDFADefinition Optimize(IAlphabetBuilder alphabet) { - Safe.ArgumentNotNull(alphabet, "alphabet"); - - var dfaTable = new RegularDFADefinition(alphabet); - - var states = new DummyAlphabet(StateCount); - var alphaMap = new Dictionary(); - var stateMap = new Dictionary(); - Optimize(dfaTable, alphaMap, stateMap); - - foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) - dfaTable.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); - - return dfaTable; - } - - protected override IEnumerable> GroupFinalStates() { - var arrayComparer = new CustomEqualityComparer( - (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), - x => x.Sum(it => x.GetHashCode()) - ); - return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet(g)); - } - - } -} - diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Mon Mar 14 01:19:38 2016 +0300 @@ -0,0 +1,184 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Implab.Automaton.RegularExpressions { + /// + /// Используется для построения ДКА по регулярному выражению, сначала обходит + /// регулярное выражение и вычисляет followpos, затем используется метод + /// для построения автомата. + /// + public class RegularExpressionVisitor : IVisitor { + int m_idx; + Token m_root; + HashSet m_firstpos; + HashSet m_lastpos; + + readonly Dictionary> m_followpos = new Dictionary>(); + readonly Dictionary m_indexes = new Dictionary(); + readonly Dictionary m_ends = new Dictionary(); + + public Dictionary> FollowposMap { + get { return m_followpos; } + } + + public HashSet Followpos(int pos) { + HashSet set; + return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet(); + } + + bool Nullable(object n) { + if (n is EmptyToken || n is StarToken) + return true; + var altToken = n as AltToken; + if (altToken != null) + return Nullable(altToken.Left) || Nullable(altToken.Right); + var catToken = n as CatToken; + if (catToken != null) + return Nullable(catToken.Left) && Nullable(catToken.Right); + return false; + } + + + public void Visit(AltToken token) { + if (m_root == null) + m_root = token; + var firtspos = new HashSet(); + var lastpos = new HashSet(); + + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + token.Right.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + } + + public void Visit(StarToken token) { + if (m_root == null) + m_root = token; + token.Token.Accept(this); + + foreach (var i in m_lastpos) + Followpos(i).UnionWith(m_firstpos); + } + + public void Visit(CatToken token) { + if (m_root == null) + m_root = token; + + var firtspos = new HashSet(); + var lastpos = new HashSet(); + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + var leftLastpos = m_lastpos; + + token.Right.Accept(this); + lastpos.UnionWith(m_lastpos); + var rightFirstpos = m_firstpos; + + if (Nullable(token.Left)) + firtspos.UnionWith(rightFirstpos); + + if (Nullable(token.Right)) + lastpos.UnionWith(leftLastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + + foreach (var i in leftLastpos) + Followpos(i).UnionWith(rightFirstpos); + + } + + public void Visit(EmptyToken token) { + if (m_root == null) + m_root = token; + } + + public void Visit(SymbolToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = token.Value; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + } + + public void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx, token.Tag); + } + + public void BuildDFA(ITaggedDFABuilder dfa) { + Safe.ArgumentNotNull(dfa,"dfa"); + + var states = new MapAlphabet>( + false, + new CustomEqualityComparer>( + (x, y) => x.SetEquals(y), + x => x.Sum(n => n.GetHashCode()) + )); + + var initialState = states.DefineSymbol(m_firstpos); + dfa.SetInitialState(initialState); + + var tags = GetStateTags(m_firstpos); + if (tags != null && tags.Length > 0) + dfa.MarkFinalState(initialState, tags); + + var inputMax = m_indexes.Values.Max(); + var queue = new Queue>(); + + queue.Enqueue(m_firstpos); + + while (queue.Count > 0) { + var state = queue.Dequeue(); + var s1 = states.Translate(state); + Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); + + for (int a = 0; a <= inputMax; a++) { + var next = new HashSet(); + foreach (var p in state) { + if (m_indexes[p] == a) { + next.UnionWith(Followpos(p)); + } + } + if (next.Count > 0) { + int s2 = states.Translate(next); + if (s2 == DFAConst.UNCLASSIFIED_INPUT) { + s2 = states.DefineSymbol(next); + + tags = GetStateTags(next); + if (tags != null && tags.Length > 0) { + dfa.MarkFinalState(s2); + dfa.SetStateTag(s2, tags); + } + + queue.Enqueue(next); + } + dfa.Add(new AutomatonTransition(s1, s2, a)); + } + } + } + } + + TTag[] GetStateTags(IEnumerable state) { + Debug.Assert(state != null); + return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); + } + + } +} diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Automaton/Scanner.cs --- a/Implab/Automaton/Scanner.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/Scanner.cs Mon Mar 14 01:19:38 2016 +0300 @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using Implab.Components; +using Implab.Automaton.RegularExpressions; namespace Implab.Automaton { /// @@ -14,19 +15,23 @@ /// конца токена и допустимости текущего символа. /// public abstract class Scanner : Disposable { - struct ScannerConfig { - public DFAStateDescriptior[] states; - public int[] alphabetMap; - public int initialState; + protected struct ScannerConfig { + public readonly DFAStateDescriptor[] states; + public readonly int[] alphabet; + public readonly int initialState; + + public ScannerConfig(DFAStateDescriptor[] states, int[] alphabet, int initialState) { + this.initialState = initialState; + this.alphabet = alphabet; + this.states = states; + } } Stack m_defs = new Stack(); - DFAStateDescriptior[] m_states; - int[] m_alphabetMap; - int m_initialState; + ScannerConfig m_config; - protected DFAStateDescriptior m_currentState; + protected DFAStateDescriptor m_currentState; int m_previewCode; protected int m_tokenLen; @@ -41,15 +46,11 @@ int m_chunkSize = 1024; // 1k int m_limit = 10 * 1024 * 1024; // 10Mb - protected Scanner(DFAStateDescriptior[] states, int[] alphabet, int initialState) { - Safe.ArgumentNotEmpty(states, "states"); - Safe.ArgumentNotNull(alphabet, "alphabet"); + protected Scanner(ScannerConfig config) { + Safe.ArgumentNotEmpty(config.states, "config.states"); + Safe.ArgumentNotNull(config.alphabet, "config.alphabet"); - m_states = states; - m_alphabetMap = alphabet; - m_initialState = initialState; - - Feed(new char[0]); + m_config = config; } /// @@ -110,7 +111,7 @@ /// protected TTag[] TokenTags { get { - return m_currentState.tag; + return m_currentState.tags; } } @@ -133,7 +134,7 @@ if (m_pointer >= m_bufferSize) return false; - m_currentState = m_states[m_initialState]; + m_currentState = m_config.states[m_config.initialState]; m_tokenLen = 0; m_tokenOffset = m_pointer; int nextState; @@ -151,7 +152,7 @@ ) ); } - m_currentState = m_states[nextState]; + m_currentState = m_config.states[nextState]; m_tokenLen++; } while (Shift()); @@ -172,7 +173,7 @@ return false; } - m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; return true; } @@ -217,23 +218,14 @@ /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей /// группировки. /// - /// Таблица состояний нового ДКА - /// Таблица входных символов для нового ДКА - /// - protected void Switch(DFAStateDescriptior[] states, int[] alphabet, int initialState) { - Safe.ArgumentNotNull(states, "dfa"); + /// + protected void Switch(ScannerConfig config) { + Safe.ArgumentNotNull(config.states, "config.states"); - m_defs.Push(new ScannerConfig { - states = m_states, - alphabetMap = m_alphabetMap, - initialState = m_initialState - }); + m_defs.Push(m_config); + m_config = config; - m_states = states; - m_alphabetMap = alphabet; - m_initialState = initialState; - - m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; } /// @@ -242,11 +234,9 @@ protected void Restore() { if (m_defs.Count == 0) throw new InvalidOperationException(); - var prev = m_defs.Pop(); - m_states = prev.states; - m_alphabetMap = prev.alphabetMap; - m_initialState = prev.initialState; - m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + m_config = m_defs.Pop(); + + m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; } protected override void Dispose(bool disposing) { diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/ByteAlphabet.cs --- a/Implab/Formats/ByteAlphabet.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/ByteAlphabet.cs Mon Mar 14 01:19:38 2016 +0300 @@ -13,6 +13,10 @@ return (int)symbol; } + public override byte GetSymbolByIndex(int index) { + return (byte)index; + } + public IEnumerable InputSymbols { get { return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast(); diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/CharAlphabet.cs --- a/Implab/Formats/CharAlphabet.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/CharAlphabet.cs Mon Mar 14 01:19:38 2016 +0300 @@ -13,6 +13,10 @@ return symbol; } + public override char GetSymbolByIndex(int index) { + return (char)index; + } + public override IEnumerable InputSymbols { get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast(); } } diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/JSON/JSONGrammar.cs --- a/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/JSON/JSONGrammar.cs Mon Mar 14 01:19:38 2016 +0300 @@ -1,6 +1,7 @@ using System.Linq; using Implab.Automaton.RegularExpressions; using System; +using Implab.Automaton; namespace Implab.Formats.JSON { class JSONGrammar : Grammar { @@ -35,8 +36,8 @@ get { return _instance.Value; } } - readonly RegularCharDFADefinition m_jsonDFA; - readonly RegularCharDFADefinition m_stringDFA; + readonly RegularDFA m_jsonDFA; + readonly RegularDFA m_stringDFA; public JSONGrammar() { DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); @@ -87,21 +88,17 @@ .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - m_jsonDFA = new RegularCharDFADefinition(new CharAlphabet()); - BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); - - - m_stringDFA = new RegularCharDFADefinition(new CharAlphabet()); - BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); + m_jsonDFA = BuildDFA(jsonExpression); + m_stringDFA = BuildDFA(jsonStringExpression); } - public RegularCharDFADefinition JsonDFA { + public RegularDFA JsonDFA { get { return m_jsonDFA; } } - public RegularDFADefinition JsonStringDFA { + public RegularDFA JsonStringDFA { get { return m_stringDFA; } @@ -110,6 +107,10 @@ Token SymbolRangeToken(char start, char stop) { return SymbolToken(Enumerable.Range(start,stop - start).Cast()); } + + protected override IAlphabetBuilder CreateAlphabet() { + return new CharAlphabet(); + } } } diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/JSON/JSONParser.cs --- a/Implab/Formats/JSON/JSONParser.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/JSON/JSONParser.cs Mon Mar 14 01:19:38 2016 +0300 @@ -86,9 +86,9 @@ return Token.New(input.Select(t => _alphabet.Translate(t)).ToArray()); } - static RegularDFADefinition CreateDFA(Token expr) { - var builder = new RegularDFABuilder(); - var dfa = new RegularDFADefinition(_alphabet); + static RegularDFA CreateDFA(Token expr) { + var builder = new RegularExpressionVisitor(); + var dfa = new RegularDFA(_alphabet); expr.Accept(builder); diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Formats/RegularCharDFADefinition.cs --- a/Implab/Formats/RegularCharDFADefinition.cs Thu Mar 10 01:19:33 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -using System; -using Implab.Automaton.RegularExpressions; - -namespace Implab.Formats { - public class RegularCharDFADefinition : RegularDFADefinition { - readonly CharAlphabet m_alphabet; - - public RegularCharDFADefinition(CharAlphabet alphabet) : base(alphabet) { - m_alphabet = alphabet; - } - - public new CharAlphabet InputAlphabet { - get { return m_alphabet; } - } - } -} - diff -r 0f70905b4652 -r 92d5278d1b10 Implab/Implab.csproj --- a/Implab/Implab.csproj Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Implab.csproj Mon Mar 14 01:19:38 2016 +0300 @@ -170,7 +170,6 @@ - @@ -183,13 +182,14 @@ - - + + +