Mercurial > pub > ImplabNet
changeset 172:92d5278d1b10 ref20160224
Working on text scanner
author | cin |
---|---|
date | Mon, 14 Mar 2016 01:19:38 +0300 (2016-03-13) |
parents | 0f70905b4652 |
children | ecfece82ca11 |
files | Implab/Automaton/DFAStateDescriptor.cs Implab/Automaton/DFATable.cs Implab/Automaton/IndexedAlphabetBase.cs Implab/Automaton/MapAlphabet.cs Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Implab/Automaton/RegularExpressions/Grammar.cs Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Implab/Automaton/RegularExpressions/RegularDFA.cs Implab/Automaton/RegularExpressions/RegularDFABuilder.cs Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Implab/Automaton/Scanner.cs Implab/Formats/ByteAlphabet.cs Implab/Formats/CharAlphabet.cs Implab/Formats/JSON/JSONGrammar.cs Implab/Formats/JSON/JSONParser.cs Implab/Formats/RegularCharDFADefinition.cs Implab/Implab.csproj |
diffstat | 18 files changed, 376 insertions(+), 339 deletions(-) [+] |
line wrap: on
line diff
--- a/Implab/Automaton/DFAStateDescriptor.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/DFAStateDescriptor.cs Mon Mar 14 01:19:38 2016 +0300 @@ -1,18 +1,18 @@ namespace Implab.Automaton { - public struct DFAStateDescriptior { + public struct DFAStateDescriptor { public readonly bool final; public readonly int[] transitions; - public DFAStateDescriptior(int[] transitions, bool final) { + public DFAStateDescriptor(int[] transitions, bool final) { this.transitions = transitions; this.final = final; } - public DFAStateDescriptior(int[] transitions) : this(transitions, false) { + public DFAStateDescriptor(int[] transitions) : this(transitions, false) { } - public DFAStateDescriptior(int size, bool final) { + public DFAStateDescriptor(int size, bool final) { Safe.ArgumentInRange(size, 0, int.MaxValue, "size"); this.final = final;
--- a/Implab/Automaton/DFATable.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/DFATable.cs Mon Mar 14 01:19:38 2016 +0300 @@ -100,6 +100,20 @@ return GetEnumerator(); } + public DFAStateDescriptor[] CreateTransitionTable() { + var table = new DFAStateDescriptor[StateCount]; + + foreach (var t in this) { + if (table[t.s1].transitions == null) + table[t.s1] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s1)); + if (table[t.s2].transitions == null) + table[t.s2] = new DFAStateDescriptor(AlphabetSize, IsFinalState(t.s2)); + table[t.s1].transitions[t.edge] = t.s2; + } + + return table; + } + /// <summary>Формирует множества конечных состояний перед началом работы алгоритма минимизации.</summary> /// <remarks> /// В процессе построения минимального автомата требуется разделить множество состояний,
--- a/Implab/Automaton/IndexedAlphabetBase.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/IndexedAlphabetBase.cs Mon Mar 14 01:19:38 2016 +0300 @@ -71,8 +71,16 @@ return true; } + public IEnumerable<T> GetSymbols(int cls) { + for (var i = 0; i < m_map.Length; i++) + if (m_map[i] == cls) + yield return GetSymbolByIndex(i); + } + public abstract int GetSymbolIndex(T symbol); + public abstract T GetSymbolByIndex(int index); + public abstract IEnumerable<T> InputSymbols { get; } /// <summary>
--- a/Implab/Automaton/MapAlphabet.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/MapAlphabet.cs Mon Mar 14 01:19:38 2016 +0300 @@ -38,7 +38,6 @@ Safe.ArgumentNotNull(symbols, "symbols"); m_nextCls = Math.Max(cls + 1, m_nextCls); - symbols = symbols.Distinct(); foreach (var symbol in symbols) m_map[symbol] = cls; @@ -68,6 +67,10 @@ return m_supportUnclassified || m_map.ContainsKey(symbol); } + + public IEnumerable<T> GetSymbols(int cls) { + return m_map.Where(p => p.Value == cls).Select(p => p.Key); + } #endregion } }
--- a/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/DFAStateDescriptorT.cs Mon Mar 14 01:19:38 2016 +0300 @@ -1,14 +1,14 @@ using System; namespace Implab.Automaton.RegularExpressions { - public struct DFAStateDescriptorT<T> { + public struct DFAStateDescriptor<T> { public readonly bool final; public readonly int[] transitions; public readonly T[] tags; - public DFAStateDescriptorT(int size, bool final, T[] tags) { + public DFAStateDescriptor(int size, bool final, T[] tags) { Safe.ArgumentAssert(size >= 0, "size"); this.final = final; this.tags = tags;
--- a/Implab/Automaton/RegularExpressions/Grammar.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/Grammar.cs Mon Mar 14 01:19:38 2016 +0300 @@ -66,23 +66,21 @@ return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); } - protected void BuildDFA(Token<TTag> lang, IDFATableBuilder<TTag> dfaTable, IAlphabetBuilder<TSymbol> dfaAlphabet) { - Safe.ArgumentNotNull(lang, "lang"); - Safe.ArgumentNotNull(dfaAlphabet, "dfaAlphabet"); - - var dfa = new RegularDFADefinition<TSymbol, TTag>(AlphabetBuilder); + protected abstract IAlphabetBuilder<TSymbol> CreateAlphabet(); - var builder = new RegularDFABuilder<TTag>(); + protected RegularDFA<TSymbol, TTag> BuildDFA(Token<TTag> regexp) { + + var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); - lang.Accept( builder ); + var visitor = new RegularExpressionVisitor<TTag>(); + regexp.Accept( visitor ); - builder.BuildDFA(dfa); + visitor.BuildDFA(dfa); if (dfa.IsFinalState(dfa.InitialState)) throw new ApplicationException("The specified language contains empty token"); - dfa.Optimize(dfaTable, dfaAlphabet); - + return dfa.Optimize(CreateAlphabet()); } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Mon Mar 14 01:19:38 2016 +0300 @@ -0,0 +1,8 @@ +using System; + +namespace Implab.Automaton.RegularExpressions { + public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { + void SetStateTag(int s, TTag[] tags); + } +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Mon Mar 14 01:19:38 2016 +0300 @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton.RegularExpressions { + public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { + + readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); + readonly IAlphabet<TInput> m_alphabet; + + public RegularDFA(IAlphabet<TInput> alphabet) { + Safe.ArgumentNotNull(alphabet, "aplhabet"); + + m_alphabet = alphabet; + } + + + public IAlphabet<TInput> InputAlphabet { + get { + return m_alphabet; + } + } + + public void MarkFinalState(int s, TTag[] tags) { + MarkFinalState(s); + SetStateTag(s, tags); + } + + public void SetStateTag(int s, TTag[] tags) { + Safe.ArgumentNotNull(tags, "tags"); + m_tags[s] = tags; + } + + public TTag[] GetStateTag(int s) { + TTag[] tags; + return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; + } + + public new DFAStateDescriptor<TTag>[] CreateTransitionTable() { + var table = new DFAStateDescriptor<TTag>[StateCount]; + + foreach (var t in this) { + if (table[t.s1].transitions == null) + table[t.s1] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1)); + if (table[t.s2].transitions == null) + table[t.s2] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2)); + table[t.s1].transitions[t.edge] = t.s2; + } + + return table; + } + + /// <summary> + /// Optimize the specified alphabet. + /// </summary> + /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> + public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + var dfa = new RegularDFA<TInput, TTag>(alphabet); + + var states = new DummyAlphabet(StateCount); + var alphaMap = new Dictionary<int,int>(); + var stateMap = new Dictionary<int,int>(); + + Optimize(dfa, alphaMap, stateMap); + + // mark tags in the new DFA + foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) + dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); + + // make the alphabet for the new DFA + foreach (var pair in alphaMap) + alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value); + + return dfa; + } + + protected override IEnumerable<HashSet<int>> GroupFinalStates() { + var arrayComparer = new CustomEqualityComparer<TTag[]>( + (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), + x => x.Sum(it => x.GetHashCode()) + ); + return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); + } + + } +} +
--- a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs Thu Mar 10 01:19:33 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,180 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - /// <summary> - /// Используется для построения ДКА по регулярному выражению, сначала обходит - /// регулярное выражение и вычисляет followpos, затем используется метод - /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. - /// </summary> - public class RegularDFABuilder<TTag> : IVisitor<TTag> { - int m_idx; - Token<TTag> m_root; - HashSet<int> m_firstpos; - HashSet<int> m_lastpos; - - readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); - readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); - readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); - - public Dictionary<int, HashSet<int>> FollowposMap { - get { return m_followpos; } - } - - public HashSet<int> Followpos(int pos) { - HashSet<int> set; - return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); - } - - bool Nullable(object n) { - if (n is EmptyToken<TTag> || n is StarToken<TTag>) - return true; - var altToken = n as AltToken<TTag>; - if (altToken != null) - return Nullable(altToken.Left) || Nullable(altToken.Right); - var catToken = n as CatToken<TTag>; - if (catToken != null) - return Nullable(catToken.Left) && Nullable(catToken.Right); - return false; - } - - - public void Visit(AltToken<TTag> token) { - if (m_root == null) - m_root = token; - var firtspos = new HashSet<int>(); - var lastpos = new HashSet<int>(); - - token.Left.Accept(this); - firtspos.UnionWith(m_firstpos); - lastpos.UnionWith(m_lastpos); - - token.Right.Accept(this); - firtspos.UnionWith(m_firstpos); - lastpos.UnionWith(m_lastpos); - - m_firstpos = firtspos; - m_lastpos = lastpos; - } - - public void Visit(StarToken<TTag> token) { - if (m_root == null) - m_root = token; - token.Token.Accept(this); - - foreach (var i in m_lastpos) - Followpos(i).UnionWith(m_firstpos); - } - - public void Visit(CatToken<TTag> token) { - if (m_root == null) - m_root = token; - - var firtspos = new HashSet<int>(); - var lastpos = new HashSet<int>(); - token.Left.Accept(this); - firtspos.UnionWith(m_firstpos); - var leftLastpos = m_lastpos; - - token.Right.Accept(this); - lastpos.UnionWith(m_lastpos); - var rightFirstpos = m_firstpos; - - if (Nullable(token.Left)) - firtspos.UnionWith(rightFirstpos); - - if (Nullable(token.Right)) - lastpos.UnionWith(leftLastpos); - - m_firstpos = firtspos; - m_lastpos = lastpos; - - foreach (var i in leftLastpos) - Followpos(i).UnionWith(rightFirstpos); - - } - - public void Visit(EmptyToken<TTag> token) { - if (m_root == null) - m_root = token; - } - - public void Visit(SymbolToken<TTag> token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = token.Value; - m_firstpos = new HashSet<int>(new[] { m_idx }); - m_lastpos = new HashSet<int>(new[] { m_idx }); - } - - public void Visit(EndToken<TTag> token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; - m_firstpos = new HashSet<int>(new[] { m_idx }); - m_lastpos = new HashSet<int>(new[] { m_idx }); - Followpos(m_idx); - m_ends.Add(m_idx, token.Tag); - } - - public void BuildDFA(IDFATableBuilder dfa) { - Safe.ArgumentNotNull(dfa,"dfa"); - - var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( - (x, y) => x.SetEquals(y), - x => x.Sum(n => n.GetHashCode()) - )); - - var initialState = states.DefineSymbol(m_firstpos); - dfa.SetInitialState(initialState); - - var tags = GetStateTags(m_firstpos); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(initialState, tags); - - var inputMax = m_indexes.Values.Max(); - var queue = new Queue<HashSet<int>>(); - - queue.Enqueue(m_firstpos); - - while (queue.Count > 0) { - var state = queue.Dequeue(); - var s1 = states.Translate(state); - Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); - - for (int a = 0; a <= inputMax; a++) { - var next = new HashSet<int>(); - foreach (var p in state) { - if (m_indexes[p] == a) { - next.UnionWith(Followpos(p)); - } - } - if (next.Count > 0) { - int s2 = states.Translate(next); - if (s2 == DFAConst.UNCLASSIFIED_INPUT) { - s2 = states.DefineSymbol(next); - - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(s2, tags); - - queue.Enqueue(next); - } - dfa.Add(new AutomatonTransition(s1, s2, a)); - } - } - } - } - - TTag[] GetStateTags(IEnumerable<int> state) { - Debug.Assert(state != null); - return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); - } - - } -}
--- a/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Thu Mar 10 01:19:33 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - public class RegularDFADefinition<TInput, TTag> : DFATable { - - readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); - readonly IAlphabet<TInput> m_alphabet; - - public RegularDFADefinition(IAlphabet<TInput> alphabet) { - Safe.ArgumentNotNull(alphabet, "aplhabet"); - - m_alphabet = alphabet; - } - - - public IAlphabet<TInput> InputAlphabet { - get { - return m_alphabet; - } - } - - public void MarkFinalState(int s, TTag[] tags) { - MarkFinalState(s); - SetStateTag(s, tags); - } - - public void SetStateTag(int s, TTag[] tags) { - Safe.ArgumentNotNull(tags, "tags"); - m_tags[s] = tags; - } - - public TTag[] GetStateTag(int s) { - TTag[] tags; - return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0]; - } - - /// <summary> - /// Optimize the specified alphabet. - /// </summary> - /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> - public RegularDFADefinition<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { - Safe.ArgumentNotNull(alphabet, "alphabet"); - - var dfaTable = new RegularDFADefinition<TInput, TTag>(alphabet); - - var states = new DummyAlphabet(StateCount); - var alphaMap = new Dictionary<int,int>(); - var stateMap = new Dictionary<int,int>(); - Optimize(dfaTable, alphaMap, stateMap); - - foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value )) - dfaTable.SetStateTag(g.Key, g.SelectMany(x => x).ToArray()); - - return dfaTable; - } - - protected override IEnumerable<HashSet<int>> GroupFinalStates() { - var arrayComparer = new CustomEqualityComparer<TTag[]>( - (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)), - x => x.Sum(it => x.GetHashCode()) - ); - return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g)); - } - - } -} -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Mon Mar 14 01:19:38 2016 +0300 @@ -0,0 +1,184 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; + +namespace Implab.Automaton.RegularExpressions { + /// <summary> + /// Используется для построения ДКА по регулярному выражению, сначала обходит + /// регулярное выражение и вычисляет followpos, затем используется метод + /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. + /// </summary> + public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { + int m_idx; + Token<TTag> m_root; + HashSet<int> m_firstpos; + HashSet<int> m_lastpos; + + readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); + readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); + readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); + + public Dictionary<int, HashSet<int>> FollowposMap { + get { return m_followpos; } + } + + public HashSet<int> Followpos(int pos) { + HashSet<int> set; + return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet<int>(); + } + + bool Nullable(object n) { + if (n is EmptyToken<TTag> || n is StarToken<TTag>) + return true; + var altToken = n as AltToken<TTag>; + if (altToken != null) + return Nullable(altToken.Left) || Nullable(altToken.Right); + var catToken = n as CatToken<TTag>; + if (catToken != null) + return Nullable(catToken.Left) && Nullable(catToken.Right); + return false; + } + + + public void Visit(AltToken<TTag> token) { + if (m_root == null) + m_root = token; + var firtspos = new HashSet<int>(); + var lastpos = new HashSet<int>(); + + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + token.Right.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + } + + public void Visit(StarToken<TTag> token) { + if (m_root == null) + m_root = token; + token.Token.Accept(this); + + foreach (var i in m_lastpos) + Followpos(i).UnionWith(m_firstpos); + } + + public void Visit(CatToken<TTag> token) { + if (m_root == null) + m_root = token; + + var firtspos = new HashSet<int>(); + var lastpos = new HashSet<int>(); + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + var leftLastpos = m_lastpos; + + token.Right.Accept(this); + lastpos.UnionWith(m_lastpos); + var rightFirstpos = m_firstpos; + + if (Nullable(token.Left)) + firtspos.UnionWith(rightFirstpos); + + if (Nullable(token.Right)) + lastpos.UnionWith(leftLastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + + foreach (var i in leftLastpos) + Followpos(i).UnionWith(rightFirstpos); + + } + + public void Visit(EmptyToken<TTag> token) { + if (m_root == null) + m_root = token; + } + + public void Visit(SymbolToken<TTag> token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = token.Value; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + } + + public void Visit(EndToken<TTag> token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx, token.Tag); + } + + public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { + Safe.ArgumentNotNull(dfa,"dfa"); + + var states = new MapAlphabet<HashSet<int>>( + false, + new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + x => x.Sum(n => n.GetHashCode()) + )); + + var initialState = states.DefineSymbol(m_firstpos); + dfa.SetInitialState(initialState); + + var tags = GetStateTags(m_firstpos); + if (tags != null && tags.Length > 0) + dfa.MarkFinalState(initialState, tags); + + var inputMax = m_indexes.Values.Max(); + var queue = new Queue<HashSet<int>>(); + + queue.Enqueue(m_firstpos); + + while (queue.Count > 0) { + var state = queue.Dequeue(); + var s1 = states.Translate(state); + Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); + + for (int a = 0; a <= inputMax; a++) { + var next = new HashSet<int>(); + foreach (var p in state) { + if (m_indexes[p] == a) { + next.UnionWith(Followpos(p)); + } + } + if (next.Count > 0) { + int s2 = states.Translate(next); + if (s2 == DFAConst.UNCLASSIFIED_INPUT) { + s2 = states.DefineSymbol(next); + + tags = GetStateTags(next); + if (tags != null && tags.Length > 0) { + dfa.MarkFinalState(s2); + dfa.SetStateTag(s2, tags); + } + + queue.Enqueue(next); + } + dfa.Add(new AutomatonTransition(s1, s2, a)); + } + } + } + } + + TTag[] GetStateTags(IEnumerable<int> state) { + Debug.Assert(state != null); + return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); + } + + } +}
--- a/Implab/Automaton/Scanner.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Automaton/Scanner.cs Mon Mar 14 01:19:38 2016 +0300 @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using Implab.Components; +using Implab.Automaton.RegularExpressions; namespace Implab.Automaton { /// <summary> @@ -14,19 +15,23 @@ /// конца токена и допустимости текущего символа. /// </remarks> public abstract class Scanner<TTag> : Disposable { - struct ScannerConfig { - public DFAStateDescriptior<TTag>[] states; - public int[] alphabetMap; - public int initialState; + protected struct ScannerConfig { + public readonly DFAStateDescriptor<TTag>[] states; + public readonly int[] alphabet; + public readonly int initialState; + + public ScannerConfig(DFAStateDescriptor<TTag>[] states, int[] alphabet, int initialState) { + this.initialState = initialState; + this.alphabet = alphabet; + this.states = states; + } } Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); - DFAStateDescriptior<TTag>[] m_states; - int[] m_alphabetMap; - int m_initialState; + ScannerConfig m_config; - protected DFAStateDescriptior<TTag> m_currentState; + protected DFAStateDescriptor<TTag> m_currentState; int m_previewCode; protected int m_tokenLen; @@ -41,15 +46,11 @@ int m_chunkSize = 1024; // 1k int m_limit = 10 * 1024 * 1024; // 10Mb - protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { - Safe.ArgumentNotEmpty(states, "states"); - Safe.ArgumentNotNull(alphabet, "alphabet"); + protected Scanner(ScannerConfig config) { + Safe.ArgumentNotEmpty(config.states, "config.states"); + Safe.ArgumentNotNull(config.alphabet, "config.alphabet"); - m_states = states; - m_alphabetMap = alphabet; - m_initialState = initialState; - - Feed(new char[0]); + m_config = config; } /// <summary> @@ -110,7 +111,7 @@ /// </summary> protected TTag[] TokenTags { get { - return m_currentState.tag; + return m_currentState.tags; } } @@ -133,7 +134,7 @@ if (m_pointer >= m_bufferSize) return false; - m_currentState = m_states[m_initialState]; + m_currentState = m_config.states[m_config.initialState]; m_tokenLen = 0; m_tokenOffset = m_pointer; int nextState; @@ -151,7 +152,7 @@ ) ); } - m_currentState = m_states[nextState]; + m_currentState = m_config.states[nextState]; m_tokenLen++; } while (Shift()); @@ -172,7 +173,7 @@ return false; } - m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; return true; } @@ -217,23 +218,14 @@ /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей /// группировки. /// </summary> - /// <param name="states">Таблица состояний нового ДКА</param> - /// <param name="alphabet">Таблица входных символов для нового ДКА</param> - /// <param name = "initialState"></param> - protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { - Safe.ArgumentNotNull(states, "dfa"); + /// <param name = "config"></param> + protected void Switch(ScannerConfig config) { + Safe.ArgumentNotNull(config.states, "config.states"); - m_defs.Push(new ScannerConfig { - states = m_states, - alphabetMap = m_alphabetMap, - initialState = m_initialState - }); + m_defs.Push(m_config); + m_config = config; - m_states = states; - m_alphabetMap = alphabet; - m_initialState = initialState; - - m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; } /// <summary> @@ -242,11 +234,9 @@ protected void Restore() { if (m_defs.Count == 0) throw new InvalidOperationException(); - var prev = m_defs.Pop(); - m_states = prev.states; - m_alphabetMap = prev.alphabetMap; - m_initialState = prev.initialState; - m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + m_config = m_defs.Pop(); + + m_previewCode = m_config.alphabet[m_buffer[m_pointer]]; } protected override void Dispose(bool disposing) {
--- a/Implab/Formats/ByteAlphabet.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/ByteAlphabet.cs Mon Mar 14 01:19:38 2016 +0300 @@ -13,6 +13,10 @@ return (int)symbol; } + public override byte GetSymbolByIndex(int index) { + return (byte)index; + } + public IEnumerable<byte> InputSymbols { get { return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>();
--- a/Implab/Formats/CharAlphabet.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/CharAlphabet.cs Mon Mar 14 01:19:38 2016 +0300 @@ -13,6 +13,10 @@ return symbol; } + public override char GetSymbolByIndex(int index) { + return (char)index; + } + public override IEnumerable<char> InputSymbols { get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } }
--- a/Implab/Formats/JSON/JSONGrammar.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/JSON/JSONGrammar.cs Mon Mar 14 01:19:38 2016 +0300 @@ -1,6 +1,7 @@ using System.Linq; using Implab.Automaton.RegularExpressions; using System; +using Implab.Automaton; namespace Implab.Formats.JSON { class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { @@ -35,8 +36,8 @@ get { return _instance.Value; } } - readonly RegularCharDFADefinition<TokenType> m_jsonDFA; - readonly RegularCharDFADefinition<TokenType> m_stringDFA; + readonly RegularDFA<char, TokenType> m_jsonDFA; + readonly RegularDFA<char, TokenType> m_stringDFA; public JSONGrammar() { DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); @@ -87,21 +88,17 @@ .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - m_jsonDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); - BuildDFA(jsonExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); - - - m_stringDFA = new RegularCharDFADefinition<TokenType>(new CharAlphabet()); - BuildDFA(jsonStringExpression, m_jsonDFA, m_jsonDFA.InputAlphabet); + m_jsonDFA = BuildDFA(jsonExpression); + m_stringDFA = BuildDFA(jsonStringExpression); } - public RegularCharDFADefinition<TokenType> JsonDFA { + public RegularDFA<char, TokenType> JsonDFA { get { return m_jsonDFA; } } - public RegularDFADefinition<char,TokenType> JsonStringDFA { + public RegularDFA<char,TokenType> JsonStringDFA { get { return m_stringDFA; } @@ -110,6 +107,10 @@ Token<TokenType> SymbolRangeToken(char start, char stop) { return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); } + + protected override IAlphabetBuilder<char> CreateAlphabet() { + return new CharAlphabet(); + } } }
--- a/Implab/Formats/JSON/JSONParser.cs Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Formats/JSON/JSONParser.cs Mon Mar 14 01:19:38 2016 +0300 @@ -86,9 +86,9 @@ return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); } - static RegularDFADefinition<JsonTokenType,object> CreateDFA(Token<object> expr) { - var builder = new RegularDFABuilder<object>(); - var dfa = new RegularDFADefinition<JsonTokenType,object>(_alphabet); + static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { + var builder = new RegularExpressionVisitor<object>(); + var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); expr.Accept(builder);
--- a/Implab/Formats/RegularCharDFADefinition.cs Thu Mar 10 01:19:33 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -using System; -using Implab.Automaton.RegularExpressions; - -namespace Implab.Formats { - public class RegularCharDFADefinition<TTag> : RegularDFADefinition<char,TTag> { - readonly CharAlphabet m_alphabet; - - public RegularCharDFADefinition(CharAlphabet alphabet) : base(alphabet) { - m_alphabet = alphabet; - } - - public new CharAlphabet InputAlphabet { - get { return m_alphabet; } - } - } -} -
--- a/Implab/Implab.csproj Thu Mar 10 01:19:33 2016 +0300 +++ b/Implab/Implab.csproj Mon Mar 14 01:19:38 2016 +0300 @@ -170,7 +170,6 @@ <Compile Include="Automaton\RegularExpressions\Token.cs" /> <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> <Compile Include="Automaton\AutomatonTransition.cs" /> - <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" /> <Compile Include="Formats\JSON\JSONElementContext.cs" /> <Compile Include="Formats\JSON\JSONElementType.cs" /> <Compile Include="Formats\JSON\JSONGrammar.cs" /> @@ -183,13 +182,14 @@ <Compile Include="Formats\JSON\StringTranslator.cs" /> <Compile Include="Automaton\MapAlphabet.cs" /> <Compile Include="Automaton\DummyAlphabet.cs" /> - <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> <Compile Include="Formats\CharAlphabet.cs" /> <Compile Include="Formats\ByteAlphabet.cs" /> - <Compile Include="Formats\RegularCharDFADefinition.cs" /> <Compile Include="Automaton\IDFATable.cs" /> <Compile Include="Automaton\IDFATableBuilder.cs" /> <Compile Include="Automaton\DFATable.cs" /> + <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> + <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> + <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> <Compile Include="Automaton\RegularExpressions\DFAStateDescriptorT.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />