Mercurial > pub > ImplabNet
changeset 164:ec35731ae299 ref20160224
Almost complete DFA refactoring
line wrap: on
line diff
--- a/Implab/Automaton/ByteAlphabet.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -using System; - -namespace Implab.Automaton { - public class ByteAlphabet : IndexedAlphabetBase<byte> { - public ByteAlphabet() : base(byte.MaxValue + 1){ - } - - #region implemented abstract members of IndexedAlphabetBase - - public override int GetSymbolIndex(byte symbol) { - return (int)symbol; - } - - public override System.Collections.Generic.IEnumerable<byte> InputSymbols { - get { - throw new NotImplementedException(); - } - } - - #endregion - } -} -
--- a/Implab/Automaton/CDFADefinition.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -namespace Implab.Automaton { - public class CDFADefinition : DFADefinition { - readonly CharAlphabet m_alphabet; - - public CharAlphabet Alphabet { - get { return m_alphabet; } - } - - public CDFADefinition(CharAlphabet alphabet): base(alphabet.Count) { - m_alphabet = alphabet; - } - - public CDFADefinition Optimize() { - - return (CDFADefinition)Optimize(alphabet => new CDFADefinition((CharAlphabet)alphabet), m_alphabet, new CharAlphabet()); - } - - public void PrintDFA() { - PrintDFA(m_alphabet); - } - } -}
--- a/Implab/Automaton/CharAlphabet.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.Automaton { - public class CharAlphabet: IndexedAlphabetBase<char> { - - public CharAlphabet() - : base(char.MaxValue + 1) { - } - - public override int GetSymbolIndex(char symbol) { - return symbol; - } - - public override IEnumerable<char> InputSymbols { - get { return Enumerable.Range(char.MinValue, char.MaxValue).Select(x => (char)x); } - } - } -}
--- a/Implab/Automaton/DFADefinition.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,213 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton { - public class DFADefinition<TInput, TState, TTag> : IDFADefinitionBuilder<TTag>, IDFADefinition<TInput, TState, TTag> { - DFAStateDescriptior<TTag>[] m_dfaTable; - readonly IAlphabet<TInput> m_inputAlphabet; - readonly IAlphabet<TState> m_stateAlphabet; - - readonly Dictionary<int, TTag[]> m_finalStates = new Dictionary<int, TTag[]>(); - readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); - - public DFADefinition(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { - Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); - Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); - - m_inputAlphabet = inputAlphabet; - m_stateAlphabet = stateAlphabet; - } - - public void DefineTransition(int s1, int s2, int symbol) { - Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count-1, "s1"); - Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count-1, "s2"); - Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count-1, "symbol"); - - m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); - } - - - #region IDFADefinition implementation - - public DFAStateDescriptior<TTag>[] GetTransitionTable() { - if (m_dfaTable == null) { - m_dfaTable = new DFAStateDescriptior<TTag>[m_stateAlphabet.Count]; - - foreach (var pair in m_finalStates) { - var idx = pair.Key; - - m_dfaTable[idx].final = true; - m_dfaTable[idx].tag = m_dfaTable[idx].tag != null ? - m_dfaTable[idx].tag.Concat(pair.Value).Distinct().ToArray() : - pair.Value; - } - - foreach (var t in m_transitions) { - if (m_dfaTable[t.s1].transitions == null) { - m_dfaTable[t.s1].transitions = new int[m_inputAlphabet.Count]; - for (int i = 0; i < m_dfaTable[t.s1].transitions.Length; i++) - m_dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE; - } - - m_dfaTable[t.s1].transitions[t.edge] = t.s2; - } - } - return m_dfaTable; - } - - public IAlphabet<TInput> InputAlphabet { - get { - return m_inputAlphabet; - } - } - - public IAlphabet<TState> StateAlphabet { - get { - return m_stateAlphabet; - } - } - - #endregion - - #region IDFADefinitionBuilder - - public void DefineTransition(int s1, int s2, int symbol) { - Safe.ArgumentInRange(s1, 0, m_stateAlphabet.Count - 1, "s1"); - Safe.ArgumentInRange(s2, 0, m_stateAlphabet.Count - 1, "s2"); - Safe.ArgumentInRange(symbol, 0, m_inputAlphabet.Count - 1, "symbol"); - - m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); - } - - public void MarkFinalState(int state, params TTag[] tags) { - m_finalStates[state] = tags; - } - - - #endregion - - protected void Optimize(IDFADefinitionBuilder<TTag> optimalDFA,IAlphabetBuilder<TInput> optimalInputAlphabet, IAlphabetBuilder<TState> optimalStateAlphabet) { - Safe.ArgumentNotNull(optimalDFA, "dfa"); - Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); - Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); - - var setComparer = new CustomEqualityComparer<HashSet<int>>( - (x, y) => x.SetEquals(y), - s => s.Sum(x => x.GetHashCode()) - ); - - var arrayComparer = new CustomEqualityComparer<TTag[]>( - (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), - a => a.Sum(x => x.GetHashCode()) - ); - - var optimalStates = new HashSet<HashSet<int>>(setComparer); - var queue = new HashSet<HashSet<int>>(setComparer); - - // получаем конечные состояния, сгруппированные по маркерам - optimalStates.UnionWith( - m_finalStates - .GroupBy(pair => pair.Value, arrayComparer) - .Select( - g => new HashSet<int>( - g.Select( pair => pair.Key) - ) - ) - ); - - var state = new HashSet<int>( - Enumerable - .Range(0, m_stateAlphabet.Count - 1) - .Where(i => !m_finalStates.ContainsKey(i)) - ); - optimalStates.Add(state); - queue.Add(state); - - var rmap = m_transitions - .GroupBy(t => t.s2) - .ToLookup( - g => g.Key, // s2 - g => g.ToLookup(t => t.edge, t => t.s1) - ); - - while (queue.Count > 0) { - var stateA = queue.First(); - queue.Remove(stateA); - - for (int c = 0; c < m_inputAlphabet.Count; c++) { - var stateX = new HashSet<int>(); - foreach(var a in stateA) - stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' - - foreach (var stateY in optimalStates.ToArray()) { - if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { - var stateR1 = new HashSet<int>(stateY); - var stateR2 = new HashSet<int>(stateY); - - stateR1.IntersectWith(stateX); - stateR2.ExceptWith(stateX); - - optimalStates.Remove(stateY); - optimalStates.Add(stateR1); - optimalStates.Add(stateR2); - - if (queue.Contains(stateY)) { - queue.Remove(stateY); - queue.Add(stateR1); - queue.Add(stateR2); - } else { - queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); - } - } - } - } - } - - // карта получения оптимального состояния по соотвествующему ему простому состоянию - var statesMap = m_stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); - - // получаем минимальный алфавит - // входные символы не различимы, если Move(s,a1) == Move(s,a2) - var optimalAlphabet = m_transitions - .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); - - var alphabetMap = m_inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); - - var optimalTags = m_finalStates - .GroupBy(pair => statesMap[pair.Key]) - .ToDictionary( - g => g.Key, - g => g.SelectMany(pair => pair.Value).ToArray() - ); - - // построение автомата - foreach (var pair in optimalTags) - optimalDFA.MarkFinalState(pair.Key, pair.Value); - - foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) - optimalDFA.DefineTransition(t.s1, t.s2, t.edge); - } - - public void PrintDFA() { - - var inputMap = InputAlphabet.CreateReverseMap(); - var stateMap = StateAlphabet.CreateReverseMap(); - - for (int i = 0; i < inputMap.Length; i++) { - Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); - } - - foreach(var t in m_transitions) - Console.WriteLine( - "S{0} -{1}-> S{2}{3}", - stateMap[t.s1], - String.Join(",", inputMap[t.edge]), - stateMap[t.s2], - m_finalStates.ContainsKey(t.s2) ? "$" : "" - ); - - } - } -}
--- a/Implab/Automaton/DFAStateDescriptor.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/DFAStateDescriptor.cs Thu Feb 25 02:11:13 2016 +0300 @@ -1,10 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace Implab.Automaton { +namespace Implab.Automaton { public struct DFAStateDescriptior<TTag> { public bool final; public TTag[] tag;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/DFATransitionTable.cs Thu Feb 25 02:11:13 2016 +0300 @@ -0,0 +1,251 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton { + public class DFATransitionTable<TTag> : IDFATransitionTableBuilder<TTag> { + DFAStateDescriptior<TTag>[] m_dfaTable; + + int m_stateCount; + int m_symbolCount; + int m_initialState; + + readonly Dictionary<int, TTag[]> m_finalStates = new Dictionary<int, TTag[]>(); + readonly HashSet<AutomatonTransition> m_transitions = new HashSet<AutomatonTransition>(); + + + #region IDFADefinition implementation + + public DFAStateDescriptior<TTag>[] GetTransitionTable() { + if (m_dfaTable == null) { + if (m_stateCount <= 0) + throw new InvalidOperationException("Invalid automaton definition: states count = {0}", m_stateCount); + if (m_symbolCount <= 0) + throw new InvalidOperationException("Invalid automaton definition: symbols count = {0}", m_symbolCount); + + m_dfaTable = ConstructTransitionTable(); + } + return m_dfaTable; + } + + public bool IsFinalState(int s) { + Safe.ArgumentInRange(s, 0, m_stateCount, "s"); + + return m_finalStates.ContainsKey(s); + } + + public IEnumerable<KeyValuePair<int,TTag[]>> FinalStates { + get { + return m_finalStates; + } + } + + public int StateCount { + get { return m_stateCount; } + } + + public int AlphabetSize { + get { return m_symbolCount; } + } + + public int InitialState { + get { return m_initialState; } + } + + #endregion + + protected virtual DFAStateDescriptior<TTag>[] ConstructTransitionTable() { + var dfaTable = new DFAStateDescriptior<TTag>[m_stateCount]; + + foreach (var pair in m_finalStates) { + var idx = pair.Key; + + dfaTable[idx].final = true; + dfaTable[idx].tag = pair.Value; + } + + foreach (var t in m_transitions) { + if (dfaTable[t.s1].transitions == null) { + dfaTable[t.s1].transitions = new int[m_symbolCount]; + for (int i = 0; i < dfaTable[t.s1].transitions.Length; i++) + dfaTable[t.s1].transitions[i] = DFAConst.UNREACHABLE_STATE; + } + + dfaTable[t.s1].transitions[t.edge] = t.s2; + } + } + + #region IDFADefinitionBuilder + + public void DefineTransition(int s1, int s2, int symbol) { + if (m_dfaTable != null) + throw new InvalidOperationException("The transition table is already built"); + + Safe.ArgumentAssert(s1 > 0, "s1"); + Safe.ArgumentAssert(s2 > 0, "s2"); + Safe.ArgumentAssert(symbol >= 0, "symbol"); + + m_stateCount = Math.Max(Math.Max(m_stateCount, s1 + 1), s2 + 1); + m_symbolCount = Math.Max(m_symbolCount, symbol + 1); + + m_transitions.Add(new AutomatonTransition(s1, s2, symbol)); + } + + public void MarkFinalState(int state, params TTag[] tags) { + if (m_dfaTable != null) + throw new InvalidOperationException("The transition table is already built"); + + m_finalStates[state] = tags; + } + + public void SetInitialState(int s) { + Safe.ArgumentAssert(s >= 0, "s"); + m_initialState = s; + } + + + #endregion + + protected void Optimize<TInput, TState>( + IDFATransitionTableBuilder<TTag> optimalDFA, + IAlphabet<TInput> inputAlphabet, + IAlphabetBuilder<TInput> optimalInputAlphabet, + IAlphabet<TState> stateAlphabet, + IAlphabetBuilder<TState> optimalStateAlphabet + ) { + Safe.ArgumentNotNull(optimalDFA, "dfa"); + Safe.ArgumentNotNull(optimalInputAlphabet, "optimalInputAlphabet"); + Safe.ArgumentNotNull(optimalStateAlphabet, "optimalStateAlphabet"); + Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); + Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); + + if (inputAlphabet.Count != m_symbolCount) + throw new InvalidOperationException("The input symbols aphabet mismatch"); + if (stateAlphabet.Count != m_stateCount) + throw new InvalidOperationException("The states alphabet mismatch"); + + var setComparer = new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + s => s.Sum(x => x.GetHashCode()) + ); + + var arrayComparer = new CustomEqualityComparer<TTag[]>( + (x,y) => (new HashSet<int>(x)).SetEquals(new HashSet<int>(y)), + a => a.Sum(x => x.GetHashCode()) + ); + + var optimalStates = new HashSet<HashSet<int>>(setComparer); + var queue = new HashSet<HashSet<int>>(setComparer); + + // получаем конечные состояния, сгруппированные по маркерам + optimalStates.UnionWith( + m_finalStates + .GroupBy(pair => pair.Value, arrayComparer) + .Select( + g => new HashSet<int>( + g.Select( pair => pair.Key) + ) + ) + ); + + var state = new HashSet<int>( + Enumerable + .Range(0, m_stateCount - 1) + .Where(i => !m_finalStates.ContainsKey(i)) + ); + optimalStates.Add(state); + queue.Add(state); + + var rmap = m_transitions + .GroupBy(t => t.s2) + .ToLookup( + g => g.Key, // s2 + g => g.ToLookup(t => t.edge, t => t.s1) + ); + + while (queue.Count > 0) { + var stateA = queue.First(); + queue.Remove(stateA); + + for (int c = 0; c < m_symbolCount; c++) { + var stateX = new HashSet<int>(); + foreach(var a in stateA) + stateX.UnionWith(rmap[a][c]); // all states from wich 'c' leads to 'a' + + foreach (var stateY in optimalStates.ToArray()) { + if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { + var stateR1 = new HashSet<int>(stateY); + var stateR2 = new HashSet<int>(stateY); + + stateR1.IntersectWith(stateX); + stateR2.ExceptWith(stateX); + + optimalStates.Remove(stateY); + optimalStates.Add(stateR1); + optimalStates.Add(stateR2); + + if (queue.Contains(stateY)) { + queue.Remove(stateY); + queue.Add(stateR1); + queue.Add(stateR2); + } else { + queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); + } + } + } + } + } + + // карта получения оптимального состояния по соотвествующему ему простому состоянию + var statesMap = stateAlphabet.Reclassify(optimalStateAlphabet, optimalStates); + + // получаем минимальный алфавит + // входные символы не различимы, если Move(s,a1) == Move(s,a2) + var optimalAlphabet = m_transitions + .GroupBy(t => Tuple.Create(statesMap[t.s1], statesMap[t.s2]), t => t.edge); + + var alphabetMap = inputAlphabet.Reclassify(optimalInputAlphabet, optimalAlphabet); + + var optimalTags = m_finalStates + .GroupBy(pair => statesMap[pair.Key]) + .ToDictionary( + g => g.Key, + g => g.SelectMany(pair => pair.Value).ToArray() + ); + + // построение автомата + optimalDFA.SetInitialState(statesMap[m_initialState]); + + foreach (var pair in optimalTags) + optimalDFA.MarkFinalState(pair.Key, pair.Value); + + foreach (var t in m_transitions.Select(t => new AutomatonTransition(statesMap[t.s1],statesMap[t.s2],alphabetMap[t.edge])).Distinct()) + optimalDFA.DefineTransition(t.s1, t.s2, t.edge); + + } + + protected void PrintDFA<TInput, TState>(IAlphabet<TInput> inputAlphabet, IAlphabet<TState> stateAlphabet) { + Safe.ArgumentNotNull(inputAlphabet, "inputAlphabet"); + Safe.ArgumentNotNull(stateAlphabet, "stateAlphabet"); + + var inputMap = inputAlphabet.CreateReverseMap(); + var stateMap = stateAlphabet.CreateReverseMap(); + + for (int i = 0; i < inputMap.Length; i++) + Console.WriteLine("C{0}: {1}", i, String.Join(",", inputMap[i])); + + + foreach(var t in m_transitions) + Console.WriteLine( + "[{0}] -{{{1}}}-> [{2}]{3}", + stateMap[t.s1], + String.Join(",", inputMap[t.edge]), + stateMap[t.s2], + m_finalStates.ContainsKey(t.s2) ? "$" : "" + ); + + } + + } +}
--- a/Implab/Automaton/DummyAlphabet.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/DummyAlphabet.cs Thu Feb 25 02:11:13 2016 +0300 @@ -3,8 +3,16 @@ using System.Linq; namespace Implab.Automaton { + /// <summary> + /// Dummy alphabet consists of integer numbers which are identical to their classes. + /// </summary> public class DummyAlphabet : IAlphabet<int> { readonly int m_size; + + /// <summary> + /// Creates a new dummy alphabet with given size. + /// </summary> + /// <param name="size">The size of the alphabet, must be greater then zero.</param> public DummyAlphabet(int size) { Safe.ArgumentAssert(size > 0); m_size = 0; @@ -21,6 +29,8 @@ Safe.ArgumentNotNull(classes, "classes"); var map = new int[m_size]; foreach (var cls in classes) { + if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT)) + continue; var newid = newAlphabet.DefineClass(cls); foreach (var id in cls) map[id] = newid;
--- a/Implab/Automaton/EDFADefinition.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -using Implab; -using System; - -namespace Implab.Parsing { - public class EDFADefinition<T> : DFADefinition where T : struct, IConvertible { - readonly EnumAlphabet<T> m_alphabet; - - public EnumAlphabet<T> Alphabet { - get { return m_alphabet; } - } - - public EDFADefinition(EnumAlphabet<T> alphabet) : base(alphabet.Count) { - m_alphabet = alphabet; - } - - public void DefineTransition(int s1, int s2, T input) { - DefineTransition(s1, s2, m_alphabet.Translate(input)); - } - - public EDFADefinition<T> Optimize() { - - return (EDFADefinition<T>)Optimize(alphabet => new EDFADefinition<T>((EnumAlphabet<T>)alphabet), m_alphabet, new EnumAlphabet<T>()); - } - - public void PrintDFA() { - PrintDFA(m_alphabet); - } - } -}
--- a/Implab/Automaton/EnumAlphabet.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/EnumAlphabet.cs Thu Feb 25 02:11:13 2016 +0300 @@ -12,46 +12,49 @@ /// <typeparam name="T">Тип перечислений</typeparam> public class EnumAlphabet<T> : IndexedAlphabetBase<T> where T : struct, IConvertible { [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] - static readonly T[] _symbols; - static readonly EnumAlphabet<T> _fullAlphabet; + static readonly Lazy<T[]> _symbols = new Lazy<T[]>(GetSymbols); + + [SuppressMessage("Microsoft.Design", "CA1000:DoNotDeclareStaticMembersOnGenericTypes")] + static readonly Lazy<EnumAlphabet<T>> _fullAlphabet = new Lazy<EnumAlphabet<T>>(CreateEnumAlphabet); + + static EnumAlphabet<T> CreateEnumAlphabet() { + var symbols = _symbols.Value; - [SuppressMessage("Microsoft.Design", "CA1065:DoNotRaiseExceptionsInUnexpectedLocations")] - static EnumAlphabet() { + if ( + symbols[symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= symbols.Length + || symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 + ) + throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); + + return new EnumAlphabet<T>(symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); + } + + static T[] GetSymbols() { if (!typeof(T).IsEnum) throw new InvalidOperationException("Invalid generic parameter, enumeration is required"); if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32)) throw new InvalidOperationException("Only enums based on Int32 are supported"); - - _symbols = ((T[])Enum.GetValues(typeof(T))) + + return ((T[])Enum.GetValues(typeof(T))) .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture)) .ToArray(); - - if ( - _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length - || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 - ) - throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); - - _fullAlphabet = new EnumAlphabet<T>(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); } - - public static EnumAlphabet<T> FullAlphabet { get { - return _fullAlphabet; + return _fullAlphabet.Value; } } public EnumAlphabet() - : base(_symbols.Length) { + : base(_symbols.Value.Length) { } public EnumAlphabet(int[] map) : base(map) { - Debug.Assert(map.Length == _symbols.Length); + Debug.Assert(map.Length == _symbols.Value.Length); } @@ -60,7 +63,7 @@ } public override IEnumerable<T> InputSymbols { - get { return _symbols; } + get { return _symbols.Value; } } }
--- a/Implab/Automaton/IAlphabetBuilder.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/IAlphabetBuilder.cs Thu Feb 25 02:11:13 2016 +0300 @@ -17,9 +17,6 @@ /// <param name="symbols">Множестов исходных символов</param> /// <returns>Идентификатор символа алфавита.</returns> int DefineClass(IEnumerable<TSymbol> symbols); - - - } }
--- a/Implab/Automaton/IDFADefinition.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ - -namespace Implab.Automaton { - /// <summary> - /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. - /// </summary> - /// <example> - /// class MyAutomaton { - /// int m_current; - /// readonly DFAStateDescriptor<string>[] m_automaton; - /// readonly IAlphabet<MyCommands> m_commands; - /// - /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { - /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); - /// m_automaton = definition.GetTransitionTable(); - /// m_commands = definition.InputAlphabet; - /// } - /// - /// // defined a method which will move the automaton to the next state - /// public void Move(MyCommands cmd) { - /// // use transition map to determine the next state - /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; - /// - /// // validate that we aren't in the unreachable state - /// if (next == DFAConst.UNREACHABLE_STATE) - /// throw new InvalidOperationException("The specified command is invalid"); - /// - /// // if everything is ok - /// m_current = next; - /// } - /// } - /// </example> - public interface IDFADefinition<TInput, TState, TTag> { - /// <summary> - /// Алфавит входных символов - /// </summary> - /// <value>The input alphabet.</value> - IAlphabet<TInput> InputAlphabet { - get; - } - - /// <summary> - /// Алфавит состояний автомата - /// </summary> - /// <value>The state alphabet.</value> - IAlphabet<TState> StateAlphabet { - get; - } - - /// <summary> - /// Таблица переходов состояний автомата - /// </summary> - /// <returns>The transition table.</returns> - DFAStateDescriptior<TTag>[] GetTransitionTable(); - - } -}
--- a/Implab/Automaton/IDFADefinitionBuilder.cs Wed Feb 24 20:12:52 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -using System; - -namespace Implab.Automaton { - public interface IDFADefinitionBuilder<TTag> { - /// <summary> - /// Marks the state as final and assings tags. - /// </summary> - /// <param name="state">State.</param> - /// <param name="tags">Tags.</param> - void MarkFinalState(int state, params TTag[] tags); - - /// <summary> - /// Defines the transition from <paramref name="s1"/> to - /// <paramref name="s2"/> with input <paramref name="symbol"/>. - /// </summary> - /// <param name="s1">S1.</param> - /// <param name="s2">S2.</param> - /// <param name="symbol">Symbol.</param> - void DefineTransition(int s1, int s2, int symbol); - - } -} -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/IDFATransitionTable.cs Thu Feb 25 02:11:13 2016 +0300 @@ -0,0 +1,59 @@ +using System.Collections.Generic; + + +namespace Implab.Automaton { + /// <summary> + /// Полностью описывает DFA автомат, его поведение, состояние и входные символы. + /// </summary> + /// <example> + /// class MyAutomaton { + /// int m_current; + /// readonly DFAStateDescriptor<string>[] m_automaton; + /// readonly IAlphabet<MyCommands> m_commands; + /// + /// public MyAutomaton(IDFADefinition<MyCommands,MyStates,string> definition) { + /// m_current = definition.StateAlphabet.Translate(MyStates.Initial); + /// m_automaton = definition.GetTransitionTable(); + /// m_commands = definition.InputAlphabet; + /// } + /// + /// // defined a method which will move the automaton to the next state + /// public void Move(MyCommands cmd) { + /// // use transition map to determine the next state + /// var next = m_automaton[m_current].transitions[m_commands.Translate(cmd)]; + /// + /// // validate that we aren't in the unreachable state + /// if (next == DFAConst.UNREACHABLE_STATE) + /// throw new InvalidOperationException("The specified command is invalid"); + /// + /// // if everything is ok + /// m_current = next; + /// } + /// } + /// </example> + public interface IDFATransitionTable<TTag> { + /// <summary> + /// Таблица переходов состояний автомата + /// </summary> + /// <returns>The transition table.</returns> + DFAStateDescriptior<TTag>[] GetTransitionTable(); + + int StateCount { + get; + } + + int AlphabetSize { + get; + } + + int InitialState { + get; + } + + bool IsFinalState(int s); + + IEnumerable<KeyValuePair<int,TTag[]>> FinalStates { + get; + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/IDFATransitionTableBuilder.cs Thu Feb 25 02:11:13 2016 +0300 @@ -0,0 +1,25 @@ +using System; + +namespace Implab.Automaton { + public interface IDFATransitionTableBuilder<TTag> : IDFATransitionTable<TTag> { + /// <summary> + /// Marks the state as final and assings tags. + /// </summary> + /// <param name="state">State.</param> + /// <param name="tags">Tags.</param> + void MarkFinalState(int state, params TTag[] tags); + + /// <summary> + /// Defines the transition from <paramref name="s1"/> to + /// <paramref name="s2"/> with input <paramref name="symbol"/>. + /// </summary> + /// <param name="s1">S1.</param> + /// <param name="s2">S2.</param> + /// <param name="symbol">Symbol.</param> + void DefineTransition(int s1, int s2, int symbol); + + void SetInitialState(int s); + + } +} +
--- a/Implab/Automaton/IndexedAlphabetBase.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/IndexedAlphabetBase.cs Thu Feb 25 02:11:13 2016 +0300 @@ -9,8 +9,6 @@ /// Indexed alphabet is the finite set of symbols where each symbol has a zero-based unique index. /// </summary> public abstract class IndexedAlphabetBase<T> : IAlphabetBuilder<T> { - public const int UNCLASSIFIED = 0; - int m_nextId = 1; readonly int[] m_map; @@ -31,7 +29,7 @@ public int DefineSymbol(T symbol) { var index = GetSymbolIndex(symbol); - if (m_map[index] == UNCLASSIFIED) + if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) m_map[index] = m_nextId++; return m_map[index]; } @@ -42,7 +40,7 @@ foreach (var symbol in symbols) { var index = GetSymbolIndex(symbol); - if (m_map[index] == UNCLASSIFIED) + if (m_map[index] == DFAConst.UNCLASSIFIED_INPUT) m_map[GetSymbolIndex(symbol)] = m_nextId; else throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); @@ -52,10 +50,10 @@ public List<T>[] CreateReverseMap() { return - Enumerable.Range(UNCLASSIFIED, Count) + Enumerable.Range(0, Count) .Select( i => InputSymbols - .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) + .Where(x => i != DFAConst.UNCLASSIFIED_INPUT && m_map[GetSymbolIndex(x)] == i) .ToList() ) .ToArray(); @@ -70,7 +68,7 @@ foreach (var scl in classes) { // skip if the supper class contains the unclassified element - if (scl.Contains(UNCLASSIFIED)) + if (scl.Contains(DFAConst.UNCLASSIFIED_INPUT)) continue; var range = new List<T>(); foreach (var cl in scl) {
--- a/Implab/Automaton/MapAlphabet.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/MapAlphabet.cs Thu Feb 25 02:11:13 2016 +0300 @@ -5,11 +5,14 @@ namespace Implab.Automaton { public class MapAlphabet<T> : IAlphabetBuilder<T> { readonly Dictionary<T,int> m_map; - int m_nextCls; + int m_nextCls = 1; + + public MapAlphabet() { + m_map = new Dictionary<T, int>(); + } public MapAlphabet(IEqualityComparer<T> comparer) { m_map = new Dictionary<T, int>(comparer); - m_nextCls = 1; } #region IAlphabetBuilder implementation @@ -71,6 +74,9 @@ var map = new int[rmap.Length]; foreach (var cls in classes) { + if (cls.Contains(DFAConst.UNCLASSIFIED_INPUT)) + continue; + var symbols = new List<T>(); foreach (var id in cls) { if (id < 0 || id >= rmap.Length)
--- a/Implab/Automaton/RegularExpressions/Grammar.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/Grammar.cs Thu Feb 25 02:11:13 2016 +0300 @@ -71,14 +71,16 @@ protected CDFADefinition BuildDFA(Token<TTag> lang) { Safe.ArgumentNotNull(lang, "lang"); - var dfa = new CDFADefinition(Alphabet); - + var dfa = new RegularDFADefinition<TSymbol, TTag>(Alphabet, 0); + + var table = new DFATransitionTable<TTag>(); + var builder = new RegularDFABuilder<TTag>(); lang.Accept( builder ); - builder.BuildDFA(dfa); - if (dfa.InitialStateIsFinal) + var initialState = builder.BuildDFA(table); + if (table.IsFinalState(initialState)) throw new ApplicationException("The specified language contains empty token"); return dfa.Optimize();
--- a/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFABuilder.cs Thu Feb 25 02:11:13 2016 +0300 @@ -122,7 +122,7 @@ m_ends.Add(m_idx, token.Tag); } - public void BuildDFA(IDFADefinitionBuilder<TTag> dfa) { + public void BuildDFA(IDFATransitionTableBuilder<TTag> dfa) { Safe.ArgumentNotNull(dfa,"dfa"); var states = new MapAlphabet<HashSet<int>>(new CustomEqualityComparer<HashSet<int>>( @@ -131,7 +131,8 @@ )); var initialState = states.DefineSymbol(m_firstpos); - + dfa.SetInitialState(initialState); + var tags = GetStateTags(m_firstpos); if (tags != null && tags.Length > 0) dfa.MarkFinalState(initialState, tags);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/RegularDFADefinition.cs Thu Feb 25 02:11:13 2016 +0300 @@ -0,0 +1,46 @@ +using System; + +namespace Implab.Automaton.RegularExpressions { + public class RegularDFADefinition<TInput, TTag> : DFATransitionTable<TTag>, IDFATransitionTable<TTag> { + + readonly IAlphabet<TInput> m_alphabet; + readonly int m_initialState; + + public RegularDFADefinition(IAlphabet<TInput> alphabet, int initialState) { + Safe.ArgumentNotNull(alphabet, "aplhabet"); + + m_alphabet = alphabet; + m_initialState = initialState; + } + + + public IAlphabet<TInput> InputAlphabet { + get { + return m_alphabet; + } + } + + protected override DFAStateDescriptior<TTag>[] ConstructTransitionTable() { + if (InputAlphabet.Count != m_alphabet.Count) + throw new InvalidOperationException("The alphabet doesn't match the transition table"); + + return base.ConstructTransitionTable(); + } + + /// <summary> + /// Optimize the specified alphabet. + /// </summary> + /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> + public RegularDFADefinition<TInput, TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + var optimalDFA = new RegularDFADefinition<TInput,TTag>(alphabet, m_initialState); + + Optimize(optimalDFA, InputAlphabet, alphabet, new DummyAlphabet(StateCount), new MapAlphabet<int>()); + + } + + + } +} +
--- a/Implab/Automaton/Scanner.cs Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Automaton/Scanner.cs Thu Feb 25 02:11:13 2016 +0300 @@ -17,12 +17,14 @@ struct ScannerConfig { public DFAStateDescriptior<TTag>[] states; public int[] alphabetMap; + public int initialState; } Stack<ScannerConfig> m_defs = new Stack<ScannerConfig>(); DFAStateDescriptior<TTag>[] m_states; int[] m_alphabetMap; + int m_initialState; protected DFAStateDescriptior<TTag> m_currentState; int m_previewCode; @@ -39,12 +41,13 @@ int m_chunkSize = 1024; // 1k int m_limit = 10 * 1024 * 1024; // 10Mb - protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet) { + protected Scanner(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { Safe.ArgumentNotEmpty(states, "states"); Safe.ArgumentNotNull(alphabet, "alphabet"); m_states = states; m_alphabetMap = alphabet; + m_initialState = initialState; Feed(new char[0]); } @@ -130,7 +133,7 @@ if (m_pointer >= m_bufferSize) return false; - m_currentState = m_states[DFADefinition.INITIAL_STATE]; + m_currentState = m_states[m_initialState]; m_tokenLen = 0; m_tokenOffset = m_pointer; int nextState; @@ -217,16 +220,18 @@ /// </summary> /// <param name="states">Таблица состояний нового ДКА</param> /// <param name="alphabet">Таблица входных символов для нового ДКА</param> - protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet) { + protected void Switch(DFAStateDescriptior<TTag>[] states, int[] alphabet, int initialState) { Safe.ArgumentNotNull(states, "dfa"); m_defs.Push(new ScannerConfig { states = m_states, - alphabetMap = m_alphabetMap + alphabetMap = m_alphabetMap, + initialState = m_initialState }); m_states = states; m_alphabetMap = alphabet; + m_initialState = initialState; m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; } @@ -240,6 +245,7 @@ var prev = m_defs.Pop(); m_states = prev.states; m_alphabetMap = prev.alphabetMap; + m_initialState = prev.initialState; m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Formats/ByteAlphabet.cs Thu Feb 25 02:11:13 2016 +0300 @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Implab.Automaton { + public class ByteAlphabet : IndexedAlphabetBase<byte> { + public ByteAlphabet() : base(byte.MaxValue + 1){ + } + + #region implemented abstract members of IndexedAlphabetBase + + public override int GetSymbolIndex(byte symbol) { + return (int)symbol; + } + + public IEnumerable<byte> InputSymbols { + get { + return Enumerable.Range(byte.MinValue, byte.MaxValue).Cast<byte>(); + } + } + + #endregion + } +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Formats/CharAlphabet.cs Thu Feb 25 02:11:13 2016 +0300 @@ -0,0 +1,23 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Automaton { + public class CharAlphabet: IndexedAlphabetBase<char> { + + public CharAlphabet() + : base(char.MaxValue + 1) { + } + + public override int GetSymbolIndex(char symbol) { + return symbol; + } + + public override IEnumerable<char> InputSymbols { + get { return Enumerable.Range(char.MinValue, char.MaxValue).Cast<char>(); } + } + } +}
--- a/Implab/Implab.csproj Wed Feb 24 20:12:52 2016 +0300 +++ b/Implab/Implab.csproj Thu Feb 25 02:11:13 2016 +0300 @@ -8,6 +8,9 @@ <RootNamespace>Implab</RootNamespace> <AssemblyName>Implab</AssemblyName> <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> + <ReleaseVersion>0.2</ReleaseVersion> + <ProductVersion>8.0.30703</ProductVersion> + <SchemaVersion>2.0</SchemaVersion> </PropertyGroup> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <DebugSymbols>true</DebugSymbols> @@ -148,20 +151,14 @@ <Compile Include="Components\ExecutionState.cs" /> <Compile Include="Components\RunnableComponent.cs" /> <Compile Include="Components\IFactory.cs" /> - <Compile Include="Automaton\CDFADefinition.cs" /> <Compile Include="Automaton\DFAStateDescriptor.cs" /> <Compile Include="Automaton\DFAutomaton.cs" /> - <Compile Include="Automaton\EDFADefinition.cs" /> <Compile Include="Automaton\EnumAlphabet.cs" /> <Compile Include="Automaton\IAlphabet.cs" /> - <Compile Include="Automaton\IDFADefinition.cs" /> <Compile Include="Automaton\ParserException.cs" /> <Compile Include="Automaton\Scanner.cs" /> - <Compile Include="Automaton\DFADefinition.cs" /> <Compile Include="Automaton\IndexedAlphabetBase.cs" /> - <Compile Include="Automaton\CharAlphabet.cs" /> <Compile Include="Automaton\IAlphabetBuilder.cs" /> - <Compile Include="Automaton\IDFADefinitionBuilder.cs" /> <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> @@ -174,7 +171,6 @@ <Compile Include="Automaton\RegularExpressions\Token.cs" /> <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> <Compile Include="Automaton\AutomatonTransition.cs" /> - <Compile Include="Automaton\ByteAlphabet.cs" /> <Compile Include="Automaton\RegularExpressions\RegularDFABuilder.cs" /> <Compile Include="Formats\JSON\JSONElementContext.cs" /> <Compile Include="Formats\JSON\JSONElementType.cs" /> @@ -188,6 +184,12 @@ <Compile Include="Formats\JSON\StringTranslator.cs" /> <Compile Include="Automaton\MapAlphabet.cs" /> <Compile Include="Automaton\DummyAlphabet.cs" /> + <Compile Include="Automaton\DFATransitionTable.cs" /> + <Compile Include="Automaton\IDFATransitionTableBuilder.cs" /> + <Compile Include="Automaton\IDFATransitionTable.cs" /> + <Compile Include="Automaton\RegularExpressions\RegularDFADefinition.cs" /> + <Compile Include="Formats\CharAlphabet.cs" /> + <Compile Include="Formats\ByteAlphabet.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <ItemGroup />