Mercurial > pub > ImplabNet
diff Implab/Parsing/DFABuilder.cs @ 55:c0bf853aa04f
Added initial JSON support
+JSONParser
+JSONWriter
author | cin |
---|---|
date | Sun, 15 Jun 2014 19:39:11 +0400 |
parents | |
children | 97fbbf816844 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/DFABuilder.cs Sun Jun 15 19:39:11 2014 +0400 @@ -0,0 +1,182 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// <summary> + /// Используется для построения ДКА по регулярному выражению, сначала обходит + /// регулярное выражение и вычисляет followpos, затем используется метод + /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. + /// </summary> + public class DFABuilder : IVisitor { + int m_idx = 0; + Token m_root; + HashSet<int> m_firstpos; + HashSet<int> m_lastpos; + + Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); + Dictionary<int, int> m_indexes = new Dictionary<int, int>(); + Dictionary<int, int> m_ends = new Dictionary<int, int>(); + + public Dictionary<int, HashSet<int>> FollowposMap { + get { return m_followpos; } + } + + public HashSet<int> Followpos(int pos) { + HashSet<int> set; + if (m_followpos.TryGetValue(pos, out set)) + return set; + return m_followpos[pos] = new HashSet<int>(); + } + + bool Nullable(object n) { + if (n is EmptyToken || n is StarToken) + return true; + if (n is AltToken) + return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); + if (n is CatToken) + return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); + return false; + } + + + public void Visit(AltToken token) { + if (m_root == null) + m_root = token; + var firtspos = new HashSet<int>(); + var lastpos = new HashSet<int>(); + + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + token.Right.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + } + + public void Visit(StarToken token) { + if (m_root == null) + m_root = token; + token.Token.Accept(this); + + foreach (var i in m_lastpos) + Followpos(i).UnionWith(m_firstpos); + } + + public void Visit(CatToken token) { + if (m_root == null) + m_root = token; + + var firtspos = new HashSet<int>(); + var lastpos = new HashSet<int>(); + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + var leftLastpos = m_lastpos; + + token.Right.Accept(this); + lastpos.UnionWith(m_lastpos); + var rightFirstpos = m_firstpos; + + if (Nullable(token.Left)) + firtspos.UnionWith(rightFirstpos); + + if (Nullable(token.Right)) + lastpos.UnionWith(leftLastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + + foreach (var i in leftLastpos) + Followpos(i).UnionWith(rightFirstpos); + + } + + public void Visit(EmptyToken token) { + if (m_root == null) + m_root = token; + ; + } + + public void Visit(SymbolToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = token.Value; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + } + + public void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = Alphabet.UNCLASSIFIED; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx, token.Tag); + } + + public void BuildDFA(IDFADefinition dfa) { + Safe.ArgumentNotNull(dfa,"dfa"); + + var stateMap = new Dictionary<HashSet<int>, int>(new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + (x) => x.Sum(n => n.GetHashCode()) + )); + + stateMap[m_firstpos] = DefineState( dfa, m_firstpos); + Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE); + + var queue = new Queue<HashSet<int>>(); + + queue.Enqueue(m_firstpos); + + while (queue.Count > 0) { + var state = queue.Dequeue(); + var s1 = stateMap[state]; + + for (int a = 0; a < dfa.AlphabetSize; a++) { + var next = new HashSet<int>(); + foreach (var p in state) { + if (m_indexes[p] == a) { + next.UnionWith(Followpos(p)); + } + } + if (next.Count > 0) { + int s2; + if (!stateMap.TryGetValue(next, out s2)) { + stateMap[next] = s2 = DefineState(dfa, next); + queue.Enqueue(next); + } + dfa.DefineTransition(s1, s2, a); + } + } + + } + } + + int[] GetStateTags(HashSet<int> state) { + Debug.Assert(state != null); + return state.Where(pos => m_ends.ContainsKey(pos)).Select(pos => m_ends[pos]).ToArray(); + } + + int DefineState(IDFADefinition automa, HashSet<int> state) { + Debug.Assert(automa != null); + Debug.Assert(state != null); + + var tags = GetStateTags(state); + + return tags.Length > 0 ? automa.AddState(tags) : automa.AddState(); + } + + } +}