annotate Implab/Automaton/RegularExpressions/RegularDFA.cs @ 182:76e8f2ba12b8 ref20160224

pretty print DFA, the minimization is still buggy
author cin
date Thu, 24 Mar 2016 18:52:10 +0300
parents b2b6a6640aa3
children 4f82e0f161c3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
177
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
1 using System.Collections.Generic;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
2 using System.Linq;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
3
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
4 namespace Implab.Automaton.RegularExpressions {
179
cin
parents: 178
diff changeset
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
6
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
8 readonly IAlphabet<TInput> m_alphabet;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
9
179
cin
parents: 178
diff changeset
10 public RegularDFA(IAlphabet<TInput> alphabet) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
12
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
13 m_alphabet = alphabet;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
14 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
15
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
16
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
17 public IAlphabet<TInput> InputAlphabet {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
18 get {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
19 return m_alphabet;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
20 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
21 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
22
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
23 public void MarkFinalState(int s, TTag[] tags) {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
24 MarkFinalState(s);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
25 SetStateTag(s, tags);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
26 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
27
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
28 public void SetStateTag(int s, TTag[] tags) {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
29 Safe.ArgumentNotNull(tags, "tags");
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
30 m_tags[s] = tags;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
31 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
32
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
33 public TTag[] GetStateTag(int s) {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
34 TTag[] tags;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
36 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
37
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
38 public TTag[][] CreateTagTable() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
39 var table = new TTag[StateCount][];
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
40
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
41 foreach (var pair in m_tags)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
42 table[pair.Key] = pair.Value;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
43
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
44 return table;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
45 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
46
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
47 /// <summary>
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
48 /// Optimize the specified alphabet.
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
49 /// </summary>
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
179
cin
parents: 178
diff changeset
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
52 Safe.ArgumentNotNull(alphabet, "alphabet");
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
53
179
cin
parents: 178
diff changeset
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
55
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
56 var alphaMap = new Dictionary<int,int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
57 var stateMap = new Dictionary<int,int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
58
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
59 Optimize(dfa, alphaMap, stateMap);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
60
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
61 // mark tags in the new DFA
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
64
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
65 // make the alphabet for the new DFA
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
66 // skip all unclassified symbols
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
69
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
70 var orig = ToString();
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
71 var opt = dfa.ToString();
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
72
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
73 return dfa;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
74 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
75
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
76 protected override IEnumerable<HashSet<int>> GroupFinalStates() {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
77 var arrayComparer = new CustomEqualityComparer<TTag[]>(
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
78 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
79 x => x.Sum(it => x.GetHashCode())
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
80 );
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
81 return FinalStates.GroupBy(x => m_tags[x], arrayComparer).Select(g => new HashSet<int>(g));
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
82 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
83
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
84 public override string ToString() {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
85 var states = new MapAlphabet<string>(false, null);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
86
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
87 for (int i = 0; i < StateCount; i++)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
88 states.DefineSymbol(string.Format("s{0}", i), i);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
89
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
90 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
91 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
92
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
93 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
94 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
95