annotate Implab/Automaton/RegularExpressions/RegularDFA.cs @ 187:dd4a3590f9c6 ref20160224

Reworked cancelation handling, if the cancel handler isn't specified the OperationCanceledException will be handled by the error handler Any unhandled OperationCanceledException will cause the promise cancelation
author cin
date Tue, 19 Apr 2016 17:35:20 +0300
parents 4f82e0f161c3
children fa6cbf4d8841
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
177
a0ff6a0e9c44 refactoring
cin
parents: 176
diff changeset
1 using System.Collections.Generic;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
2 using System.Linq;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
3
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
4 namespace Implab.Automaton.RegularExpressions {
179
cin
parents: 178
diff changeset
5 public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
6
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
7 readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
8 readonly IAlphabet<TInput> m_alphabet;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
9
179
cin
parents: 178
diff changeset
10 public RegularDFA(IAlphabet<TInput> alphabet) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
11 Safe.ArgumentNotNull(alphabet, "aplhabet");
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
12
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
13 m_alphabet = alphabet;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
14 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
15
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
16
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
17 public IAlphabet<TInput> InputAlphabet {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
18 get {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
19 return m_alphabet;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
20 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
21 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
22
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
23 public void MarkFinalState(int s, TTag[] tags) {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
24 MarkFinalState(s);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
25 SetStateTag(s, tags);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
26 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
27
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
28 public void SetStateTag(int s, TTag[] tags) {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
29 Safe.ArgumentNotNull(tags, "tags");
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
30 m_tags[s] = tags;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
31 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
32
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
33 public TTag[] GetStateTag(int s) {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
34 TTag[] tags;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
35 return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
36 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
37
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
38 public TTag[][] CreateTagTable() {
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
39 var table = new TTag[StateCount][];
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
40
176
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
41 foreach (var pair in m_tags)
0c3c69fe225b rewritten the text scanner
cin
parents: 172
diff changeset
42 table[pair.Key] = pair.Value;
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
43
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
44 return table;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
45 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
46
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
47 /// <summary>
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
48 /// Optimize the specified alphabet.
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
49 /// </summary>
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
50 /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param>
179
cin
parents: 178
diff changeset
51 public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
52 Safe.ArgumentNotNull(alphabet, "alphabet");
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
53
179
cin
parents: 178
diff changeset
54 var dfa = new RegularDFA<TInput, TTag>(alphabet);
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
55
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
56 var alphaMap = new Dictionary<int,int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
57 var stateMap = new Dictionary<int,int>();
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
58
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
59 Optimize(dfa, alphaMap, stateMap);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
60
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
61 // mark tags in the new DFA
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
62 foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
63 dfa.SetStateTag(g.Key, g.SelectMany(x => x).ToArray());
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
64
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
65 // make the alphabet for the new DFA
181
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
66 // skip all unclassified symbols
b2b6a6640aa3 minor fixes and debug
cin
parents: 180
diff changeset
67 foreach (var pair in alphaMap.Where(x => x.Value != 0))
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
68 alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
69 return dfa;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
70 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
71
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
72 protected override IEnumerable<HashSet<int>> SplitFinalStates(IEnumerable<int> states) {
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
73 var arrayComparer = new CustomEqualityComparer<TTag[]>(
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
74 (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)),
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
75 x => x.Sum(it => x.GetHashCode())
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
76 );
183
4f82e0f161c3 fixed DFA optimization, JSON is fully functional
cin
parents: 182
diff changeset
77 return states.GroupBy(x => m_tags[x] ?? new TTag[0], arrayComparer).Select(g => new HashSet<int>(g));
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
78 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
79
182
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
80 public override string ToString() {
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
81 var states = new MapAlphabet<string>(false, null);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
82
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
83 for (int i = 0; i < StateCount; i++)
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
84 states.DefineSymbol(string.Format("s{0}", i), i);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
85
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
86 return string.Format("//[RegularDFA {1} x {2}]\n{0}", PrintDFA(InputAlphabet, states),StateCount, AlphabetSize);
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
87 }
76e8f2ba12b8 pretty print DFA, the minimization is still buggy
cin
parents: 181
diff changeset
88
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
89 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
90 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
91