annotate Implab/Automaton/RegularExpressions/RegularDFA.cs @ 172:92d5278d1b10 ref20160224

Working on text scanner
author cin
date Mon, 14 Mar 2016 01:19:38 +0300
parents
children 0c3c69fe225b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
172
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
1 using System;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
2 using System.Collections.Generic;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
3 using System.Linq;
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
4
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
5 namespace Implab.Automaton.RegularExpressions {
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
/// <summary>
/// A DFA table bound to an input alphabet whose states may carry arrays of tags.
/// </summary>
/// <typeparam name="TInput">Type of the symbols of the input alphabet.</typeparam>
/// <typeparam name="TTag">Type of the tags attached to states.</typeparam>
public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> {

    // Tags assigned to states, keyed by the state number.
    readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>();
    readonly IAlphabet<TInput> m_alphabet;

    /// <summary>
    /// Creates an empty DFA over the specified input alphabet.
    /// </summary>
    /// <param name="alphabet">The input alphabet, must not be <c>null</c>.</param>
    public RegularDFA(IAlphabet<TInput> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        m_alphabet = alphabet;
    }


    /// <summary>
    /// The input alphabet passed to the constructor.
    /// </summary>
    public IAlphabet<TInput> InputAlphabet {
        get {
            return m_alphabet;
        }
    }

    /// <summary>
    /// Marks the state as final and assigns the specified tags to it.
    /// </summary>
    /// <param name="s">The state number.</param>
    /// <param name="tags">The tags for the state, must not be <c>null</c>.</param>
    public void MarkFinalState(int s, TTag[] tags) {
        MarkFinalState(s);
        SetStateTag(s, tags);
    }

    /// <summary>
    /// Assigns the tags to the state, replacing any tags assigned earlier.
    /// </summary>
    /// <param name="s">The state number.</param>
    /// <param name="tags">The tags for the state, must not be <c>null</c>.</param>
    public void SetStateTag(int s, TTag[] tags) {
        Safe.ArgumentNotNull(tags, "tags");
        m_tags[s] = tags;
    }

    /// <summary>
    /// Gets the tags assigned to the state.
    /// </summary>
    /// <param name="s">The state number.</param>
    /// <returns>The assigned tags, or an empty array when the state has no tags.</returns>
    public TTag[] GetStateTag(int s) {
        TTag[] tags;
        return m_tags.TryGetValue(s, out tags) ? tags : new TTag[0];
    }

    /// <summary>
    /// Builds an array of state descriptors indexed by the state number. Each
    /// descriptor carries the final flag and the tags of its state.
    /// </summary>
    /// <remarks>
    /// Descriptors are created only for states which take part in at least one
    /// transition, all other entries are left with their default value.
    /// </remarks>
    /// <returns>The array of <c>StateCount</c> descriptors.</returns>
    public new DFAStateDescriptor<TTag>[] CreateTransitionTable() {
        var table = new DFAStateDescriptor<TTag>[StateCount];

        foreach (var t in this) {
            // a missing transitions array means the descriptor isn't created yet
            if (table[t.s1].transitions == null)
                table[t.s1] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s1), GetStateTag(t.s1));
            if (table[t.s2].transitions == null)
                table[t.s2] = new DFAStateDescriptor<TTag>(AlphabetSize, IsFinalState(t.s2), GetStateTag(t.s2));
            table[t.s1].transitions[t.edge] = t.s2;
        }

        return table;
    }

    /// <summary>
    /// Creates an optimized DFA from this one and fills the alphabet for it.
    /// </summary>
    /// <remarks>
    /// The tags of the states which are mapped to the same state of the new DFA
    /// are merged, duplicate tags are dropped.
    /// </remarks>
    /// <param name="alphabet">An empty alphabet which will be filled during the optimization.</param>
    /// <returns>The new DFA bound to the <paramref name="alphabet"/>.</returns>
    public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) {
        Safe.ArgumentNotNull(alphabet, "alphabet");

        var dfa = new RegularDFA<TInput, TTag>(alphabet);

        var alphaMap = new Dictionary<int,int>();
        var stateMap = new Dictionary<int,int>();

        // NOTE(review): this overload isn't visible here, it's expected to fill
        // both maps with 'old -> new' pairs for input classes and states.
        Optimize(dfa, alphaMap, stateMap);

        // mark tags in the new DFA
        foreach (var g in m_tags.Where(x => x.Key < StateCount).GroupBy(x => stateMap[x.Key], x => x.Value ))
            dfa.SetStateTag(g.Key, g.SelectMany(x => x).Distinct().ToArray());

        // make the alphabet for the new DFA
        foreach (var pair in alphaMap)
            alphabet.DefineClass(m_alphabet.GetSymbols(pair.Key), pair.Value);

        return dfa;
    }

    /// <summary>
    /// Splits the final states into groups, the states of the same group have
    /// the same tags regardless of the order of the tags.
    /// </summary>
    /// <returns>The groups of the final states.</returns>
    protected override IEnumerable<HashSet<int>> GroupFinalStates() {
        var tagComparer = EqualityComparer<TTag>.Default;

        var arrayComparer = new CustomEqualityComparer<TTag[]>(
            // the arrays are equal when they have the same length and the same set of tags
            (x,y) => x.Length == y.Length && x.All(it => y.Contains(it)) && y.All(it => x.Contains(it)),
            // the hash is order independent and is calculated over the distinct
            // tags to agree with the comparison above, xor never overflows
            x => x.Distinct().Aggregate(0, (hash, it) => hash ^ tagComparer.GetHashCode(it))
        );

        // GetStateTag is used since a final state may have no tags at all
        return FinalStates.GroupBy(x => GetStateTag(x), arrayComparer).Select(g => new HashSet<int>(g));
    }

}
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
88 }
92d5278d1b10 Working on text scanner
cin
parents:
diff changeset
89