162
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Linq;
|
|
5
|
|
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Abstract base class for grammars. Provides helpers for composing regular
    /// expression tokens over an alphabet of <typeparamref name="TSymbol"/> symbols
    /// and for turning a finished expression into a scanner context.
    /// </summary>
    /// <typeparam name="TSymbol">Type of the input symbols that make up the alphabet.</typeparam>
    /// <typeparam name="TTag">Type of the tags carried by tokens and by the resulting scanner context.</typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// Alphabet builder used to translate input symbols to their integer classes
        /// and to define symbols that are not classified yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a token for <see cref="DFAConst.UNCLASSIFIED_INPUT"/>.
        /// </summary>
        /// <returns>A new symbol token built from the unclassified-input constant.</returns>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of <paramref name="alphabet"/> in <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">Symbols to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet) {
                AlphabetBuilder.DefineSymbol(ch);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol. A symbol that the alphabet builder
        /// reports as unclassified is defined first.
        /// </summary>
        /// <param name="symbol">The symbol to create the token for.</param>
        /// <returns>A token built from the translated symbol.</returns>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols. Duplicates are dropped and symbols
        /// that the alphabet builder reports as unclassified are defined first.
        /// </summary>
        /// <param name="symbols">Symbols to include; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>A token built from the translated symbols.</returns>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/>
        /// that accepts the symbols as a parameter list.
        /// </summary>
        /// <param name="set">Symbols to include in the token.</param>
        /// <returns>A token built from the translated symbols.</returns>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates a symbol with <see cref="AlphabetBuilder"/>; when the result is
        /// <see cref="DFAConst.UNCLASSIFIED_INPUT"/> the symbol is defined and the
        /// value returned by the definition is used instead.
        /// </summary>
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT) {
                t = AlphabetBuilder.DefineSymbol(ch);
            }
            return t;
        }

        /// <summary>
        /// Applies <see cref="TranslateOrAdd(TSymbol)"/> to each distinct symbol.
        /// The returned sequence is lazy: symbols are translated (and possibly
        /// defined) only when the sequence is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates a symbol with <see cref="AlphabetBuilder"/> and fails when the
        /// result is <see cref="DFAConst.UNCLASSIFIED_INPUT"/>.
        /// </summary>
        /// <exception cref="ApplicationException">The symbol is unclassified.</exception>
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return t;
        }

        /// <summary>
        /// Applies <see cref="TranslateOrDie(TSymbol)"/> to each distinct symbol.
        /// The returned sequence is lazy, so the exception for an unclassified
        /// symbol is raised during enumeration rather than by this call.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every value in the range
        /// <c>0 .. AlphabetBuilder.Count - 1</c> except the translations of the
        /// specified symbols.
        /// </summary>
        /// <param name="symbols">Symbols to exclude; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>A token built from the remaining values.</returns>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray());
        }

        /// <summary>
        /// Creates the alphabet instance that <see cref="BuildScannerContext"/>
        /// passes to the DFA optimization and reads the translation map from.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA from the regular expression, optimizes it against the
        /// alphabet returned by <see cref="CreateAlphabet"/> and packs the resulting
        /// tables into a scanner context.
        /// </summary>
        /// <param name="regexp">Root token of the regular expression; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>
        /// A scanner context made of the optimized automaton's transition table,
        /// final state table, tag table and initial state, plus the alphabet's
        /// translation map.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the constructed DFA is a final state, i.e. the
        /// language contains the empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
            // Validate up front, like the other members of this class, so a null
            // expression is rejected before any automaton objects are created.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept(visitor);

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState)) {
                throw new ApplicationException("The specified language contains empty token");
            }

            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }

}
|