annotate Implab/Formats/Grammar.cs @ 177:a0ff6a0e9c44 ref20160224

refactoring
author cin
date Wed, 23 Mar 2016 01:42:00 +0300
parents
children d5c5db0335ee
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
177
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
1 using Implab;
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
2 using System;
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
3 using System.Collections.Generic;
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
4 using System.Linq;
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
5 using Implab.Automaton;
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
6 using Implab.Automaton.RegularExpressions;
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
7
a0ff6a0e9c44 refactoring
cin
parents:
diff changeset
namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Lets derived classes build token
    /// expressions over an alphabet of <typeparamref name="TSymbol"/> symbols
    /// and compile such an expression into a scanner context.
    /// </summary>
    /// <typeparam name="TSymbol">Type of the input symbols the alphabet is built from.</typeparam>
    /// <typeparam name="TTag">
    /// Tag type passed through to <c>Token&lt;TTag&gt;</c>, <c>RegularDFA&lt;TSymbol, TTag&gt;</c>
    /// and <c>ScannerContext&lt;TTag&gt;</c>.
    /// </typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// Alphabet builder used to translate symbols into their integer classes
        /// and to register symbols that are not known yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a symbol token for <c>DFAConst.UNCLASSIFIED_INPUT</c>.
        /// </summary>
        /// <returns>A new <c>SymbolToken&lt;TTag&gt;</c> wrapping the unclassified-input constant.</returns>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Registers every symbol of <paramref name="alphabet"/> in <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">Symbols to define; validated with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet) {
                AlphabetBuilder.DefineSymbol(ch);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol, defining the symbol in the alphabet
        /// when it is not classified yet.
        /// </summary>
        /// <param name="symbol">The symbol to match.</param>
        /// <returns>The token produced by <c>Token&lt;TTag&gt;.New</c> for the symbol's class.</returns>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols. Duplicates are removed and symbols
        /// that are not classified yet are defined in the alphabet.
        /// </summary>
        /// <param name="symbols">Symbols to match; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token produced by <c>Token&lt;TTag&gt;.New</c> for the symbols' classes.</returns>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <c>SymbolToken(IEnumerable&lt;TSymbol&gt;)</c>
        /// which accepts the symbols as a parameter list.
        /// </summary>
        /// <param name="set">Symbols to match; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token produced for the given set of symbols.</returns>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            // Validate here so that a failure is reported under the name of this
            // method's own parameter rather than the callee's "symbols".
            Safe.ArgumentNotNull(set, "set");

            return SymbolToken(set);
        }

        /// <summary>
        /// Translates a symbol into its class; when the builder reports
        /// <c>DFAConst.UNCLASSIFIED_INPUT</c> the symbol is defined first.
        /// </summary>
        private int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT) {
                t = AlphabetBuilder.DefineSymbol(ch);
            }
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence, defining the
        /// unknown ones. The alphabet is modified only while the result is enumerated.
        /// </summary>
        private IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates a symbol into its class and fails when the builder reports
        /// <c>DFAConst.UNCLASSIFIED_INPUT</c> for it.
        /// </summary>
        /// <exception cref="ApplicationException">The symbol is unclassified.</exception>
        private int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence; enumeration throws
        /// on the first unclassified symbol.
        /// </summary>
        private IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token matching every class in the range <c>[0, AlphabetBuilder.Count)</c>
        /// except the classes of the specified symbols.
        /// </summary>
        /// <param name="symbols">
        /// Symbols to exclude; validated with <c>Safe.ArgumentNotNull</c>. Every symbol
        /// must already be classified by <see cref="AlphabetBuilder"/>.
        /// </param>
        /// <returns>The token produced by <c>Token&lt;TTag&gt;.New</c> for the remaining classes.</returns>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(
                Enumerable.Range(0, AlphabetBuilder.Count)
                    .Except(TranslateOrDie(symbols))
                    .ToArray()
            );
        }

        /// <summary>
        /// Creates the alphabet instance which is passed to the DFA optimization in
        /// <see cref="BuildScannerContext"/> and later supplies the translation map.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Compiles the regular expression into a DFA over <see cref="AlphabetBuilder"/>,
        /// optimizes it and packs the resulting tables into a scanner context.
        /// </summary>
        /// <param name="regexp">The expression to compile; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>
        /// A <c>ScannerContext&lt;TTag&gt;</c> built from the optimized DFA's transition,
        /// final-state and tag tables, its initial state and the translation map of the
        /// alphabet created by <see cref="CreateAlphabet"/>.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the DFA is final, i.e. the language contains an empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
            // Fail fast and consistently with the other members instead of
            // dereferencing a null expression after the DFA has been allocated.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept(visitor);

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState)) {
                throw new ApplicationException("The specified language contains empty token");
            }

            // NOTE(review): Optimize presumably populates 'ab' with the alphabet of
            // the optimized automaton - confirm against RegularDFA.Optimize.
            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }
}