177
|
1 using Implab;
|
|
2 using System;
|
|
3 using System.Collections.Generic;
|
|
4 using System.Linq;
|
|
5 using Implab.Automaton;
|
|
6 using Implab.Automaton.RegularExpressions;
|
|
7
|
|
namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Gives derived classes helpers for building
    /// regular-expression tokens over an alphabet of symbols of type
    /// <typeparamref name="TSymbol"/> and for compiling a token into a scanner context.
    /// </summary>
    /// <typeparam name="TSymbol">The type of the input symbols the grammar is defined over.</typeparam>
    public abstract class Grammar<TSymbol> {

        /// <summary>
        /// Gets the alphabet builder used to translate input symbols into symbol class
        /// indexes and to register new symbols.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a token for <see cref="AutomatonConst.UNCLASSIFIED_INPUT"/>.
        /// </summary>
        /// <returns>A new <see cref="SymbolToken"/> wrapping the unclassified input constant.</returns>
        protected SymbolToken UnclassifiedToken() {
            return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Registers every symbol of <paramref name="alphabet"/> with the <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">The symbols to define; validated with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var symbol in alphabet) {
                AlphabetBuilder.DefineSymbol(symbol);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol, defining the symbol in the alphabet
        /// when it has not been classified yet.
        /// </summary>
        /// <param name="symbol">The symbol to create the token for.</param>
        /// <returns>The token produced by <c>Token.New</c> for the symbol's class index.</returns>
        protected Token SymbolToken(TSymbol symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of symbols. Duplicate symbols are ignored and
        /// symbols that are not classified yet are defined in the alphabet.
        /// </summary>
        /// <param name="symbols">The symbols of the set; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token produced by <c>Token.New</c> for the translated class indexes.</returns>
        protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // ToArray forces the lazy translation, so all symbols are defined before the token is built.
            var symbolClasses = TranslateOrAdd(symbols).ToArray();
            return Token.New(symbolClasses);
        }

        /// <summary>
        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/> taking
        /// the symbols as a parameter array.
        /// </summary>
        /// <param name="set">The symbols of the set.</param>
        /// <returns>The token for the specified set of symbols.</returns>
        protected Token SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates a symbol to its class index; when the builder reports
        /// <see cref="AutomatonConst.UNCLASSIFIED_INPUT"/> the symbol is defined first.
        /// </summary>
        /// <param name="ch">The symbol to translate.</param>
        /// <returns>The class index of the symbol.</returns>
        private int TranslateOrAdd(TSymbol ch) {
            var symbolClass = AlphabetBuilder.Translate(ch);
            if (symbolClass == AutomatonConst.UNCLASSIFIED_INPUT) {
                symbolClass = AlphabetBuilder.DefineSymbol(ch);
            }
            return symbolClass;
        }

        /// <summary>
        /// Translates the distinct symbols of a sequence, defining the unclassified ones.
        /// The result is lazy: symbols are translated (and defined) only while it is enumerated.
        /// </summary>
        /// <param name="symbols">The symbols to translate.</param>
        /// <returns>A lazy sequence of class indexes.</returns>
        private IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates a symbol to its class index and fails when the symbol is unclassified.
        /// </summary>
        /// <param name="ch">The symbol to translate.</param>
        /// <returns>The class index of the symbol.</returns>
        /// <exception cref="ApplicationException">
        /// The builder translated the symbol to <see cref="AutomatonConst.UNCLASSIFIED_INPUT"/>.
        /// </exception>
        private int TranslateOrDie(TSymbol ch) {
            var symbolClass = AlphabetBuilder.Translate(ch);
            if (symbolClass == AutomatonConst.UNCLASSIFIED_INPUT) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return symbolClass;
        }

        /// <summary>
        /// Translates the distinct symbols of a sequence, failing on the first unclassified one.
        /// The result is lazy: the exception surfaces only while it is enumerated.
        /// </summary>
        /// <param name="symbols">The symbols to translate.</param>
        /// <returns>A lazy sequence of class indexes.</returns>
        private IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token matching every class index in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the classes of the specified symbols.
        /// </summary>
        /// <param name="symbols">The symbols to exclude; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token produced by <c>Token.New</c> for the remaining class indexes.</returns>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var allClasses = Enumerable.Range(0, AlphabetBuilder.Count);
            var remaining = allClasses.Except(TranslateOrDie(symbols)).ToArray();
            return Token.New(remaining);
        }

        /// <summary>
        /// Creates the alphabet that <see cref="BuildScannerContext{TTag}"/> passes to the DFA
        /// optimization and from which it takes the translation map.
        /// </summary>
        /// <returns>A new alphabet instance.</returns>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA from the specified regular expression, optimizes it and packs
        /// its tables into a scanner context.
        /// </summary>
        /// <typeparam name="TTag">The tag type used by the DFA, the visitor and the resulting context.</typeparam>
        /// <param name="regexp">The root token of the regular expression; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>
        /// A scanner context holding the transition table, the final state table, the tag table
        /// and the initial state of the optimized DFA together with the alphabet translation map.
        /// </returns>
        /// <exception cref="ApplicationException">The initial state of the built DFA is a final state.</exception>
        protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) {
            // Fail fast and consistently with the other entry points instead of hitting
            // a NullReferenceException on regexp.Accept after the DFA was already allocated.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>(dfa);
            regexp.Accept(visitor);
            visitor.BuildDFA();

            // A final initial state means the expression accepts the empty input.
            if (dfa.IsFinalState(dfa.InitialState)) {
                throw new ApplicationException("The specified language contains empty token");
            }

            // The optimized automaton works over the freshly created alphabet, so the
            // translation map must be taken from that alphabet rather than from AlphabetBuilder.
            var optimizedAlphabet = CreateAlphabet();
            var optimal = dfa.Optimize(optimizedAlphabet);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                optimizedAlphabet.GetTranslationMap()
            );
        }

    }

}
|