Mercurial > pub > ImplabNet
comparison Implab/Automaton/RegularExpressions/Grammar.cs @ 162:0526412bbb26 ref20160224
DFA refactoring
author | cin |
---|---|
date | Wed, 24 Feb 2016 08:39:53 +0300 |
parents | |
children | 419aa51b04fd |
comparison
equal
deleted
inserted
replaced
161:2a8466f0cb8a | 162:0526412bbb26 |
---|---|
1 using Implab; | |
2 using System; | |
3 using System.Collections.Generic; | |
4 using System.Linq; | |
5 using System.Text; | |
6 using System.Threading.Tasks; | |
7 | |
namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Abstract base class for grammars that formulate expressions over an
    /// alphabet of <c>char</c> symbols.
    /// </summary>
    /// <typeparam name="TGrammar">
    /// The concrete grammar type. It must derive from this class and have a
    /// public parameterless constructor so that <see cref="Instance"/> can create it.
    /// </typeparam>
    public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
        static TGrammar _instance;

        /// <summary>
        /// Gets the shared instance of the grammar, creating it on first access.
        /// </summary>
        /// <remarks>The lazy creation below performs no synchronization.</remarks>
        public static TGrammar Instance {
            get {
                if (_instance == null)
                    _instance = new TGrammar();
                return _instance;
            }
        }

        readonly CharAlphabet m_alphabet = new CharAlphabet();

        /// <summary>
        /// Gets the alphabet that this grammar translates characters with.
        /// </summary>
        public CharAlphabet Alphabet {
            get { return m_alphabet; }
        }

        /// <summary>
        /// Creates a token for the <see cref="CharAlphabet.UNCLASSIFIED"/> symbol class.
        /// </summary>
        /// <returns>A new <see cref="SymbolToken"/>.</returns>
        public SymbolToken UnclassifiedToken() {
            return new SymbolToken(CharAlphabet.UNCLASSIFIED);
        }

        /// <summary>
        /// Defines every character of the specified sequence as a symbol of the alphabet.
        /// </summary>
        /// <param name="alphabet">The characters to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
        public void DefineAlphabet(IEnumerable<char> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet)
                m_alphabet.DefineSymbol(ch);
        }

        /// <summary>
        /// Creates a token that matches any character of the inclusive range
        /// from <paramref name="start"/> to <paramref name="end"/>.
        /// </summary>
        /// <param name="start">The first character of the range.</param>
        /// <param name="end">The last character of the range; must not precede <paramref name="start"/>.</param>
        /// <returns>The token built by <see cref="SymbolToken(IEnumerable{char})"/> for the range.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="end"/> is less than <paramref name="start"/>.
        /// </exception>
        public Token SymbolRangeToken(char start, char end) {
            // Reject inverted ranges explicitly: otherwise Enumerable.Range either
            // reports its own 'count' argument or yields an empty symbol set.
            if (end < start)
                throw new ArgumentOutOfRangeException("end", "The end of the range must not precede its start");

            return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Creates a token for a single character, defining the character in the
        /// alphabet when it is still unclassified.
        /// </summary>
        /// <param name="symbol">The character to create the token for.</param>
        /// <returns>The token created by <c>Token.New</c> for the translated symbol.</returns>
        public Token SymbolToken(char symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token for a set of characters. Duplicates are ignored and
        /// characters that are still unclassified are defined in the alphabet.
        /// </summary>
        /// <param name="symbols">The characters of the set; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token created by <c>Token.New</c> for the translated symbols.</returns>
        public Token SymbolToken(IEnumerable<char> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Creates a token for the characters listed as arguments; forwards to
        /// <see cref="SymbolToken(IEnumerable{char})"/>.
        /// </summary>
        /// <param name="set">The characters of the set.</param>
        /// <returns>The token for the listed characters.</returns>
        public Token SymbolSetToken(params char[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates a character with the alphabet; when the result is
        /// <see cref="CharAlphabet.UNCLASSIFIED"/> the character is defined first.
        /// </summary>
        int TranslateOrAdd(char ch) {
            var t = m_alphabet.Translate(ch);
            if (t == CharAlphabet.UNCLASSIFIED)
                t = m_alphabet.DefineSymbol(ch);
            return t;
        }

        /// <summary>
        /// Lazily applies <see cref="TranslateOrAdd(char)"/> to each distinct character.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates a character with the alphabet and fails when the result is
        /// <see cref="CharAlphabet.UNCLASSIFIED"/>.
        /// </summary>
        /// <exception cref="ApplicationException">The character is unclassified.</exception>
        int TranslateOrDie(char ch) {
            var t = m_alphabet.Translate(ch);
            if (t == CharAlphabet.UNCLASSIFIED)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        /// <summary>
        /// Lazily applies <see cref="TranslateOrDie(char)"/> to each distinct character.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every value in the range <c>0 .. Alphabet.Count - 1</c>
        /// except the values the specified characters translate to.
        /// </summary>
        /// <param name="symbols">The characters to exclude; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token created by <c>Token.New</c> for the remaining values.</returns>
        /// <exception cref="ApplicationException">One of the characters is unclassified.</exception>
        public Token SymbolTokenExcept(IEnumerable<char> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray());
        }

        /// <summary>
        /// Builds a DFA for the specified language over the alphabet of this grammar.
        /// </summary>
        /// <param name="lang">The root token of the language; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The result of <c>Optimize()</c> called on the built definition.</returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the built DFA is final, i.e. the language contains an empty token.
        /// </exception>
        protected CDFADefinition BuildDFA(Token lang) {
            Safe.ArgumentNotNull(lang, "lang");

            var dfa = new CDFADefinition(m_alphabet);

            var builder = new DFABuilder();

            // The builder visits the expression tree first and then fills the definition.
            lang.Accept(builder);

            builder.BuildDFA(dfa);
            if (dfa.InitialStateIsFinal)
                throw new ApplicationException("The specified language contains empty token");

            return dfa.Optimize();
        }
    }
}