| 
162
 | 
     1 using Implab;
 | 
| 
 | 
     2 using System;
 | 
| 
 | 
     3 using System.Collections.Generic;
 | 
| 
 | 
     4 using System.Linq;
 | 
| 
 | 
     5 using System.Text;
 | 
| 
 | 
     6 using System.Threading.Tasks;
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 namespace Implab.Automaton.RegularExpressions {
 | 
| 
 | 
     /// <summary>
     /// Abstract base class for grammars. Lets expressions be formulated over an alphabet of type <c>char</c>.
     /// </summary>
     /// <typeparam name="TGrammar">
     /// The concrete grammar type; it must derive from this class and expose a public parameterless constructor.
     /// </typeparam>
     public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
         static TGrammar _instance;

         /// <summary>
         /// Shared grammar instance, created with <c>new TGrammar()</c> on first access.
         /// The lazy creation is not synchronized.
         /// </summary>
         public static TGrammar Instance {
             get { return _instance ?? (_instance = new TGrammar()); }
         }

         readonly CharAlphabet m_alphabet = new CharAlphabet();

         /// <summary>
         /// The alphabet owned by this grammar.
         /// </summary>
         public CharAlphabet Alphabet {
             get { return m_alphabet; }
         }

         /// <summary>
         /// Creates a new <c>SymbolToken</c> for <c>CharAlphabet.UNCLASSIFIED</c>.
         /// </summary>
         public SymbolToken UnclassifiedToken() {
             var unclassified = new SymbolToken(CharAlphabet.UNCLASSIFIED);
             return unclassified;
         }

         /// <summary>
         /// Defines every character of <paramref name="alphabet"/> as a symbol of the grammar alphabet.
         /// </summary>
         /// <param name="alphabet">Characters to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
         public void DefineAlphabet(IEnumerable<char> alphabet) {
             Safe.ArgumentNotNull(alphabet, "alphabet");

             foreach (var symbol in alphabet) {
                 m_alphabet.DefineSymbol(symbol);
             }
         }

         /// <summary>
         /// Builds a token for the characters from <paramref name="start"/> to <paramref name="end"/>, both inclusive.
         /// </summary>
         /// <param name="start">First character of the range.</param>
         /// <param name="end">Last character of the range.</param>
         public Token SymbolRangeToken(char start, char end) {
             int first = start;
             int count = end - start + 1;

             var range = Enumerable.Range(first, count).Select(code => (char)code);
             return SymbolToken(range);
         }

         /// <summary>
         /// Builds a token for a single character, defining it in the alphabet when it is still unclassified.
         /// </summary>
         /// <param name="symbol">The character to build the token for.</param>
         public Token SymbolToken(char symbol) {
             var id = TranslateOrAdd(symbol);
             return Token.New(id);
         }

         /// <summary>
         /// Builds a token for a set of characters, defining in the alphabet those that are still unclassified.
         /// </summary>
         /// <param name="symbols">Characters of the set; checked with <c>Safe.ArgumentNotNull</c>.</param>
         public Token SymbolToken(IEnumerable<char> symbols) {
             Safe.ArgumentNotNull(symbols, "symbols");

             var ids = TranslateOrAdd(symbols).ToArray();
             return Token.New(ids);
         }

         /// <summary>
         /// Convenience form of <c>SymbolToken(IEnumerable&lt;char&gt;)</c> taking the characters as arguments.
         /// </summary>
         /// <param name="set">Characters of the set.</param>
         public Token SymbolSetToken(params char[] set) {
             IEnumerable<char> symbols = set;
             return SymbolToken(symbols);
         }

         // Translates the character through the alphabet; a character that is
         // still UNCLASSIFIED gets defined and the result of DefineSymbol is returned.
         int TranslateOrAdd(char ch) {
             var id = m_alphabet.Translate(ch);
             if (id != CharAlphabet.UNCLASSIFIED) {
                 return id;
             }

             return m_alphabet.DefineSymbol(ch);
         }

         // Lazily translates the distinct characters, defining the unclassified ones
         // as the sequence is enumerated.
         IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
             var unique = symbols.Distinct();
             return unique.Select(ch => TranslateOrAdd(ch));
         }

         // Translates the character through the alphabet and throws
         // ApplicationException when the result is UNCLASSIFIED.
         int TranslateOrDie(char ch) {
             var id = m_alphabet.Translate(ch);
             if (id != CharAlphabet.UNCLASSIFIED) {
                 return id;
             }

             throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
         }

         // Lazily translates the distinct characters; the exception for an
         // unclassified character is raised only while the sequence is enumerated.
         IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
             var unique = symbols.Distinct();
             return unique.Select(ch => TranslateOrDie(ch));
         }

         /// <summary>
         /// Builds a token from every integer in <c>[0, Alphabet.Count)</c> except the translations of
         /// the specified characters.
         /// </summary>
         /// <param name="symbols">Characters to exclude; checked with <c>Safe.ArgumentNotNull</c>.</param>
         /// <exception cref="ApplicationException">One of the characters is unclassified in the alphabet.</exception>
         public Token SymbolTokenExcept(IEnumerable<char> symbols) {
             Safe.ArgumentNotNull(symbols, "symbols");

             var everything = Enumerable.Range(0, m_alphabet.Count);
             var excluded = TranslateOrDie(symbols);
             var remaining = everything.Except(excluded).ToArray();

             return Token.New(remaining);
         }

         /// <summary>
         /// Builds a DFA definition for the specified language over the grammar alphabet and returns
         /// the result of its <c>Optimize()</c> call.
         /// </summary>
         /// <param name="lang">Root token of the language; checked with <c>Safe.ArgumentNotNull</c>.</param>
         /// <exception cref="ApplicationException">The initial state of the built DFA is final.</exception>
         protected CDFADefinition BuildDFA(Token lang) {
             Safe.ArgumentNotNull(lang, "lang");

             var dfa = new CDFADefinition(m_alphabet);
             var builder = new DFABuilder();

             // The builder visits the token tree first and then fills in the definition.
             lang.Accept(builder);
             builder.BuildDFA(dfa);

             if (dfa.InitialStateIsFinal) {
                 throw new ApplicationException("The specified language contains empty token");
             }

             return dfa.Optimize();
         }
     }
 | 
| 
 | 
   106 
 | 
| 
 | 
   107 
 | 
| 
 | 
   108 }
 |