Mercurial > pub > ImplabNet
diff Implab/Formats/Grammar.cs @ 177:a0ff6a0e9c44 ref20160224
refactoring
author | cin |
---|---|
date | Wed, 23 Mar 2016 01:42:00 +0300 |
parents | |
children | d5c5db0335ee |
line wrap: on
line diff
// Implab/Formats/Grammar.cs (file added by this changeset; the diff's old side is /dev/null)
using Implab;
using System;
using System.Collections.Generic;
using System.Linq;
using Implab.Automaton;
using Implab.Automaton.RegularExpressions;

namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Provides helpers for building tokens
    /// (regular expressions) over an alphabet of symbols of type
    /// <typeparamref name="TSymbol"/> and for compiling a token into a
    /// <see cref="ScannerContext{TTag}"/>.
    /// </summary>
    /// <typeparam name="TSymbol">Type of the input symbols the alphabet is made of.</typeparam>
    /// <typeparam name="TTag">Type of the tags carried by tokens and by the resulting scanner context.</typeparam>
    public abstract class Grammar<TSymbol, TTag> {

        /// <summary>
        /// Alphabet builder used to translate input symbols into integer codes
        /// and to define new symbols.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a symbol token for the <c>DFAConst.UNCLASSIFIED_INPUT</c> code.
        /// </summary>
        protected SymbolToken<TTag> UnclassifiedToken() {
            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of <paramref name="alphabet"/> in the <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">Symbols to define; validated with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet)
                AlphabetBuilder.DefineSymbol(ch);
        }

        /// <summary>
        /// Creates a token for a single symbol, defining the symbol in the
        /// alphabet when it has no translation yet.
        /// </summary>
        /// <param name="symbol">The symbol the token is built for.</param>
        protected Token<TTag> SymbolToken(TSymbol symbol) {
            return Token<TTag>.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token from the codes of the distinct symbols in
        /// <paramref name="symbols"/>; symbols without a translation are
        /// defined in the alphabet first.
        /// </summary>
        /// <param name="symbols">Symbols to include; validated with <c>Safe.ArgumentNotNull</c>.</param>
        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/>
        /// taking the symbols as a parameter array.
        /// </summary>
        /// <param name="set">Symbols to include in the token.</param>
        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates a symbol to its code; when the translation is
        /// <c>DFAConst.UNCLASSIFIED_INPUT</c> the symbol is defined in the
        /// alphabet and the code returned by <c>DefineSymbol</c> is used.
        /// </summary>
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                t = AlphabetBuilder.DefineSymbol(ch);
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence, defining the
        /// ones that have no translation yet.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates a symbol to its code and fails when the translation is
        /// <c>DFAConst.UNCLASSIFIED_INPUT</c>.
        /// </summary>
        /// <exception cref="ApplicationException">The symbol is unclassified.</exception>
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == DFAConst.UNCLASSIFIED_INPUT)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence; enumeration
        /// throws on the first unclassified symbol.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every code in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the codes of the specified symbols.
        /// </summary>
        /// <param name="symbols">
        /// Symbols to exclude; validated with <c>Safe.ArgumentNotNull</c>. Each of
        /// them must already have a translation in the alphabet.
        /// </param>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token<TTag>.New(
                Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray()
            );
        }

        /// <summary>
        /// Creates the alphabet instance that <see cref="BuildScannerContext"/>
        /// passes to the DFA optimization step.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA for the specified expression, optimizes it and packs the
        /// resulting tables into a <see cref="ScannerContext{TTag}"/>.
        /// </summary>
        /// <param name="regexp">The expression to compile; validated with <c>Safe.ArgumentNotNull</c>.</param>
        /// <exception cref="ApplicationException">
        /// The initial state of the built DFA is a final state, i.e. the language
        /// contains the empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
            // Reject a null expression up front, consistently with the other
            // entry points of this class, instead of failing later in Accept().
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>();
            regexp.Accept(visitor);

            visitor.BuildDFA(dfa);

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            // The optimized automaton is expressed over a fresh alphabet whose
            // translation map is handed to the scanner context.
            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }
}