Mercurial > pub > ImplabNet
diff Implab/Automaton/RegularExpressions/Grammar.cs @ 162:0526412bbb26 ref20160224
DFA refactoring
author | cin |
---|---|
date | Wed, 24 Feb 2016 08:39:53 +0300 |
parents | |
children | 419aa51b04fd |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/Grammar.cs Wed Feb 24 08:39:53 2016 +0300 @@ -0,0 +1,108 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Automaton.RegularExpressions { + /// <summary> + /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. + /// </summary> + /// <typeparam name="TGrammar"></typeparam> + public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() { + static TGrammar _instance; + + public static TGrammar Instance{ + get { + if (_instance == null) + _instance = new TGrammar(); + return _instance; + } + } + + readonly CharAlphabet m_alphabet = new CharAlphabet(); + + public CharAlphabet Alphabet { + get { return m_alphabet; } + } + + public SymbolToken UnclassifiedToken() { + return new SymbolToken(CharAlphabet.UNCLASSIFIED); + } + + public void DefineAlphabet(IEnumerable<char> alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + foreach (var ch in alphabet) + m_alphabet.DefineSymbol(ch); + } + public Token SymbolRangeToken(char start, char end) { + return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x)); + } + + public Token SymbolToken(char symbol) { + return Token.New(TranslateOrAdd(symbol)); + } + + public Token SymbolToken(IEnumerable<char> symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New(TranslateOrAdd(symbols).ToArray()); + } + + public Token SymbolSetToken(params char[] set) { + return SymbolToken(set); + } + + int TranslateOrAdd(char ch) { + var t = m_alphabet.Translate(ch); + if (t == CharAlphabet.UNCLASSIFIED) + t = m_alphabet.DefineSymbol(ch); + return t; + } + + IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) { + return symbols.Distinct().Select(TranslateOrAdd); + } + + int TranslateOrDie(char ch) { + var t = m_alphabet.Translate(ch); + if (t == CharAlphabet.UNCLASSIFIED) + throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); + return t; + } + + IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) { + return symbols.Distinct().Select(TranslateOrDie); + } + + public Token SymbolTokenExcept(IEnumerable<char> symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray()); + } + + protected CDFADefinition BuildDFA(Token lang) { + Safe.ArgumentNotNull(lang, "lang"); + + var dfa = new CDFADefinition(m_alphabet); + + var builder = new DFABuilder(); + + lang.Accept( builder ); + + builder.BuildDFA(dfa); + if (dfa.InitialStateIsFinal) + throw new ApplicationException("The specified language contains empty token"); + + return dfa.Optimize(); + } + + + + //protected abstract TGrammar CreateInstance(); + } + + +}