diff Implab/Automaton/RegularExpressions/Grammar.cs @ 162:0526412bbb26 ref20160224

DFA refactoring
author cin
date Wed, 24 Feb 2016 08:39:53 +0300
parents
children 419aa51b04fd
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/Grammar.cs	Wed Feb 24 08:39:53 2016 +0300
@@ -0,0 +1,108 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Automaton.RegularExpressions {
+    /// <summary>
+    /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
+    /// </summary>
+    /// <typeparam name="TGrammar"></typeparam>
+    public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
+        static TGrammar _instance;
+        
+        public static TGrammar Instance{
+            get {
+                if (_instance == null)
+                    _instance = new TGrammar();
+                return _instance;
+            }
+        }
+
+        readonly CharAlphabet m_alphabet = new CharAlphabet();
+
+        public CharAlphabet Alphabet {
+            get { return m_alphabet; }
+        }
+
+        public SymbolToken UnclassifiedToken() {
+            return new SymbolToken(CharAlphabet.UNCLASSIFIED);
+        }
+
+        public void DefineAlphabet(IEnumerable<char> alphabet) {
+            Safe.ArgumentNotNull(alphabet, "alphabet");
+
+            foreach (var ch in alphabet)
+                m_alphabet.DefineSymbol(ch);
+        }
+        public Token SymbolRangeToken(char start, char end) {
+            return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x));
+        }
+
+        public Token SymbolToken(char symbol) {
+            return Token.New(TranslateOrAdd(symbol));
+        }
+
+        public Token SymbolToken(IEnumerable<char> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+
+            return Token.New(TranslateOrAdd(symbols).ToArray());
+        }
+
+        public Token SymbolSetToken(params char[] set) {
+            return SymbolToken(set);
+        }
+
+        int TranslateOrAdd(char ch) {
+            var t = m_alphabet.Translate(ch);
+            if (t == CharAlphabet.UNCLASSIFIED)
+                t = m_alphabet.DefineSymbol(ch);
+            return t;
+        }
+
+        IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
+            return symbols.Distinct().Select(TranslateOrAdd);
+        }
+
+        int TranslateOrDie(char ch) {
+            var t = m_alphabet.Translate(ch);
+                if (t == CharAlphabet.UNCLASSIFIED)
+                    throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
+            return t;
+        }
+
+        IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
+            return symbols.Distinct().Select(TranslateOrDie);
+        }
+
+        public Token SymbolTokenExcept(IEnumerable<char> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+
+            return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray());
+        }
+
+        protected CDFADefinition BuildDFA(Token lang) {
+            Safe.ArgumentNotNull(lang, "lang");
+
+            var dfa = new CDFADefinition(m_alphabet);
+            
+            var builder = new DFABuilder();
+
+            lang.Accept( builder );
+
+            builder.BuildDFA(dfa);
+            if (dfa.InitialStateIsFinal)
+                throw new ApplicationException("The specified language contains empty token");
+
+            return dfa.Optimize();
+        }
+
+        
+
+        //protected abstract TGrammar CreateInstance();
+    }
+
+
+}