diff Implab/Formats/Grammar.cs @ 177:a0ff6a0e9c44 ref20160224

refactoring
author cin
date Wed, 23 Mar 2016 01:42:00 +0300
parents
children d5c5db0335ee
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/Grammar.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -0,0 +1,165 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Implab.Automaton;
+using Implab.Automaton.RegularExpressions;
+
+namespace Implab.Formats {
+    /// <summary>
+    /// Abstract base class for grammars. Lets derived classes build token expressions
+    /// over an alphabet of <typeparamref name="TSymbol"/> symbols and compile them into a scanner context.
+    /// </summary>
+    /// <typeparam name="TSymbol">Type of the input symbols that make up the alphabet.</typeparam>
+    /// <typeparam name="TTag">Tag type parameter of the tokens and of the resulting scanner context.</typeparam>
+    public abstract class Grammar<TSymbol, TTag> {
+
+        /// <summary>
+        /// Alphabet builder used to define symbols and to translate them to integer codes.
+        /// </summary>
+        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
+            get;
+        }
+
+        /// <summary>
+        /// Creates a token for <c>DFAConst.UNCLASSIFIED_INPUT</c>.
+        /// </summary>
+        protected SymbolToken<TTag> UnclassifiedToken() {
+            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
+        }
+
+        /// <summary>
+        /// Defines every symbol of <paramref name="alphabet"/> in <see cref="AlphabetBuilder"/>.
+        /// </summary>
+        /// <param name="alphabet">Symbols to define; validated with <c>Safe.ArgumentNotNull</c>.</param>
+        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
+            Safe.ArgumentNotNull(alphabet, "alphabet");
+
+            foreach (var ch in alphabet) {
+                AlphabetBuilder.DefineSymbol(ch);
+            }
+        }
+
+        /// <summary>
+        /// Creates a token for a single symbol, defining the symbol in
+        /// <see cref="AlphabetBuilder"/> first if it is still unclassified.
+        /// </summary>
+        /// <param name="symbol">Symbol to create the token for.</param>
+        protected Token<TTag> SymbolToken(TSymbol symbol) {
+            return Token<TTag>.New(TranslateOrAdd(symbol));
+        }
+
+        /// <summary>
+        /// Creates a token from the codes of the distinct symbols in <paramref name="symbols"/>,
+        /// defining any symbol that is still unclassified.
+        /// </summary>
+        /// <param name="symbols">Symbols to include; validated with <c>Safe.ArgumentNotNull</c>.</param>
+        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+
+            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
+        }
+
+        /// <summary>
+        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/> taking the symbols as arguments.
+        /// </summary>
+        /// <param name="set">Symbols to include in the token.</param>
+        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
+            return SymbolToken(set);
+        }
+
+        /// <summary>
+        /// Translates a symbol to its code, defining the symbol when the builder
+        /// reports it as <c>DFAConst.UNCLASSIFIED_INPUT</c>.
+        /// </summary>
+        int TranslateOrAdd(TSymbol ch) {
+            var t = AlphabetBuilder.Translate(ch);
+            if (t == DFAConst.UNCLASSIFIED_INPUT) {
+                t = AlphabetBuilder.DefineSymbol(ch);
+            }
+            return t;
+        }
+
+        /// <summary>
+        /// Lazily translates the distinct symbols of the sequence, defining unclassified ones.
+        /// The alphabet is only touched while the result is being enumerated.
+        /// </summary>
+        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
+            return symbols.Distinct().Select(TranslateOrAdd);
+        }
+
+        /// <summary>
+        /// Translates a symbol to its code and fails if the symbol is unclassified.
+        /// </summary>
+        /// <exception cref="ApplicationException">The symbol translates to <c>DFAConst.UNCLASSIFIED_INPUT</c>.</exception>
+        int TranslateOrDie(TSymbol ch) {
+            var t = AlphabetBuilder.Translate(ch);
+            if (t == DFAConst.UNCLASSIFIED_INPUT) {
+                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
+            }
+            return t;
+        }
+
+        /// <summary>
+        /// Lazily translates the distinct symbols of the sequence; enumeration throws on an unclassified symbol.
+        /// </summary>
+        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
+            return symbols.Distinct().Select(TranslateOrDie);
+        }
+
+        /// <summary>
+        /// Creates a token from every code in the range <c>[0, AlphabetBuilder.Count)</c>
+        /// except the codes of the given symbols.
+        /// </summary>
+        /// <param name="symbols">Symbols to exclude; each must already be classified.</param>
+        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
+        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+
+            // NOTE(review): the range starts at 0, so code 0 ends up in the token unless it is excluded — confirm this is intended.
+            return Token<TTag>.New(Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray());
+        }
+
+        /// <summary>
+        /// Creates the alphabet that <see cref="BuildScannerContext"/> passes to the DFA optimization
+        /// and whose translation map is stored in the resulting scanner context.
+        /// </summary>
+        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();
+
+        /// <summary>
+        /// Builds a DFA from <paramref name="regexp"/>, optimizes it and packs the resulting
+        /// tables into a scanner context.
+        /// </summary>
+        /// <param name="regexp">Root expression of the language; validated with <c>Safe.ArgumentNotNull</c>.</param>
+        /// <returns>
+        /// Scanner context holding the transition, final state and tag tables of the optimized DFA,
+        /// its initial state and the translation map of the alphabet from <see cref="CreateAlphabet"/>.
+        /// </returns>
+        /// <exception cref="ApplicationException">The initial state of the DFA is final, i.e. the language accepts an empty token.</exception>
+        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
+            // Validate up front, like the other entry points, rather than failing later on regexp.Accept.
+            Safe.ArgumentNotNull(regexp, "regexp");
+
+            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);
+
+            var visitor = new RegularExpressionVisitor<TTag>();
+            regexp.Accept(visitor);
+            visitor.BuildDFA(dfa);
+
+            if (dfa.IsFinalState(dfa.InitialState)) {
+                throw new ApplicationException("The specified language contains empty token");
+            }
+
+            var ab = CreateAlphabet();
+            var optimal = dfa.Optimize(ab);
+
+            return new ScannerContext<TTag>(
+                optimal.CreateTransitionTable(),
+                optimal.CreateFinalStateTable(),
+                optimal.CreateTagTable(),
+                optimal.InitialState,
+                ab.GetTranslationMap()
+            );
+        }
+    }
+}