view Implab/Formats/Grammar.cs @ 187:dd4a3590f9c6 ref20160224

Reworked cancelation handling, if the cancel handler isn't specified the OperationCanceledException will be handled by the error handler Any unhandled OperationCanceledException will cause the promise cancelation
author cin
date Tue, 19 Apr 2016 17:35:20 +0300
parents d5c5db0335ee
children 6fa235c5a760
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Linq;
using Implab.Automaton;
using Implab.Automaton.RegularExpressions;

namespace Implab.Formats {
    /// <summary>
    /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
    /// </summary>
    public abstract class Grammar<TSymbol> {
        
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        protected SymbolToken UnclassifiedToken() {
            return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
        }

        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet)
                AlphabetBuilder.DefineSymbol(ch);
        }

        protected Token SymbolToken(TSymbol symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(TranslateOrAdd(symbols).ToArray());
        }

        protected Token SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == AutomatonConst.UNCLASSIFIED_INPUT)
                t = AlphabetBuilder.DefineSymbol(ch);
            return t;
        }

        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == AutomatonConst.UNCLASSIFIED_INPUT)
                    throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return t;
        }

        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
        }

        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) {
            
            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>(dfa);
            regexp.Accept(visitor);
            visitor.BuildDFA();

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }


}