view Implab/Formats/Grammar.cs @ 209:a867536c68fc v2

Bound promise to CancellationToken. Added new states to ExecutionSate enum. Added Safe.Guard() method to handle cleanup of the result of the promise.
author cin
date Wed, 16 Nov 2016 03:06:08 +0300
parents d5c5db0335ee
children 6fa235c5a760
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Linq;
using Implab.Automaton;
using Implab.Automaton.RegularExpressions;

namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Provides helpers for building tokens
    /// (regular-expression building blocks) over an alphabet of
    /// <typeparamref name="TSymbol"/> symbols and for compiling such an
    /// expression into a <c>ScannerContext</c>.
    /// </summary>
    /// <typeparam name="TSymbol">The type of the input symbols of the alphabet.</typeparam>
    public abstract class Grammar<TSymbol> {

        /// <summary>
        /// Gets the alphabet builder used to translate input symbols to their
        /// integer codes and to define symbols that are not known yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a token for <c>AutomatonConst.UNCLASSIFIED_INPUT</c>, the code
        /// this class treats as "symbol is not defined in the alphabet".
        /// </summary>
        /// <returns>A new symbol token for the unclassified input code.</returns>
        protected SymbolToken UnclassifiedToken() {
            return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of the specified sequence in the alphabet builder.
        /// </summary>
        /// <param name="alphabet">The symbols to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var symbol in alphabet) {
                AlphabetBuilder.DefineSymbol(symbol);
            }
        }

        /// <summary>
        /// Creates a token for a single symbol. The symbol is defined in the
        /// alphabet first when the builder does not know it yet.
        /// </summary>
        /// <param name="symbol">The input symbol.</param>
        /// <returns>The token created from the translated symbol code.</returns>
        protected Token SymbolToken(TSymbol symbol) {
            var code = TranslateOrAdd(symbol);
            return Token.New(code);
        }

        /// <summary>
        /// Creates a token for a set of symbols. Duplicates are ignored and
        /// symbols unknown to the alphabet builder are defined on the fly.
        /// </summary>
        /// <param name="symbols">The input symbols; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token created from the translated symbol codes.</returns>
        protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            // ToArray() forces the lazy translation, so any missing symbols
            // are defined before the token is created.
            var codes = TranslateOrAdd(symbols).ToArray();
            return Token.New(codes);
        }

        /// <summary>
        /// Convenience overload of <c>SymbolToken(IEnumerable&lt;TSymbol&gt;)</c>
        /// which accepts the symbols as a parameter list.
        /// </summary>
        /// <param name="set">The input symbols.</param>
        /// <returns>The token created from the translated symbol codes.</returns>
        protected Token SymbolSetToken(params TSymbol[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates the symbol to its code; a symbol reported as
        /// <c>AutomatonConst.UNCLASSIFIED_INPUT</c> is defined in the alphabet
        /// and the code returned by <c>DefineSymbol</c> is used instead.
        /// </summary>
        int TranslateOrAdd(TSymbol ch) {
            var code = AlphabetBuilder.Translate(ch);
            if (code != AutomatonConst.UNCLASSIFIED_INPUT)
                return code;

            return AlphabetBuilder.DefineSymbol(ch);
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence, defining the
        /// unknown ones. Nothing is translated or defined until the result is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates the symbol to its code and fails when the symbol is not
        /// defined in the alphabet.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// The symbol is translated to <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.
        /// </exception>
        int TranslateOrDie(TSymbol ch) {
            var code = AlphabetBuilder.Translate(ch);
            if (code == AutomatonConst.UNCLASSIFIED_INPUT)
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            return code;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence; enumeration
        /// throws when it reaches a symbol which is not defined in the alphabet.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token for every code in the range <c>[0, AlphabetBuilder.Count)</c>
        /// except the codes of the specified symbols.
        /// </summary>
        /// <param name="symbols">
        /// The symbols to exclude; checked with <c>Safe.ArgumentNotNull</c>. All of
        /// them must already be defined in the alphabet.
        /// </param>
        /// <returns>The token created from the remaining codes.</returns>
        /// <exception cref="ApplicationException">One of the symbols is not defined in the alphabet.</exception>
        protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            var allCodes = Enumerable.Range(0, AlphabetBuilder.Count);
            var excluded = TranslateOrDie(symbols);

            return Token.New(allCodes.Except(excluded).ToArray());
        }

        /// <summary>
        /// Creates the alphabet passed to <c>dfa.Optimize</c> in
        /// <c>BuildScannerContext</c>; its translation map is handed to the
        /// resulting scanner context.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA for the specified expression, optimizes it and packs the
        /// resulting tables into a scanner context.
        /// </summary>
        /// <typeparam name="TTag">The type of the tags carried by the automaton.</typeparam>
        /// <param name="regexp">
        /// The root token of the expression; checked with <c>Safe.ArgumentNotNull</c>
        /// before any automaton is constructed.
        /// </param>
        /// <returns>The scanner context created from the optimized automaton.</returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the constructed DFA is final, i.e. the language
        /// contains the empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) {
            // Validate up front, like the other members of this class do,
            // instead of failing later on regexp.Accept().
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>(dfa);
            regexp.Accept(visitor);
            visitor.BuildDFA();

            if (dfa.IsFinalState(dfa.InitialState))
                throw new ApplicationException("The specified language contains empty token");

            // The alphabet is passed to Optimize() first and only then asked
            // for its translation map.
            var alphabet = CreateAlphabet();
            var optimal = dfa.Optimize(alphabet);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                alphabet.GetTranslationMap()
            );
        }

    }


}