view Implab/Formats/Grammar.cs @ 180:c32688129f14 ref20160224

refactoring complete, JSONParser rewritten
author cin
date Thu, 24 Mar 2016 02:30:46 +0300
parents d5c5db0335ee
children 6fa235c5a760
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Linq;
using Implab.Automaton;
using Implab.Automaton.RegularExpressions;

namespace Implab.Formats {
    /// <summary>
    /// Abstract base class for grammars. Provides helpers for building regular
    /// expression tokens over an alphabet of <typeparamref name="TSymbol"/> symbols
    /// and for compiling such an expression into a <c>ScannerContext</c>.
    /// </summary>
    /// <typeparam name="TSymbol">The type of the input symbols of the alphabet.</typeparam>
    public abstract class Grammar<TSymbol> {

        /// <summary>
        /// The alphabet builder used to translate symbols to their integer classes
        /// and to define symbols which are not classified yet.
        /// </summary>
        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
            get;
        }

        /// <summary>
        /// Creates a <c>SymbolToken</c> for <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.
        /// </summary>
        protected SymbolToken UnclassifiedToken() {
            return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
        }

        /// <summary>
        /// Defines every symbol of the specified sequence in the <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="alphabet">The symbols to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet) {
                AlphabetBuilder.DefineSymbol(ch);
            }
        }

        /// <summary>
        /// Creates a token for the class of the specified symbol. If the
        /// <see cref="AlphabetBuilder"/> reports the symbol as unclassified,
        /// the symbol is defined first.
        /// </summary>
        /// <param name="symbol">The symbol to create the token for.</param>
        protected Token SymbolToken(TSymbol symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token from the classes of the distinct symbols of the specified
        /// sequence. Symbols which are not classified yet are defined in the
        /// <see cref="AlphabetBuilder"/>.
        /// </summary>
        /// <param name="symbols">The symbols to create the token for; checked with <c>Safe.ArgumentNotNull</c>.</param>
        protected Token SymbolToken(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <see cref="SymbolToken(IEnumerable{TSymbol})"/>
        /// which accepts the symbols as a parameter list.
        /// </summary>
        /// <param name="set">The symbols to create the token for.</param>
        protected Token SymbolSetToken(params TSymbol[] set) {
            // An array binds to the IEnumerable<TSymbol> overload, which also validates the argument.
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates the symbol to its class; when the builder returns
        /// <c>AutomatonConst.UNCLASSIFIED_INPUT</c> the symbol is defined
        /// and the result of <c>DefineSymbol</c> is returned instead.
        /// </summary>
        int TranslateOrAdd(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == AutomatonConst.UNCLASSIFIED_INPUT) {
                t = AlphabetBuilder.DefineSymbol(ch);
            }
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence, defining the
        /// unclassified ones. The builder is only touched when the result is enumerated.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates the symbol to its class and fails if the builder returns
        /// <c>AutomatonConst.UNCLASSIFIED_INPUT</c>.
        /// </summary>
        /// <exception cref="ApplicationException">The symbol is unclassified.</exception>
        int TranslateOrDie(TSymbol ch) {
            var t = AlphabetBuilder.Translate(ch);
            if (t == AutomatonConst.UNCLASSIFIED_INPUT) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct symbols of the sequence; enumeration
        /// throws <see cref="ApplicationException"/> on the first unclassified symbol.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from all class indices in the range
        /// <c>[0, AlphabetBuilder.Count)</c> except the classes of the specified symbols.
        /// </summary>
        /// <remarks>
        /// The range starts at index 0, so the class with index 0 is part of the
        /// resulting token unless one of the specified symbols translates to it.
        /// </remarks>
        /// <param name="symbols">The symbols to exclude; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <exception cref="ApplicationException">One of the symbols is unclassified.</exception>
        protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
        }

        /// <summary>
        /// Creates the alphabet which is passed to the DFA optimization in
        /// <see cref="BuildScannerContext{TTag}"/> and whose translation map is
        /// handed over to the resulting scanner context.
        /// </summary>
        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();

        /// <summary>
        /// Builds a DFA for the specified regular expression over the
        /// <see cref="AlphabetBuilder"/>, optimizes it using the alphabet returned by
        /// <see cref="CreateAlphabet"/> and wraps the resulting tables into a scanner context.
        /// </summary>
        /// <typeparam name="TTag">The type of the tags of the DFA.</typeparam>
        /// <param name="regexp">The regular expression to compile; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>
        /// A scanner context made of the transition table, the final state table, the tag
        /// table and the initial state of the optimized DFA together with the translation
        /// map of the created alphabet.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// The initial state of the built DFA is a final state, i.e. the expression accepts an empty token.
        /// </exception>
        protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) {
            // Validate up front, consistently with the other methods of this class,
            // instead of failing with a NullReferenceException after the DFA is allocated.
            Safe.ArgumentNotNull(regexp, "regexp");

            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);

            var visitor = new RegularExpressionVisitor<TTag>(dfa);
            regexp.Accept(visitor);
            visitor.BuildDFA();

            if (dfa.IsFinalState(dfa.InitialState)) {
                throw new ApplicationException("The specified language contains empty token");
            }

            // The alphabet instance is filled by Optimize and queried for its map afterwards.
            var ab = CreateAlphabet();
            var optimal = dfa.Optimize(ab);

            return new ScannerContext<TTag>(
                optimal.CreateTransitionTable(),
                optimal.CreateFinalStateTable(),
                optimal.CreateTagTable(),
                optimal.InitialState,
                ab.GetTranslationMap()
            );
        }

    }


}