view Implab/Parsing/Grammar.cs @ 89:ce0171cacec4 v2

improved performance of a chained map operation
author cin
date Wed, 08 Oct 2014 02:19:45 +0400
parents c0bf853aa04f
children 130781364799
line wrap: on
line source

using Implab;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Implab.Parsing {
    /// <summary>
    /// Abstract base class for grammars. A grammar allows expressions to be formulated over an alphabet of <c>char</c> symbols.
    /// </summary>
    /// <typeparam name="TGrammar">The concrete grammar type; it must derive from this class and have a public parameterless constructor.</typeparam>
    public abstract class Grammar<TGrammar> where TGrammar: Grammar<TGrammar>, new() {
        // Alphabet shared by every token created through this grammar instance.
        Alphabet m_alphabet = new Alphabet();
        static TGrammar _instance;

        /// <summary>
        /// Gets the shared grammar instance, creating it with the parameterless constructor on first access.
        /// </summary>
        /// <remarks>
        /// The lazy creation is not synchronized, so concurrent first accesses may construct more than one instance.
        /// </remarks>
        public static TGrammar Instance {
            get {
                if (_instance == null) {
                    _instance = new TGrammar();
                }
                return _instance;
            }
        }

        /// <summary>
        /// Creates a symbol token for the <c>Alphabet.UNCLASSIFIED</c> symbol class.
        /// </summary>
        /// <returns>A new <c>SymbolToken</c> wrapping <c>Alphabet.UNCLASSIFIED</c>.</returns>
        public SymbolToken UnclassifiedToken() {
            return new SymbolToken(Alphabet.UNCLASSIFIED);
        }

        /// <summary>
        /// Defines every character of the supplied sequence as a symbol of the grammar's alphabet.
        /// </summary>
        /// <param name="alphabet">The characters to define; checked with <c>Safe.ArgumentNotNull</c>.</param>
        public void DefineAlphabet(IEnumerable<char> alphabet) {
            Safe.ArgumentNotNull(alphabet, "alphabet");

            foreach (var ch in alphabet) {
                m_alphabet.DefineSymbol(ch);
            }
        }

        /// <summary>
        /// Creates a token from every character in the inclusive range <paramref name="start"/>..<paramref name="end"/>.
        /// Characters not yet known to the alphabet are defined on the fly.
        /// </summary>
        /// <param name="start">The first character of the range.</param>
        /// <param name="end">The last character of the range; must not be less than <paramref name="start"/>.</param>
        /// <returns>The token produced by <c>Token.New</c> for the symbol classes of the range.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="end"/> is less than <paramref name="start"/>.</exception>
        public Token SymbolRangeToken(char start, char end) {
            // Without this guard Enumerable.Range yields an empty sequence when end == start - 1
            // and throws for an unrelated "count" parameter when end is smaller still, so the
            // inverted range is rejected explicitly with a meaningful parameter name.
            if (end < start) {
                throw new ArgumentOutOfRangeException("end", "The end of the range must not precede its start");
            }

            return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x));
        }

        /// <summary>
        /// Creates a token for a single character, defining the character in the alphabet when it is still unclassified.
        /// </summary>
        /// <param name="symbol">The character to create the token for.</param>
        /// <returns>The token produced by <c>Token.New</c> for the character's symbol class.</returns>
        public Token SymbolToken(char symbol) {
            return Token.New(TranslateOrAdd(symbol));
        }

        /// <summary>
        /// Creates a token from a set of characters, defining any character that is still unclassified.
        /// Duplicate characters are considered only once.
        /// </summary>
        /// <param name="symbols">The characters to include; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The token produced by <c>Token.New</c> for the symbol classes of the characters.</returns>
        public Token SymbolToken(IEnumerable<char> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(TranslateOrAdd(symbols).ToArray());
        }

        /// <summary>
        /// Convenience overload of <c>SymbolToken(IEnumerable&lt;char&gt;)</c> that accepts the characters as a parameter list.
        /// </summary>
        /// <param name="set">The characters to include.</param>
        /// <returns>The token created for the supplied characters.</returns>
        public Token SymbolSetToken(params char[] set) {
            return SymbolToken(set);
        }

        /// <summary>
        /// Translates a character to its symbol class, defining the character first
        /// when the alphabet reports it as <c>Alphabet.UNCLASSIFIED</c>.
        /// </summary>
        int TranslateOrAdd(char ch) {
            var t = m_alphabet.Translate(ch);
            if (t == Alphabet.UNCLASSIFIED) {
                t = m_alphabet.DefineSymbol(ch);
            }
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct characters of a sequence, defining the unknown ones.
        /// </summary>
        IEnumerable<int> TranslateOrAdd(IEnumerable<char> symbols) {
            return symbols.Distinct().Select(TranslateOrAdd);
        }

        /// <summary>
        /// Translates a character to its symbol class and fails when the alphabet
        /// reports it as <c>Alphabet.UNCLASSIFIED</c>.
        /// </summary>
        /// <exception cref="ApplicationException">The character is unclassified.</exception>
        int TranslateOrDie(char ch) {
            var t = m_alphabet.Translate(ch);
            if (t == Alphabet.UNCLASSIFIED) {
                throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
            }
            return t;
        }

        /// <summary>
        /// Lazily translates the distinct characters of a sequence; enumeration throws on the first unclassified one.
        /// </summary>
        IEnumerable<int> TranslateOrDie(IEnumerable<char> symbols) {
            return symbols.Distinct().Select(TranslateOrDie);
        }

        /// <summary>
        /// Creates a token from every symbol class in <c>0 .. Count - 1</c> of the alphabet
        /// except the classes of the supplied characters.
        /// </summary>
        /// <param name="symbols">The characters to exclude; each must already be classified by the alphabet.</param>
        /// <returns>The token produced by <c>Token.New</c> for the remaining symbol classes.</returns>
        /// <exception cref="ApplicationException">One of the characters is unclassified.</exception>
        public Token SymbolTokenExcept(IEnumerable<char> symbols) {
            Safe.ArgumentNotNull(symbols, "symbols");

            return Token.New(Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray());
        }

        /// <summary>
        /// Builds a DFA definition over this grammar's alphabet for the specified language.
        /// </summary>
        /// <param name="lang">The token describing the language; checked with <c>Safe.ArgumentNotNull</c>.</param>
        /// <returns>The result of calling <c>Optimize</c> on the built DFA definition.</returns>
        /// <exception cref="ApplicationException">The initial state of the built DFA is final, i.e. the language contains an empty token.</exception>
        protected CDFADefinition BuildDFA(Token lang) {
            Safe.ArgumentNotNull(lang, "lang");

            var dfa = new CDFADefinition(m_alphabet);
            var builder = new DFABuilder();

            // The builder visits the token expression first and then emits the states into dfa.
            lang.Accept(builder);
            builder.BuildDFA(dfa);

            if (dfa.InitialStateIsFinal) {
                throw new ApplicationException("The specified language contains empty token");
            }

            return dfa.Optimize();
        }
    }


}