changeset 177:a0ff6a0e9c44 ref20160224

refactoring
author cin
date Wed, 23 Mar 2016 01:42:00 +0300 (2016-03-22)
parents 0c3c69fe225b
children d5c5db0335ee
files Implab/Automaton/RegularExpressions/AltToken.cs Implab/Automaton/RegularExpressions/BinaryToken.cs Implab/Automaton/RegularExpressions/CatToken.cs Implab/Automaton/RegularExpressions/EmptyToken.cs Implab/Automaton/RegularExpressions/EndToken.cs Implab/Automaton/RegularExpressions/EndTokenT.cs Implab/Automaton/RegularExpressions/Grammar.cs Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Implab/Automaton/RegularExpressions/IVisitor.cs Implab/Automaton/RegularExpressions/IVisitorT.cs Implab/Automaton/RegularExpressions/RegularDFA.cs Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Implab/Automaton/RegularExpressions/StarToken.cs Implab/Automaton/RegularExpressions/SymbolToken.cs Implab/Automaton/RegularExpressions/Token.cs Implab/Formats/ByteAlphabet.cs Implab/Formats/CharAlphabet.cs Implab/Formats/Grammar.cs Implab/Formats/JSON/JSONScanner.cs Implab/Formats/ScannerContext.cs Implab/Formats/TextScanner.cs Implab/Implab.csproj Implab/Safe.cs
diffstat 23 files changed, 284 insertions(+), 226 deletions(-) [+]
line wrap: on
line diff
--- a/Implab/Automaton/RegularExpressions/AltToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/AltToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,17 +1,17 @@
 using System;
 
 namespace Implab.Automaton.RegularExpressions {
-    public class AltToken<TTag>: BinaryToken<TTag> {
-        public AltToken(Token<TTag> left, Token<TTag> right)
+    public class AltToken: BinaryToken {
+        public AltToken(Token left, Token right)
             : base(left, right) {
         }
 
-        public override void Accept(IVisitor<TTag> visitor) {
+        public override void Accept(IVisitor visitor) {
             Safe.ArgumentNotNull(visitor, "visitor");
             visitor.Visit(this);
         }
         public override string ToString() {
-            return String.Format(Right is BinaryToken<TTag> ? "{0}|({1})" : "{0}|{1}", Left, Right);
+            return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right);
         }
     }
 }
--- a/Implab/Automaton/RegularExpressions/BinaryToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/BinaryToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,19 +1,19 @@
 using Implab;
 
 namespace Implab.Automaton.RegularExpressions {
-    public abstract class BinaryToken<TTag> : Token<TTag> {
-        readonly Token<TTag> m_left;
-        readonly Token<TTag> m_right;
+    public abstract class BinaryToken: Token {
+        readonly Token m_left;
+        readonly Token m_right;
 
-        public Token<TTag> Left {
+        public Token Left {
             get { return m_left; }
         }
 
-        public Token<TTag> Right {
+        public Token Right {
             get { return m_right; }
         }
 
-        protected BinaryToken(Token<TTag> left, Token<TTag> right) {
+        protected BinaryToken(Token left, Token right) {
             Safe.ArgumentNotNull(m_left = left, "left");
             Safe.ArgumentNotNull(m_right = right, "right");
         }
--- a/Implab/Automaton/RegularExpressions/CatToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/CatToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,12 +1,12 @@
 using System;
 
 namespace Implab.Automaton.RegularExpressions {
-    public class CatToken<TTag> : BinaryToken<TTag> {
-        public CatToken(Token<TTag> left, Token<TTag> right)
+    public class CatToken : BinaryToken {
+        public CatToken(Token left, Token right)
             : base(left, right) {
         }
 
-        public override void Accept(IVisitor<TTag> visitor) {
+        public override void Accept(IVisitor visitor) {
             Safe.ArgumentNotNull(visitor, "visitor");
             visitor.Visit(this);
         }
@@ -15,8 +15,8 @@
             return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right));
         }
 
-        static string FormatToken(Token<TTag> token) {
-            return String.Format(token is AltToken<TTag> ? "({0})" : "{0}", token);
+        static string FormatToken(Token token) {
+            return String.Format(token is AltToken ? "({0})" : "{0}", token);
         }
     }
 }
--- a/Implab/Automaton/RegularExpressions/EmptyToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/EmptyToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,8 +1,8 @@
 using Implab;
 
 namespace Implab.Automaton.RegularExpressions {
-    public class EmptyToken<TTag> : Token<TTag> {
-        public override void Accept(IVisitor<TTag> visitor) {
+    public class EmptyToken: Token {
+        public override void Accept(IVisitor visitor) {
             Safe.ArgumentNotNull(visitor, "visitor");
             visitor.Visit(this);
         }
--- a/Implab/Automaton/RegularExpressions/EndToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/EndToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -5,23 +5,9 @@
     /// Конечный символ расширенного регулярного выражения, при построении ДКА
     /// используется для определения конечных состояний.
     /// </summary>
-    public class EndToken<TTag>: Token<TTag> {
-
-        TTag m_tag;
-
-        public EndToken(TTag tag) {
-            m_tag = tag;
-        }
+    public class EndToken: Token {
 
-        public EndToken()
-            : this(default(TTag)) {
-        }
-
-        public TTag Tag {
-            get { return m_tag; }
-        }
-        
-        public override void Accept(IVisitor<TTag> visitor) {
+        public override void Accept(IVisitor visitor) {
             Safe.ArgumentNotNull(visitor, "visitor");
             visitor.Visit(this);
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/EndTokenT.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -0,0 +1,33 @@
+using Implab;
+
+namespace Implab.Automaton.RegularExpressions {
+    /// <summary>
+    /// Конечный символ расширенного регулярного выражения, при построении ДКА
+    /// используется для определения конечных состояний.
+    /// </summary>
+    public class EndToken<TTag>: Token {
+
+        TTag m_tag;
+
+        public EndToken(TTag tag) {
+            m_tag = tag;
+        }
+
+        public EndToken()
+            : this(default(TTag)) {
+        }
+
+        public TTag Tag {
+            get { return m_tag; }
+        }
+        
+        public override void Accept(IVisitor visitor) {
+            Safe.ArgumentNotNull(visitor, "visitor"); // check null first: ArgumentOfType also rejects null, but reports it as a type mismatch
+            Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); // a tagged end token can only be dispatched to a visitor that understands TTag
+            ((IVisitor<TTag>)visitor).Visit(this);
+        }
+        public override string ToString() {
+            return "#";
+        }
+    }
+}
--- a/Implab/Automaton/RegularExpressions/Grammar.cs	Tue Mar 22 18:58:40 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-using Implab;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Implab.Automaton.RegularExpressions {
-    /// <summary>
-    /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
-    /// </summary>
-    public abstract class Grammar<TSymbol, TTag> {
-        
-        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
-            get;
-        }
-
-        protected SymbolToken<TTag> UnclassifiedToken() {
-            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
-        }
-
-        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
-            Safe.ArgumentNotNull(alphabet, "alphabet");
-
-            foreach (var ch in alphabet)
-                AlphabetBuilder.DefineSymbol(ch);
-        }
-
-        protected Token<TTag> SymbolToken(TSymbol symbol) {
-            return Token<TTag>.New(TranslateOrAdd(symbol));
-        }
-
-        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
-            Safe.ArgumentNotNull(symbols, "symbols");
-
-            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
-        }
-
-        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
-            return SymbolToken(set);
-        }
-
-        int TranslateOrAdd(TSymbol ch) {
-            var t = AlphabetBuilder.Translate(ch);
-            if (t == DFAConst.UNCLASSIFIED_INPUT)
-                t = AlphabetBuilder.DefineSymbol(ch);
-            return t;
-        }
-
-        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
-            return symbols.Distinct().Select(TranslateOrAdd);
-        }
-
-        int TranslateOrDie(TSymbol ch) {
-            var t = AlphabetBuilder.Translate(ch);
-            if (t == DFAConst.UNCLASSIFIED_INPUT)
-                    throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
-            return t;
-        }
-
-        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
-            return symbols.Distinct().Select(TranslateOrDie);
-        }
-
-        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
-            Safe.ArgumentNotNull(symbols, "symbols");
-
-            return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
-        }
-
-        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();
-
-        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
-            
-            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);
-
-            var visitor = new RegularExpressionVisitor<TTag>();
-            regexp.Accept( visitor );
-
-            visitor.BuildDFA(dfa);
-
-            if (dfa.IsFinalState(dfa.InitialState))
-                throw new ApplicationException("The specified language contains empty token");
-
-            var ab = CreateAlphabet();
-            var optimal = dfa.Optimize(ab);
-
-            return new ScannerContext<TTag>(
-                optimal.CreateTransitionTable(),
-                optimal.CreateFinalStateTable(),
-                optimal.CreateTagTable(),
-                optimal.InitialState,
-                ab.GetTranslationMap()
-            );
-        }
-
-    }
-
-
-}
--- a/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,5 +1,4 @@
-using System;
-
+
 namespace Implab.Automaton.RegularExpressions {
     public interface ITaggedDFABuilder<TTag> : IDFATableBuilder {
         void SetStateTag(int s, TTag[] tags);
--- a/Implab/Automaton/RegularExpressions/IVisitor.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/IVisitor.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -2,12 +2,12 @@
     /// <summary>
     /// Интерфейс обходчика синтаксического дерева регулярного выражения
     /// </summary>
-    public interface IVisitor<TTag> {
-        void Visit(AltToken<TTag> token);
-        void Visit(StarToken<TTag> token);
-        void Visit(CatToken<TTag> token);
-        void Visit(EmptyToken<TTag> token);
-        void Visit(EndToken<TTag> token);
-        void Visit(SymbolToken<TTag> token);
+    public interface IVisitor {
+        void Visit(AltToken token);
+        void Visit(StarToken token);
+        void Visit(CatToken token);
+        void Visit(EmptyToken token);
+        void Visit(EndToken token);
+        void Visit(SymbolToken token);
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/IVisitorT.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -0,0 +1,8 @@
+namespace Implab.Automaton.RegularExpressions {
+    /// <summary>
+    /// Интерфейс обходчика синтаксического дерева регулярного выражения
+    /// </summary>
+    public interface IVisitor<T> : IVisitor {
+        void Visit(EndToken<T> token);
+    }
+}
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,5 +1,4 @@
-using System;
-using System.Collections.Generic;
+using System.Collections.Generic;
 using System.Linq;
 
 namespace Implab.Automaton.RegularExpressions {
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -12,13 +12,14 @@
     /// </summary>
     public class RegularExpressionVisitor<TTag> : IVisitor<TTag> {
         int m_idx;
-        Token<TTag> m_root;
+        Token m_root;
         HashSet<int> m_firstpos;
         HashSet<int> m_lastpos;
 
         readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>();
         readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>();
-        readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>();
+        readonly HashSet<int> m_ends = new HashSet<int>();
+        readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
 
         public Dictionary<int, HashSet<int>> FollowposMap {
             get { return m_followpos; }
@@ -30,19 +31,19 @@
         }
 
         bool Nullable(object n) {
-            if (n is EmptyToken<TTag> || n is StarToken<TTag>)
+            if (n is EmptyToken || n is StarToken)
                 return true;
-            var altToken = n as AltToken<TTag>;
+            var altToken = n as AltToken;
             if (altToken != null)
                 return Nullable(altToken.Left) || Nullable(altToken.Right);
-            var catToken = n as CatToken<TTag>;
+            var catToken = n as CatToken;
             if (catToken != null)
                 return Nullable(catToken.Left) && Nullable(catToken.Right);
             return false;
         }
 
 
-        public void Visit(AltToken<TTag> token) {
+        public void Visit(AltToken token) {
             if (m_root == null)
                 m_root = token;
             var firtspos = new HashSet<int>();
@@ -60,7 +61,7 @@
             m_lastpos = lastpos;
         }
 
-        public void Visit(StarToken<TTag> token) {
+        public void Visit(StarToken token) {
             if (m_root == null)
                 m_root = token;
             token.Token.Accept(this);
@@ -69,7 +70,7 @@
                 Followpos(i).UnionWith(m_firstpos);
         }
 
-        public void Visit(CatToken<TTag> token) {
+        public void Visit(CatToken token) {
             if (m_root == null)
                 m_root = token;
 
@@ -97,12 +98,12 @@
 
         }
 
-        public void Visit(EmptyToken<TTag> token) {
+        public void Visit(EmptyToken token) {
             if (m_root == null)
                 m_root = token;
         }
 
-        public void Visit(SymbolToken<TTag> token) {
+        public void Visit(SymbolToken token) {
             if (m_root == null)
                 m_root = token;
             m_idx++;
@@ -119,7 +120,19 @@
             m_firstpos = new HashSet<int>(new[] { m_idx });
             m_lastpos = new HashSet<int>(new[] { m_idx });
             Followpos(m_idx);
-            m_ends.Add(m_idx, token.Tag);
+            m_ends.Add(m_idx);
+            m_tags.Add(m_idx, token.Tag);
+        }
+
+        public void Visit(EndToken token) {
+            if (m_root == null)
+                m_root = token;
+            m_idx++;
+            m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
+            m_firstpos = new HashSet<int>(new[] { m_idx });
+            m_lastpos = new HashSet<int>(new[] { m_idx });
+            Followpos(m_idx);
+            m_ends.Add(m_idx);
         }
 
         public void BuildDFA(ITaggedDFABuilder<TTag> dfa) {
@@ -157,14 +170,18 @@
                         }
                     }
                     if (next.Count > 0) {
-                        int s2 = states.Translate(next);
-                        if (s2 == DFAConst.UNCLASSIFIED_INPUT) {
+                        int s2;
+                        if (states.Contains(next)) {
+                            s2 = states.Translate(next);
+                        } else {
                             s2 = states.DefineSymbol(next);
 
-                            tags = GetStateTags(next);
-                            if (tags != null && tags.Length > 0) {
+                            if (IsFinal(next)) {
+                                
                                 dfa.MarkFinalState(s2);
-                                dfa.SetStateTag(s2, tags);
+                                tags = GetStateTags(next);
+                                if (tags != null && tags.Length > 0)
+                                    dfa.SetStateTag(s2, tags);
                             }
                             
                             queue.Enqueue(next);
@@ -175,9 +192,14 @@
             }
         }
 
+        bool IsFinal(IEnumerable<int> state) {
+            Debug.Assert(state != null);
+            return state.Any(m_ends.Contains);
+        }
+
         TTag[] GetStateTags(IEnumerable<int> state) {
             Debug.Assert(state != null);
-            return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray();
+            return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
         }
 
     }
--- a/Implab/Automaton/RegularExpressions/StarToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/StarToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,28 +1,25 @@
 using Implab;
 using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+
 
 namespace Implab.Automaton.RegularExpressions {
     /// <summary>
     /// Замыкание выражения с 0 и более повторов.
     /// </summary>
-    public class StarToken<TTag>: Token<TTag> {
+    public class StarToken: Token {
 
-        Token<TTag> m_token;
+        Token m_token;
 
-        public Token<TTag> Token {
+        public Token Token {
             get { return m_token; }
         }
 
-        public StarToken(Token<TTag> token) {
+        public StarToken(Token token) {
             Safe.ArgumentNotNull(token, "token");
             m_token = token;
         }
 
-        public override void Accept(IVisitor<TTag> visitor) {
+        public override void Accept(IVisitor visitor) {
             Safe.ArgumentNotNull(visitor, "visitor");
             visitor.Visit(this);
         }
--- a/Implab/Automaton/RegularExpressions/SymbolToken.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/SymbolToken.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -4,7 +4,7 @@
     /// <summary>
     /// Выражение, соответсвующее одному символу.
     /// </summary>
-    public class SymbolToken<TTag> : Token<TTag> {
+    public class SymbolToken: Token {
         int m_value;
 
         public int Value {
@@ -14,7 +14,7 @@
         public SymbolToken(int value) {
             m_value = value;
         }
-        public override void Accept(IVisitor<TTag> visitor) {
+        public override void Accept(IVisitor visitor) {
             Safe.ArgumentNotNull(visitor, "visitor");
 
             visitor.Visit(this);
--- a/Implab/Automaton/RegularExpressions/Token.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/Token.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -3,46 +3,46 @@
 using System.Linq;
 
 namespace Implab.Automaton.RegularExpressions {
-    public abstract class Token<TTag> {
-        public abstract void Accept(IVisitor<TTag> visitor);
+    public abstract class Token {
+        public abstract void Accept(IVisitor visitor);
 
-        public Token<TTag> Extend() {
-            return Cat(new EndToken<TTag>());
+        public Token Extend() {
+            return Cat(new EndToken());
         }
 
-        public Token<TTag> Tag(TTag tag) {
+        public Token Tag<TTag>(TTag tag) {
             return Cat(new EndToken<TTag>(tag));
         }
 
-        public Token<TTag> Cat(Token<TTag> right) {
-            return new CatToken<TTag>(this, right);
+        public Token Cat(Token right) {
+            return new CatToken(this, right);
         }
 
-        public Token<TTag> Or(Token<TTag> right) {
-            return new AltToken<TTag>(this, right);
+        public Token Or(Token right) {
+            return new AltToken(this, right);
         }
 
-        public Token<TTag> Optional() {
-            return Or(new EmptyToken<TTag>());
+        public Token Optional() {
+            return Or(new EmptyToken());
         }
 
-        public Token<TTag> EClosure() {
-            return new StarToken<TTag>(this);
+        public Token EClosure() {
+            return new StarToken(this);
         }
 
-        public Token<TTag> Closure() {
-            return Cat(new StarToken<TTag>(this));
+        public Token Closure() {
+            return Cat(new StarToken(this));
         }
 
-        public Token<TTag> Repeat(int count) {
-            Token<TTag> token = null;
+        public Token Repeat(int count) {
+            Token token = null;
 
             for (int i = 0; i < count; i++)
                 token = token != null ? token.Cat(this) : this;
-            return token ?? new EmptyToken<TTag>();
+            return token ?? new EmptyToken();
         }
 
-        public Token<TTag> Repeat(int min, int max) {
+        public Token Repeat(int min, int max) {
             if (min > max || min < 1)
                 throw new ArgumentOutOfRangeException();
             var token = Repeat(min);
@@ -52,11 +52,11 @@
             return token;
         }
 
-        public static Token<TTag> New(params int[] set) {
+        public static Token New(params int[] set) {
             Safe.ArgumentNotNull(set, "set");
-            Token<TTag> token = null;
+            Token token = null;
             foreach(var c in set.Distinct())
-                token = token == null ? new SymbolToken<TTag>(c) : token.Or(new SymbolToken<TTag>(c));
+                token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c));
             return token;
         }
     }
--- a/Implab/Formats/ByteAlphabet.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Formats/ByteAlphabet.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -4,8 +4,6 @@
 
 namespace Implab.Formats {
     public class ByteAlphabet : IndexedAlphabetBase<byte> {
-        public ByteAlphabet() {
-        }
 
         #region implemented abstract members of IndexedAlphabetBase
 
--- a/Implab/Formats/CharAlphabet.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Formats/CharAlphabet.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -5,9 +5,6 @@
 namespace Implab.Formats {
     public class CharAlphabet: IndexedAlphabetBase<char> {
 
-        public CharAlphabet() {
-        }
-
         public override int GetSymbolIndex(char symbol) {
             return symbol;
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Formats/Grammar.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -0,0 +1,100 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Implab.Automaton;
+using Implab.Automaton.RegularExpressions;
+
+namespace Implab.Formats {
+    /// <summary>
+    /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>.
+    /// </summary>
+    public abstract class Grammar<TSymbol, TTag> {
+        
+        protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder {
+            get;
+        }
+
+        protected SymbolToken<TTag> UnclassifiedToken() {
+            return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT);
+        }
+
+        protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) {
+            Safe.ArgumentNotNull(alphabet, "alphabet");
+
+            foreach (var ch in alphabet)
+                AlphabetBuilder.DefineSymbol(ch);
+        }
+
+        protected Token<TTag> SymbolToken(TSymbol symbol) {
+            return Token<TTag>.New(TranslateOrAdd(symbol));
+        }
+
+        protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+
+            return Token<TTag>.New(TranslateOrAdd(symbols).ToArray());
+        }
+
+        protected Token<TTag> SymbolSetToken(params TSymbol[] set) {
+            return SymbolToken(set);
+        }
+
+        int TranslateOrAdd(TSymbol ch) {
+            var t = AlphabetBuilder.Translate(ch);
+            if (t == DFAConst.UNCLASSIFIED_INPUT)
+                t = AlphabetBuilder.DefineSymbol(ch);
+            return t;
+        }
+
+        IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) {
+            return symbols.Distinct().Select(TranslateOrAdd);
+        }
+
+        int TranslateOrDie(TSymbol ch) {
+            var t = AlphabetBuilder.Translate(ch);
+            if (t == DFAConst.UNCLASSIFIED_INPUT)
+                    throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
+            return t;
+        }
+
+        IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) {
+            return symbols.Distinct().Select(TranslateOrDie);
+        }
+
+        protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) {
+            Safe.ArgumentNotNull(symbols, "symbols");
+
+            return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
+        }
+
+        protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet();
+
+        protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) {
+            
+            var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder);
+
+            var visitor = new RegularExpressionVisitor<TTag>();
+            regexp.Accept( visitor );
+
+            visitor.BuildDFA(dfa);
+
+            if (dfa.IsFinalState(dfa.InitialState))
+                throw new ApplicationException("The specified language contains empty token");
+
+            var ab = CreateAlphabet();
+            var optimal = dfa.Optimize(ab);
+
+            return new ScannerContext<TTag>(
+                optimal.CreateTransitionTable(),
+                optimal.CreateFinalStateTable(),
+                optimal.CreateTagTable(),
+                optimal.InitialState,
+                ab.GetTranslationMap()
+            );
+        }
+
+    }
+
+
+}
--- a/Implab/Formats/JSON/JSONScanner.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Formats/JSON/JSONScanner.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -4,7 +4,6 @@
 using System.Text;
 using Implab.Components;
 using System.IO;
-using Implab.Automaton.RegularExpressions;
 
 namespace Implab.Formats.JSON {
     /// <summary>
@@ -13,8 +12,8 @@
     public class JSONScanner : Disposable {
         readonly StringBuilder m_builder = new StringBuilder();
 
-        readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA;
-        readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA;
+        readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA;
+        readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA;
 
 
         readonly TextScanner m_scanner;
@@ -31,7 +30,7 @@
         public JSONScanner(TextReader reader, int bufferMax, int chunkSize) {
             Safe.ArgumentNotNull(reader, "reader");
 
-            m_scanner = new ReaderScanner(reader);
+            m_scanner = new ReaderScanner(reader, bufferMax, chunkSize);
         }
 
         /// <summary>
@@ -44,7 +43,7 @@
         /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks>
         public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
             JSONGrammar.TokenType[] tag;
-            if (m_jsonScanner.Execute(m_scanner, out tag)) {
+            if (m_jsonContext.Execute(m_scanner, out tag)) {
                 switch (tag[0]) {
                     case JSONGrammar.TokenType.StringBound:
                         tokenValue = ReadString();
@@ -68,12 +67,12 @@
 
         string ReadString() {
             int pos = 0;
-            char[] buf = new char[6]; // the buffer for unescaping chars
+            var buf = new char[6]; // the buffer for unescaping chars
 
             JSONGrammar.TokenType[] tag;
             m_builder.Clear();
 
-            while (m_stringScanner.Execute(m_scanner, out tag)) {
+            while (m_stringContext.Execute(m_scanner, out tag)) {
                 switch (tag[0]) {
                     case JSONGrammar.TokenType.StringBound:
                         return m_builder.ToString();
@@ -89,13 +88,17 @@
                         m_scanner.CopyTokenTo(buf, 0);
                         m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1]));
                         break;
-                    default:
-                        break;
                 }
 
             }
 
             throw new ParserException("Unexpected end of data");
         }
+
+        protected override void Dispose(bool disposing) {
+            if (disposing)
+                Safe.Dispose(m_scanner);
+            base.Dispose(disposing);
+        }
     }
 }
--- a/Implab/Formats/ScannerContext.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Formats/ScannerContext.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,11 +1,17 @@
-using System;
-
-namespace Implab.Formats {
+namespace Implab.Formats {
+    /// <summary>
+    /// Represents a scanner configuration, based on a DFA, that is used to recognize tokens.
+    /// </summary>
     public class ScannerContext<TTag> {
+
         public int[,] Dfa { get; private set; }
+
         public bool[] Final { get; private set; }
+
         public TTag[][] Tags { get; private set; }
+
         public int State { get; private set; }
+
         public int[] Alphabet { get; private set; }
 
         public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
--- a/Implab/Formats/TextScanner.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Formats/TextScanner.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -1,9 +1,7 @@
 using System;
 using Implab.Components;
-using Implab.Automaton.RegularExpressions;
 using System.Diagnostics;
 using Implab.Automaton;
-using System.IO;
 using System.Text;
 
 namespace Implab.Formats {
@@ -18,7 +16,7 @@
         int m_tokenLength;
 
         /// <summary>
-        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class.
+        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
         /// </summary>
         /// <param name="bufferMax">Buffer max.</param>
         /// <param name="chunkSize">Chunk size.</param>
@@ -30,7 +28,7 @@
         }
 
         /// <summary>
-        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class.
+        /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class.
         /// </summary>
         /// <param name="buffer">Buffer.</param>
         protected TextScanner(char[] buffer) {
@@ -48,7 +46,9 @@
         /// <param name="final">Final states of the automaton.</param>
         /// <param name="tags">Tags.</param>
         /// <param name="state">The initial state for the automaton.</param>
-        internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
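+        /// <param name="alphabet">Map from a character code to the input symbol index used as the second index into <paramref name="dfa"/>.</param>
+        /// <param name="tag">When a token is recognized, receives the tags of the final state that was reached.</param>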
+        internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) {
             Safe.ArgumentNotNull();
             m_tokenLength = 0;
 
@@ -58,10 +58,10 @@
                 // after the next chunk is read the offset in the buffer may change
                 int pos = m_bufferOffset + m_tokenLength;
 
-                while(pos < m_bufferSize) {
+                while (pos < m_bufferSize) {
                     var ch = m_buffer[pos];
 
-                    state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
+                    state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]];
                     if (state == DFAConst.UNREACHABLE_STATE)
                         break;
                     
@@ -77,16 +77,17 @@
             if (final[state]) {
                 tag = tags[state];
                 return true;
-            } else {
-                if (m_bufferOffset == m_bufferSize) {
-                    if (m_tokenLength == 0) //EOF
+            }
+
+            if (m_bufferOffset == m_bufferSize) {
+                if (m_tokenLength == 0) //EOF
                         return false;
                     
-                    throw new ParserException();
-                }
-                throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
+                throw new ParserException();
+            }
+
+            throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset]));
             
-            }
         }
 
         protected void Feed(char[] buffer, int offset, int length) {
@@ -108,7 +109,7 @@
                     var size = used + free;
 
                     if (size > m_bufferMax)
-                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024);
+                        throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024));
                     
                     var temp = new char[size];
 
--- a/Implab/Implab.csproj	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Implab.csproj	Wed Mar 23 01:42:00 2016 +0300
@@ -160,11 +160,9 @@
     <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" />
     <Compile Include="Automaton\RegularExpressions\CatToken.cs" />
     <Compile Include="Automaton\DFAConst.cs" />
-    <Compile Include="Automaton\RegularExpressions\Grammar.cs" />
     <Compile Include="Automaton\RegularExpressions\StarToken.cs" />
     <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" />
     <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" />
-    <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
     <Compile Include="Automaton\RegularExpressions\Token.cs" />
     <Compile Include="Automaton\RegularExpressions\IVisitor.cs" />
     <Compile Include="Automaton\AutomatonTransition.cs" />
@@ -192,6 +190,10 @@
     <Compile Include="Formats\StringScanner.cs" />
     <Compile Include="Formats\ReaderScanner.cs" />
     <Compile Include="Formats\ScannerContext.cs" />
+    <Compile Include="Formats\Grammar.cs" />
+    <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" />
+    <Compile Include="Automaton\RegularExpressions\EndToken.cs" />
+    <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" />
   </ItemGroup>
   <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
   <ItemGroup />
--- a/Implab/Safe.cs	Tue Mar 22 18:58:40 2016 +0300
+++ b/Implab/Safe.cs	Wed Mar 23 01:42:00 2016 +0300
@@ -41,6 +41,11 @@
                 throw new ArgumentOutOfRangeException(paramName);
         }
 
+        public static void ArgumentOfType(object value, Type type, string paramName) {
+            if (!type.IsInstanceOfType(value))
+                throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName);
+        }
+
         public static void Dispose(params IDisposable[] objects) {
             foreach (var d in objects)
                 if (d != null)