# HG changeset patch # User cin # Date 1458686520 -10800 # Node ID a0ff6a0e9c447ff1b0006e6424a785bc39f971f7 # Parent 0c3c69fe225b97a4ad44ccd5ee916081587c15ea refactoring diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/AltToken.cs --- a/Implab/Automaton/RegularExpressions/AltToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/AltToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,17 +1,17 @@ using System; namespace Implab.Automaton.RegularExpressions { - public class AltToken: BinaryToken { - public AltToken(Token left, Token right) + public class AltToken: BinaryToken { + public AltToken(Token left, Token right) : base(left, right) { } - public override void Accept(IVisitor visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } public override string ToString() { - return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right); + return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right); } } } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/BinaryToken.cs --- a/Implab/Automaton/RegularExpressions/BinaryToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/BinaryToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,19 +1,19 @@ using Implab; namespace Implab.Automaton.RegularExpressions { - public abstract class BinaryToken : Token { - readonly Token m_left; - readonly Token m_right; + public abstract class BinaryToken: Token { + readonly Token m_left; + readonly Token m_right; - public Token Left { + public Token Left { get { return m_left; } } - public Token Right { + public Token Right { get { return m_right; } } - protected BinaryToken(Token left, Token right) { + protected BinaryToken(Token left, Token right) { Safe.ArgumentNotNull(m_left = left, "left"); Safe.ArgumentNotNull(m_right = right, "right"); } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/CatToken.cs --- a/Implab/Automaton/RegularExpressions/CatToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/CatToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,12 +1,12 @@ using System; namespace Implab.Automaton.RegularExpressions { - public class CatToken : BinaryToken { - public CatToken(Token left, Token right) + public class CatToken : BinaryToken { + public CatToken(Token left, Token right) : base(left, right) { } - public override void Accept(IVisitor visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } @@ -15,8 +15,8 @@ return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); } - static string FormatToken(Token token) { - return String.Format(token is AltToken ? "({0})" : "{0}", token); + static string FormatToken(Token token) { + return String.Format(token is AltToken ? "({0})" : "{0}", token); } } } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/EmptyToken.cs --- a/Implab/Automaton/RegularExpressions/EmptyToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/EmptyToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,8 +1,8 @@ using Implab; namespace Implab.Automaton.RegularExpressions { - public class EmptyToken : Token { - public override void Accept(IVisitor visitor) { + public class EmptyToken: Token { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/EndToken.cs --- a/Implab/Automaton/RegularExpressions/EndToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/EndToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -5,23 +5,9 @@ /// Конечный символ расширенного регулярного выражения, при построении ДКА /// используется для определения конечных состояний. /// - public class EndToken: Token { - - TTag m_tag; - - public EndToken(TTag tag) { - m_tag = tag; - } + public class EndToken: Token { - public EndToken() - : this(default(TTag)) { - } - - public TTag Tag { - get { return m_tag; } - } - - public override void Accept(IVisitor visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/EndTokenT.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/EndTokenT.cs Wed Mar 23 01:42:00 2016 +0300 @@ -0,0 +1,33 @@ +using Implab; + +namespace Implab.Automaton.RegularExpressions { + /// + /// Конечный символ расширенного регулярного выражения, при построении ДКА + /// используется для определения конечных состояний. + /// + public class EndToken: Token { + + TTag m_tag; + + public EndToken(TTag tag) { + m_tag = tag; + } + + public EndToken() + : this(default(TTag)) { + } + + public TTag Tag { + get { return m_tag; } + } + + public override void Accept(IVisitor visitor) { + Safe.ArgumentOfType(visitor, typeof(IVisitor), "visitor"); + Safe.ArgumentNotNull(visitor, "visitor"); + ((IVisitor)visitor).Visit(this); + } + public override string ToString() { + return "#"; + } + } +} diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/Grammar.cs --- a/Implab/Automaton/RegularExpressions/Grammar.cs Tue Mar 22 18:58:40 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - /// - /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа char. - /// - public abstract class Grammar { - - protected abstract IAlphabetBuilder AlphabetBuilder { - get; - } - - protected SymbolToken UnclassifiedToken() { - return new SymbolToken(DFAConst.UNCLASSIFIED_INPUT); - } - - protected void DefineAlphabet(IEnumerable alphabet) { - Safe.ArgumentNotNull(alphabet, "alphabet"); - - foreach (var ch in alphabet) - AlphabetBuilder.DefineSymbol(ch); - } - - protected Token SymbolToken(TSymbol symbol) { - return Token.New(TranslateOrAdd(symbol)); - } - - protected Token SymbolToken(IEnumerable symbols) { - Safe.ArgumentNotNull(symbols, "symbols"); - - return Token.New(TranslateOrAdd(symbols).ToArray()); - } - - protected Token SymbolSetToken(params TSymbol[] set) { - return SymbolToken(set); - } - - int TranslateOrAdd(TSymbol ch) { - var t = AlphabetBuilder.Translate(ch); - if (t == DFAConst.UNCLASSIFIED_INPUT) - t = AlphabetBuilder.DefineSymbol(ch); - return t; - } - - IEnumerable TranslateOrAdd(IEnumerable symbols) { - return symbols.Distinct().Select(TranslateOrAdd); - } - - int TranslateOrDie(TSymbol ch) { - var t = AlphabetBuilder.Translate(ch); - if (t == DFAConst.UNCLASSIFIED_INPUT) - throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); - return t; - } - - IEnumerable TranslateOrDie(IEnumerable symbols) { - return symbols.Distinct().Select(TranslateOrDie); - } - - protected Token SymbolTokenExcept(IEnumerable symbols) { - Safe.ArgumentNotNull(symbols, "symbols"); - - return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); - } - - protected abstract IndexedAlphabetBase CreateAlphabet(); - - protected ScannerContext BuildScannerContext(Token regexp) { - - var dfa = new RegularDFA(AlphabetBuilder); - - var visitor = new RegularExpressionVisitor(); - regexp.Accept( visitor ); - - visitor.BuildDFA(dfa); - - if (dfa.IsFinalState(dfa.InitialState)) - throw new ApplicationException("The specified language contains empty token"); - - var ab = CreateAlphabet(); - var optimal = dfa.Optimize(ab); - - return new ScannerContext( - optimal.CreateTransitionTable(), - optimal.CreateFinalStateTable(), - optimal.CreateTagTable(), - optimal.InitialState, - ab.GetTranslationMap() - ); - } - - } - - -} diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs --- a/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,5 +1,4 @@ -using System; - + namespace Implab.Automaton.RegularExpressions { public interface ITaggedDFABuilder : IDFATableBuilder { void SetStateTag(int s, TTag[] tags); diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/IVisitor.cs --- a/Implab/Automaton/RegularExpressions/IVisitor.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/IVisitor.cs Wed Mar 23 01:42:00 2016 +0300 @@ -2,12 +2,12 @@ /// /// Интерфейс обходчика синтаксического дерева регулярного выражения /// - public interface IVisitor { - void Visit(AltToken token); - void Visit(StarToken token); - void Visit(CatToken token); - void Visit(EmptyToken token); - void Visit(EndToken token); - void Visit(SymbolToken token); + public interface IVisitor { + void Visit(AltToken token); + void Visit(StarToken token); + void Visit(CatToken token); + void Visit(EmptyToken token); + void Visit(EndToken token); + void Visit(SymbolToken token); } } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/IVisitorT.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/IVisitorT.cs Wed Mar 23 01:42:00 2016 +0300 @@ -0,0 +1,8 @@ +namespace Implab.Automaton.RegularExpressions { + /// + /// Интерфейс обходчика синтаксического дерева регулярного выражения + /// + public interface IVisitor : IVisitor { + void Visit(EndToken token); + } +} diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/RegularDFA.cs --- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,5 +1,4 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; namespace Implab.Automaton.RegularExpressions { diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs --- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 01:42:00 2016 +0300 @@ -12,13 +12,14 @@ /// public class RegularExpressionVisitor : IVisitor { int m_idx; - Token m_root; + Token m_root; HashSet m_firstpos; HashSet m_lastpos; readonly Dictionary> m_followpos = new Dictionary>(); readonly Dictionary m_indexes = new Dictionary(); - readonly Dictionary m_ends = new Dictionary(); + readonly HashSet m_ends = new HashSet(); + readonly Dictionary m_tags = new Dictionary(); public Dictionary> FollowposMap { get { return m_followpos; } @@ -30,19 +31,19 @@ } bool Nullable(object n) { - if (n is EmptyToken || n is StarToken) + if (n is EmptyToken || n is StarToken) return true; - var altToken = n as AltToken; + var altToken = n as AltToken; if (altToken != null) return Nullable(altToken.Left) || Nullable(altToken.Right); - var catToken = n as CatToken; + var catToken = n as CatToken; if (catToken != null) return Nullable(catToken.Left) && Nullable(catToken.Right); return false; } - public void Visit(AltToken token) { + public void Visit(AltToken token) { if (m_root == null) m_root = token; var firtspos = new HashSet(); @@ -60,7 +61,7 @@ m_lastpos = lastpos; } - public void Visit(StarToken token) { + public void Visit(StarToken token) { if (m_root == null) m_root = token; token.Token.Accept(this); @@ -69,7 +70,7 @@ Followpos(i).UnionWith(m_firstpos); } - public void Visit(CatToken token) { + public void Visit(CatToken token) { if (m_root == null) m_root = token; @@ -97,12 +98,12 @@ } - public void Visit(EmptyToken token) { + public void Visit(EmptyToken token) { if (m_root == null) m_root = token; } - public void Visit(SymbolToken token) { + public void Visit(SymbolToken token) { if (m_root == null) m_root = token; m_idx++; @@ -119,7 +120,19 @@ m_firstpos = new HashSet(new[] { m_idx }); m_lastpos = new HashSet(new[] { m_idx }); Followpos(m_idx); - m_ends.Add(m_idx, token.Tag); + m_ends.Add(m_idx); + m_tags.Add(m_idx, token.Tag); + } + + public void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx); } public void BuildDFA(ITaggedDFABuilder dfa) { @@ -157,14 +170,18 @@ } } if (next.Count > 0) { - int s2 = states.Translate(next); - if (s2 == DFAConst.UNCLASSIFIED_INPUT) { + int s2; + if (states.Contains(next)) { + s2 = states.Translate(next); + } else { s2 = states.DefineSymbol(next); - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) { + if (IsFinal(next)) { + dfa.MarkFinalState(s2); - dfa.SetStateTag(s2, tags); + tags = GetStateTags(next); + if (tags != null && tags.Length > 0) + dfa.SetStateTag(s2, tags); } queue.Enqueue(next); @@ -175,9 +192,14 @@ } } + bool IsFinal(IEnumerable state) { + Debug.Assert(state != null); + return state.Any(m_ends.Contains); + } + TTag[] GetStateTags(IEnumerable state) { Debug.Assert(state != null); - return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); + return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); } } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/StarToken.cs --- a/Implab/Automaton/RegularExpressions/StarToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/StarToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,28 +1,25 @@ using Implab; using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; + namespace Implab.Automaton.RegularExpressions { /// /// Замыкание выражения с 0 и более повторов. /// - public class StarToken: Token { + public class StarToken: Token { - Token m_token; + Token m_token; - public Token Token { + public Token Token { get { return m_token; } } - public StarToken(Token token) { + public StarToken(Token token) { Safe.ArgumentNotNull(token, "token"); m_token = token; } - public override void Accept(IVisitor visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/SymbolToken.cs --- a/Implab/Automaton/RegularExpressions/SymbolToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/SymbolToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -4,7 +4,7 @@ /// /// Выражение, соответсвующее одному символу. /// - public class SymbolToken : Token { + public class SymbolToken: Token { int m_value; public int Value { @@ -14,7 +14,7 @@ public SymbolToken(int value) { m_value = value; } - public override void Accept(IVisitor visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Automaton/RegularExpressions/Token.cs --- a/Implab/Automaton/RegularExpressions/Token.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/Token.cs Wed Mar 23 01:42:00 2016 +0300 @@ -3,46 +3,46 @@ using System.Linq; namespace Implab.Automaton.RegularExpressions { - public abstract class Token { - public abstract void Accept(IVisitor visitor); + public abstract class Token { + public abstract void Accept(IVisitor visitor); - public Token Extend() { - return Cat(new EndToken()); + public Token Extend() { + return Cat(new EndToken()); } - public Token Tag(TTag tag) { + public Token Tag(TTag tag) { return Cat(new EndToken(tag)); } - public Token Cat(Token right) { - return new CatToken(this, right); + public Token Cat(Token right) { + return new CatToken(this, right); } - public Token Or(Token right) { - return new AltToken(this, right); + public Token Or(Token right) { + return new AltToken(this, right); } - public Token Optional() { - return Or(new EmptyToken()); + public Token Optional() { + return Or(new EmptyToken()); } - public Token EClosure() { - return new StarToken(this); + public Token EClosure() { + return new StarToken(this); } - public Token Closure() { - return Cat(new StarToken(this)); + public Token Closure() { + return Cat(new StarToken(this)); } - public Token Repeat(int count) { - Token token = null; + public Token Repeat(int count) { + Token token = null; for (int i = 0; i < count; i++) token = token != null ? token.Cat(this) : this; - return token ?? new EmptyToken(); + return token ?? new EmptyToken(); } - public Token Repeat(int min, int max) { + public Token Repeat(int min, int max) { if (min > max || min < 1) throw new ArgumentOutOfRangeException(); var token = Repeat(min); @@ -52,11 +52,11 @@ return token; } - public static Token New(params int[] set) { + public static Token New(params int[] set) { Safe.ArgumentNotNull(set, "set"); - Token token = null; + Token token = null; foreach(var c in set.Distinct()) - token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c)); + token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c)); return token; } } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Formats/ByteAlphabet.cs --- a/Implab/Formats/ByteAlphabet.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/ByteAlphabet.cs Wed Mar 23 01:42:00 2016 +0300 @@ -4,8 +4,6 @@ namespace Implab.Formats { public class ByteAlphabet : IndexedAlphabetBase { - public ByteAlphabet() { - } #region implemented abstract members of IndexedAlphabetBase diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Formats/CharAlphabet.cs --- a/Implab/Formats/CharAlphabet.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/CharAlphabet.cs Wed Mar 23 01:42:00 2016 +0300 @@ -5,9 +5,6 @@ namespace Implab.Formats { public class CharAlphabet: IndexedAlphabetBase { - public CharAlphabet() { - } - public override int GetSymbolIndex(char symbol) { return symbol; } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Formats/Grammar.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Formats/Grammar.cs Wed Mar 23 01:42:00 2016 +0300 @@ -0,0 +1,100 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using Implab.Automaton; +using Implab.Automaton.RegularExpressions; + +namespace Implab.Formats { + /// + /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа char. + /// + public abstract class Grammar { + + protected abstract IAlphabetBuilder AlphabetBuilder { + get; + } + + protected SymbolToken UnclassifiedToken() { + return new SymbolToken(DFAConst.UNCLASSIFIED_INPUT); + } + + protected void DefineAlphabet(IEnumerable alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + foreach (var ch in alphabet) + AlphabetBuilder.DefineSymbol(ch); + } + + protected Token SymbolToken(TSymbol symbol) { + return Token.New(TranslateOrAdd(symbol)); + } + + protected Token SymbolToken(IEnumerable symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New(TranslateOrAdd(symbols).ToArray()); + } + + protected Token SymbolSetToken(params TSymbol[] set) { + return SymbolToken(set); + } + + int TranslateOrAdd(TSymbol ch) { + var t = AlphabetBuilder.Translate(ch); + if (t == DFAConst.UNCLASSIFIED_INPUT) + t = AlphabetBuilder.DefineSymbol(ch); + return t; + } + + IEnumerable TranslateOrAdd(IEnumerable symbols) { + return symbols.Distinct().Select(TranslateOrAdd); + } + + int TranslateOrDie(TSymbol ch) { + var t = AlphabetBuilder.Translate(ch); + if (t == DFAConst.UNCLASSIFIED_INPUT) + throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); + return t; + } + + IEnumerable TranslateOrDie(IEnumerable symbols) { + return symbols.Distinct().Select(TranslateOrDie); + } + + protected Token SymbolTokenExcept(IEnumerable symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); + } + + protected abstract IndexedAlphabetBase CreateAlphabet(); + + protected ScannerContext BuildScannerContext(Token regexp) { + + var dfa = new RegularDFA(AlphabetBuilder); + + var visitor = new RegularExpressionVisitor(); + regexp.Accept( visitor ); + + visitor.BuildDFA(dfa); + + if (dfa.IsFinalState(dfa.InitialState)) + throw new ApplicationException("The specified language contains empty token"); + + var ab = CreateAlphabet(); + var optimal = dfa.Optimize(ab); + + return new ScannerContext( + optimal.CreateTransitionTable(), + optimal.CreateFinalStateTable(), + optimal.CreateTagTable(), + optimal.InitialState, + ab.GetTranslationMap() + ); + } + + } + + +} diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Formats/JSON/JSONScanner.cs --- a/Implab/Formats/JSON/JSONScanner.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/JSON/JSONScanner.cs Wed Mar 23 01:42:00 2016 +0300 @@ -4,7 +4,6 @@ using System.Text; using Implab.Components; using System.IO; -using Implab.Automaton.RegularExpressions; namespace Implab.Formats.JSON { /// @@ -13,8 +12,8 @@ public class JSONScanner : Disposable { readonly StringBuilder m_builder = new StringBuilder(); - readonly ScannerContext m_jsonScanner = JSONGrammar.Instance.JsonDFA; - readonly ScannerContext m_stringScanner = JSONGrammar.Instance.JsonStringDFA; + readonly ScannerContext m_jsonContext = JSONGrammar.Instance.JsonDFA; + readonly ScannerContext m_stringContext = JSONGrammar.Instance.JsonStringDFA; readonly TextScanner m_scanner; @@ -31,7 +30,7 @@ public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { Safe.ArgumentNotNull(reader, "reader"); - m_scanner = new ReaderScanner(reader); + m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); } /// @@ -44,7 +43,7 @@ /// в строках обрабатываются экранированные символы, числа становтся типа double. public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { JSONGrammar.TokenType[] tag; - if (m_jsonScanner.Execute(m_scanner, out tag)) { + if (m_jsonContext.Execute(m_scanner, out tag)) { switch (tag[0]) { case JSONGrammar.TokenType.StringBound: tokenValue = ReadString(); @@ -68,12 +67,12 @@ string ReadString() { int pos = 0; - char[] buf = new char[6]; // the buffer for unescaping chars + var buf = new char[6]; // the buffer for unescaping chars JSONGrammar.TokenType[] tag; m_builder.Clear(); - while (m_stringScanner.Execute(m_scanner, out tag)) { + while (m_stringContext.Execute(m_scanner, out tag)) { switch (tag[0]) { case JSONGrammar.TokenType.StringBound: return m_builder.ToString(); @@ -89,13 +88,17 @@ m_scanner.CopyTokenTo(buf, 0); m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); break; - default: - break; } } throw new ParserException("Unexpected end of data"); } + + protected override void Dispose(bool disposing) { + if (disposing) + Safe.Dispose(m_scanner); + base.Dispose(disposing); + } } } diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Formats/ScannerContext.cs --- a/Implab/Formats/ScannerContext.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/ScannerContext.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,11 +1,17 @@ -using System; - -namespace Implab.Formats { +namespace Implab.Formats { + /// + /// Represents a scanner configuration usefull to recongnize token, based on the DFA. + /// public class ScannerContext { + public int[,] Dfa { get; private set; } + public bool[] Final { get; private set; } + public TTag[][] Tags { get; private set; } + public int State { get; private set; } + public int[] Alphabet { get; private set; } public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) { diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Formats/TextScanner.cs --- a/Implab/Formats/TextScanner.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/TextScanner.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,9 +1,7 @@ using System; using Implab.Components; -using Implab.Automaton.RegularExpressions; using System.Diagnostics; using Implab.Automaton; -using System.IO; using System.Text; namespace Implab.Formats { @@ -18,7 +16,7 @@ int m_tokenLength; /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Buffer max. /// Chunk size. @@ -30,7 +28,7 @@ } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Buffer. protected TextScanner(char[] buffer) { @@ -48,7 +46,9 @@ /// Final states of the automaton. /// Tags. /// The initial state for the automaton. - internal bool ReadToken(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { + /// + /// + internal bool ReadToken(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { Safe.ArgumentNotNull(); m_tokenLength = 0; @@ -58,10 +58,10 @@ // after the next chunk is read the offset in the buffer may change int pos = m_bufferOffset + m_tokenLength; - while(pos < m_bufferSize) { + while (pos < m_bufferSize) { var ch = m_buffer[pos]; - state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; + state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; if (state == DFAConst.UNREACHABLE_STATE) break; @@ -77,16 +77,17 @@ if (final[state]) { tag = tags[state]; return true; - } else { - if (m_bufferOffset == m_bufferSize) { - if (m_tokenLength == 0) //EOF + } + + if (m_bufferOffset == m_bufferSize) { + if (m_tokenLength == 0) //EOF return false; - throw new ParserException(); - } - throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); + throw new ParserException(); + } + + throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); - } } protected void Feed(char[] buffer, int offset, int length) { @@ -108,7 +109,7 @@ var size = used + free; if (size > m_bufferMax) - throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024); + throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); var temp = new char[size]; diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Implab.csproj --- a/Implab/Implab.csproj Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Implab.csproj Wed Mar 23 01:42:00 2016 +0300 @@ -160,11 +160,9 @@ - - @@ -192,6 +190,10 @@ + + + + diff -r 0c3c69fe225b -r a0ff6a0e9c44 Implab/Safe.cs --- a/Implab/Safe.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Safe.cs Wed Mar 23 01:42:00 2016 +0300 @@ -41,6 +41,11 @@ throw new ArgumentOutOfRangeException(paramName); } + public static void ArgumentOfType(object value, Type type, string paramName) { + if (!type.IsInstanceOfType(value)) + throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName); + } + public static void Dispose(params IDisposable[] objects) { foreach (var d in objects) if (d != null)