Mercurial > pub > ImplabNet
changeset 177:a0ff6a0e9c44 ref20160224
refactoring
line wrap: on
line diff
--- a/Implab/Automaton/RegularExpressions/AltToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/AltToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,17 +1,17 @@ using System; namespace Implab.Automaton.RegularExpressions { - public class AltToken<TTag>: BinaryToken<TTag> { - public AltToken(Token<TTag> left, Token<TTag> right) + public class AltToken: BinaryToken { + public AltToken(Token left, Token right) : base(left, right) { } - public override void Accept(IVisitor<TTag> visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } public override string ToString() { - return String.Format(Right is BinaryToken<TTag> ? "{0}|({1})" : "{0}|{1}", Left, Right); + return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right); } } }
--- a/Implab/Automaton/RegularExpressions/BinaryToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/BinaryToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,19 +1,19 @@ using Implab; namespace Implab.Automaton.RegularExpressions { - public abstract class BinaryToken<TTag> : Token<TTag> { - readonly Token<TTag> m_left; - readonly Token<TTag> m_right; + public abstract class BinaryToken: Token { + readonly Token m_left; + readonly Token m_right; - public Token<TTag> Left { + public Token Left { get { return m_left; } } - public Token<TTag> Right { + public Token Right { get { return m_right; } } - protected BinaryToken(Token<TTag> left, Token<TTag> right) { + protected BinaryToken(Token left, Token right) { Safe.ArgumentNotNull(m_left = left, "left"); Safe.ArgumentNotNull(m_right = right, "right"); }
--- a/Implab/Automaton/RegularExpressions/CatToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/CatToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,12 +1,12 @@ using System; namespace Implab.Automaton.RegularExpressions { - public class CatToken<TTag> : BinaryToken<TTag> { - public CatToken(Token<TTag> left, Token<TTag> right) + public class CatToken : BinaryToken { + public CatToken(Token left, Token right) : base(left, right) { } - public override void Accept(IVisitor<TTag> visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); } @@ -15,8 +15,8 @@ return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); } - static string FormatToken(Token<TTag> token) { - return String.Format(token is AltToken<TTag> ? "({0})" : "{0}", token); + static string FormatToken(Token token) { + return String.Format(token is AltToken ? "({0})" : "{0}", token); } } }
--- a/Implab/Automaton/RegularExpressions/EmptyToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/EmptyToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,8 +1,8 @@ using Implab; namespace Implab.Automaton.RegularExpressions { - public class EmptyToken<TTag> : Token<TTag> { - public override void Accept(IVisitor<TTag> visitor) { + public class EmptyToken: Token { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); }
--- a/Implab/Automaton/RegularExpressions/EndToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/EndToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -5,23 +5,9 @@ /// Конечный символ расширенного регулярного выражения, при построении ДКА /// используется для определения конечных состояний. /// </summary> - public class EndToken<TTag>: Token<TTag> { - - TTag m_tag; - - public EndToken(TTag tag) { - m_tag = tag; - } + public class EndToken: Token { - public EndToken() - : this(default(TTag)) { - } - - public TTag Tag { - get { return m_tag; } - } - - public override void Accept(IVisitor<TTag> visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/EndTokenT.cs Wed Mar 23 01:42:00 2016 +0300 @@ -0,0 +1,33 @@ +using Implab; + +namespace Implab.Automaton.RegularExpressions { + /// <summary> + /// Конечный символ расширенного регулярного выражения, при построении ДКА + /// используется для определения конечных состояний. + /// </summary> + public class EndToken<TTag>: Token { + + TTag m_tag; + + public EndToken(TTag tag) { + m_tag = tag; + } + + public EndToken() + : this(default(TTag)) { + } + + public TTag Tag { + get { return m_tag; } + } + + public override void Accept(IVisitor visitor) { + Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); + Safe.ArgumentNotNull(visitor, "visitor"); + ((IVisitor<TTag>)visitor).Visit(this); + } + public override string ToString() { + return "#"; + } + } +}
--- a/Implab/Automaton/RegularExpressions/Grammar.cs Tue Mar 22 18:58:40 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -using Implab; -using System; -using System.Collections.Generic; -using System.Linq; - -namespace Implab.Automaton.RegularExpressions { - /// <summary> - /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. - /// </summary> - public abstract class Grammar<TSymbol, TTag> { - - protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { - get; - } - - protected SymbolToken<TTag> UnclassifiedToken() { - return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT); - } - - protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { - Safe.ArgumentNotNull(alphabet, "alphabet"); - - foreach (var ch in alphabet) - AlphabetBuilder.DefineSymbol(ch); - } - - protected Token<TTag> SymbolToken(TSymbol symbol) { - return Token<TTag>.New(TranslateOrAdd(symbol)); - } - - protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) { - Safe.ArgumentNotNull(symbols, "symbols"); - - return Token<TTag>.New(TranslateOrAdd(symbols).ToArray()); - } - - protected Token<TTag> SymbolSetToken(params TSymbol[] set) { - return SymbolToken(set); - } - - int TranslateOrAdd(TSymbol ch) { - var t = AlphabetBuilder.Translate(ch); - if (t == DFAConst.UNCLASSIFIED_INPUT) - t = AlphabetBuilder.DefineSymbol(ch); - return t; - } - - IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) { - return symbols.Distinct().Select(TranslateOrAdd); - } - - int TranslateOrDie(TSymbol ch) { - var t = AlphabetBuilder.Translate(ch); - if (t == DFAConst.UNCLASSIFIED_INPUT) - throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); - return t; - } - - IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) { - return symbols.Distinct().Select(TranslateOrDie); - } - - protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) { - Safe.ArgumentNotNull(symbols, 
"symbols"); - - return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); - } - - protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); - - protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) { - - var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); - - var visitor = new RegularExpressionVisitor<TTag>(); - regexp.Accept( visitor ); - - visitor.BuildDFA(dfa); - - if (dfa.IsFinalState(dfa.InitialState)) - throw new ApplicationException("The specified language contains empty token"); - - var ab = CreateAlphabet(); - var optimal = dfa.Optimize(ab); - - return new ScannerContext<TTag>( - optimal.CreateTransitionTable(), - optimal.CreateFinalStateTable(), - optimal.CreateTagTable(), - optimal.InitialState, - ab.GetTranslationMap() - ); - } - - } - - -}
--- a/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/ITaggedDFABuilder.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,5 +1,4 @@ -using System; - + namespace Implab.Automaton.RegularExpressions { public interface ITaggedDFABuilder<TTag> : IDFATableBuilder { void SetStateTag(int s, TTag[] tags);
--- a/Implab/Automaton/RegularExpressions/IVisitor.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/IVisitor.cs Wed Mar 23 01:42:00 2016 +0300 @@ -2,12 +2,12 @@ /// <summary> /// Интерфейс обходчика синтаксического дерева регулярного выражения /// </summary> - public interface IVisitor<TTag> { - void Visit(AltToken<TTag> token); - void Visit(StarToken<TTag> token); - void Visit(CatToken<TTag> token); - void Visit(EmptyToken<TTag> token); - void Visit(EndToken<TTag> token); - void Visit(SymbolToken<TTag> token); + public interface IVisitor { + void Visit(AltToken token); + void Visit(StarToken token); + void Visit(CatToken token); + void Visit(EmptyToken token); + void Visit(EndToken token); + void Visit(SymbolToken token); } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Automaton/RegularExpressions/IVisitorT.cs Wed Mar 23 01:42:00 2016 +0300 @@ -0,0 +1,8 @@ +namespace Implab.Automaton.RegularExpressions { + /// <summary> + /// Интерфейс обходчика синтаксического дерева регулярного выражения + /// </summary> + public interface IVisitor<T> : IVisitor { + void Visit(EndToken<T> token); + } +}
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,5 +1,4 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; namespace Implab.Automaton.RegularExpressions {
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 01:42:00 2016 +0300 @@ -12,13 +12,14 @@ /// </summary> public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { int m_idx; - Token<TTag> m_root; + Token m_root; HashSet<int> m_firstpos; HashSet<int> m_lastpos; readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); - readonly Dictionary<int, TTag> m_ends = new Dictionary<int, TTag>(); + readonly HashSet<int> m_ends = new HashSet<int>(); + readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); public Dictionary<int, HashSet<int>> FollowposMap { get { return m_followpos; } @@ -30,19 +31,19 @@ } bool Nullable(object n) { - if (n is EmptyToken<TTag> || n is StarToken<TTag>) + if (n is EmptyToken || n is StarToken) return true; - var altToken = n as AltToken<TTag>; + var altToken = n as AltToken; if (altToken != null) return Nullable(altToken.Left) || Nullable(altToken.Right); - var catToken = n as CatToken<TTag>; + var catToken = n as CatToken; if (catToken != null) return Nullable(catToken.Left) && Nullable(catToken.Right); return false; } - public void Visit(AltToken<TTag> token) { + public void Visit(AltToken token) { if (m_root == null) m_root = token; var firtspos = new HashSet<int>(); @@ -60,7 +61,7 @@ m_lastpos = lastpos; } - public void Visit(StarToken<TTag> token) { + public void Visit(StarToken token) { if (m_root == null) m_root = token; token.Token.Accept(this); @@ -69,7 +70,7 @@ Followpos(i).UnionWith(m_firstpos); } - public void Visit(CatToken<TTag> token) { + public void Visit(CatToken token) { if (m_root == null) m_root = token; @@ -97,12 +98,12 @@ } - public void Visit(EmptyToken<TTag> token) { + public void Visit(EmptyToken token) { if (m_root == null) m_root = token; } - public void 
Visit(SymbolToken<TTag> token) { + public void Visit(SymbolToken token) { if (m_root == null) m_root = token; m_idx++; @@ -119,7 +120,19 @@ m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); Followpos(m_idx); - m_ends.Add(m_idx, token.Tag); + m_ends.Add(m_idx); + m_tags.Add(m_idx, token.Tag); + } + + public void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_firstpos = new HashSet<int>(new[] { m_idx }); + m_lastpos = new HashSet<int>(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx); } public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { @@ -157,14 +170,18 @@ } } if (next.Count > 0) { - int s2 = states.Translate(next); - if (s2 == DFAConst.UNCLASSIFIED_INPUT) { + int s2; + if (states.Contains(next)) { + s2 = states.Translate(next); + } else { s2 = states.DefineSymbol(next); - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) { + if (IsFinal(next)) { + dfa.MarkFinalState(s2); - dfa.SetStateTag(s2, tags); + tags = GetStateTags(next); + if (tags != null && tags.Length > 0) + dfa.SetStateTag(s2, tags); } queue.Enqueue(next); @@ -175,9 +192,14 @@ } } + bool IsFinal(IEnumerable<int> state) { + Debug.Assert(state != null); + return state.Any(m_ends.Contains); + } + TTag[] GetStateTags(IEnumerable<int> state) { Debug.Assert(state != null); - return state.Where(m_ends.ContainsKey).Select(pos => m_ends[pos]).ToArray(); + return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); } }
--- a/Implab/Automaton/RegularExpressions/StarToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/StarToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,28 +1,25 @@ using Implab; using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; + namespace Implab.Automaton.RegularExpressions { /// <summary> /// Замыкание выражения с 0 и более повторов. /// </summary> - public class StarToken<TTag>: Token<TTag> { + public class StarToken: Token { - Token<TTag> m_token; + Token m_token; - public Token<TTag> Token { + public Token Token { get { return m_token; } } - public StarToken(Token<TTag> token) { + public StarToken(Token token) { Safe.ArgumentNotNull(token, "token"); m_token = token; } - public override void Accept(IVisitor<TTag> visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this); }
--- a/Implab/Automaton/RegularExpressions/SymbolToken.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/SymbolToken.cs Wed Mar 23 01:42:00 2016 +0300 @@ -4,7 +4,7 @@ /// <summary> /// Выражение, соответсвующее одному символу. /// </summary> - public class SymbolToken<TTag> : Token<TTag> { + public class SymbolToken: Token { int m_value; public int Value { @@ -14,7 +14,7 @@ public SymbolToken(int value) { m_value = value; } - public override void Accept(IVisitor<TTag> visitor) { + public override void Accept(IVisitor visitor) { Safe.ArgumentNotNull(visitor, "visitor"); visitor.Visit(this);
--- a/Implab/Automaton/RegularExpressions/Token.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/Token.cs Wed Mar 23 01:42:00 2016 +0300 @@ -3,46 +3,46 @@ using System.Linq; namespace Implab.Automaton.RegularExpressions { - public abstract class Token<TTag> { - public abstract void Accept(IVisitor<TTag> visitor); + public abstract class Token { + public abstract void Accept(IVisitor visitor); - public Token<TTag> Extend() { - return Cat(new EndToken<TTag>()); + public Token Extend() { + return Cat(new EndToken()); } - public Token<TTag> Tag(TTag tag) { + public Token Tag<TTag>(TTag tag) { return Cat(new EndToken<TTag>(tag)); } - public Token<TTag> Cat(Token<TTag> right) { - return new CatToken<TTag>(this, right); + public Token Cat(Token right) { + return new CatToken(this, right); } - public Token<TTag> Or(Token<TTag> right) { - return new AltToken<TTag>(this, right); + public Token Or(Token right) { + return new AltToken(this, right); } - public Token<TTag> Optional() { - return Or(new EmptyToken<TTag>()); + public Token Optional() { + return Or(new EmptyToken()); } - public Token<TTag> EClosure() { - return new StarToken<TTag>(this); + public Token EClosure() { + return new StarToken(this); } - public Token<TTag> Closure() { - return Cat(new StarToken<TTag>(this)); + public Token Closure() { + return Cat(new StarToken(this)); } - public Token<TTag> Repeat(int count) { - Token<TTag> token = null; + public Token Repeat(int count) { + Token token = null; for (int i = 0; i < count; i++) token = token != null ? token.Cat(this) : this; - return token ?? new EmptyToken<TTag>(); + return token ?? 
new EmptyToken(); } - public Token<TTag> Repeat(int min, int max) { + public Token Repeat(int min, int max) { if (min > max || min < 1) throw new ArgumentOutOfRangeException(); var token = Repeat(min); @@ -52,11 +52,11 @@ return token; } - public static Token<TTag> New(params int[] set) { + public static Token New(params int[] set) { Safe.ArgumentNotNull(set, "set"); - Token<TTag> token = null; + Token token = null; foreach(var c in set.Distinct()) - token = token == null ? new SymbolToken<TTag>(c) : token.Or(new SymbolToken<TTag>(c)); + token = token == null ? new SymbolToken(c) : token.Or(new SymbolToken(c)); return token; } }
--- a/Implab/Formats/ByteAlphabet.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/ByteAlphabet.cs Wed Mar 23 01:42:00 2016 +0300 @@ -4,8 +4,6 @@ namespace Implab.Formats { public class ByteAlphabet : IndexedAlphabetBase<byte> { - public ByteAlphabet() { - } #region implemented abstract members of IndexedAlphabetBase
--- a/Implab/Formats/CharAlphabet.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/CharAlphabet.cs Wed Mar 23 01:42:00 2016 +0300 @@ -5,9 +5,6 @@ namespace Implab.Formats { public class CharAlphabet: IndexedAlphabetBase<char> { - public CharAlphabet() { - } - public override int GetSymbolIndex(char symbol) { return symbol; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Formats/Grammar.cs Wed Mar 23 01:42:00 2016 +0300 @@ -0,0 +1,100 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using Implab.Automaton; +using Implab.Automaton.RegularExpressions; + +namespace Implab.Formats { + /// <summary> + /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. + /// </summary> + public abstract class Grammar<TSymbol, TTag> { + + protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { + get; + } + + protected SymbolToken<TTag> UnclassifiedToken() { + return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT); + } + + protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + foreach (var ch in alphabet) + AlphabetBuilder.DefineSymbol(ch); + } + + protected Token<TTag> SymbolToken(TSymbol symbol) { + return Token<TTag>.New(TranslateOrAdd(symbol)); + } + + protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token<TTag>.New(TranslateOrAdd(symbols).ToArray()); + } + + protected Token<TTag> SymbolSetToken(params TSymbol[] set) { + return SymbolToken(set); + } + + int TranslateOrAdd(TSymbol ch) { + var t = AlphabetBuilder.Translate(ch); + if (t == DFAConst.UNCLASSIFIED_INPUT) + t = AlphabetBuilder.DefineSymbol(ch); + return t; + } + + IEnumerable<int> TranslateOrAdd(IEnumerable<TSymbol> symbols) { + return symbols.Distinct().Select(TranslateOrAdd); + } + + int TranslateOrDie(TSymbol ch) { + var t = AlphabetBuilder.Translate(ch); + if (t == DFAConst.UNCLASSIFIED_INPUT) + throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); + return t; + } + + IEnumerable<int> TranslateOrDie(IEnumerable<TSymbol> symbols) { + return symbols.Distinct().Select(TranslateOrDie); + } + + protected Token<TTag> SymbolTokenExcept(IEnumerable<TSymbol> symbols) { + 
Safe.ArgumentNotNull(symbols, "symbols"); + + return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); + } + + protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); + + protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) { + + var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); + + var visitor = new RegularExpressionVisitor<TTag>(); + regexp.Accept( visitor ); + + visitor.BuildDFA(dfa); + + if (dfa.IsFinalState(dfa.InitialState)) + throw new ApplicationException("The specified language contains empty token"); + + var ab = CreateAlphabet(); + var optimal = dfa.Optimize(ab); + + return new ScannerContext<TTag>( + optimal.CreateTransitionTable(), + optimal.CreateFinalStateTable(), + optimal.CreateTagTable(), + optimal.InitialState, + ab.GetTranslationMap() + ); + } + + } + + +}
--- a/Implab/Formats/JSON/JSONScanner.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/JSON/JSONScanner.cs Wed Mar 23 01:42:00 2016 +0300 @@ -4,7 +4,6 @@ using System.Text; using Implab.Components; using System.IO; -using Implab.Automaton.RegularExpressions; namespace Implab.Formats.JSON { /// <summary> @@ -13,8 +12,8 @@ public class JSONScanner : Disposable { readonly StringBuilder m_builder = new StringBuilder(); - readonly ScannerContext<JSONGrammar.TokenType> m_jsonScanner = JSONGrammar.Instance.JsonDFA; - readonly ScannerContext<JSONGrammar.TokenType> m_stringScanner = JSONGrammar.Instance.JsonStringDFA; + readonly ScannerContext<JSONGrammar.TokenType> m_jsonContext = JSONGrammar.Instance.JsonDFA; + readonly ScannerContext<JSONGrammar.TokenType> m_stringContext = JSONGrammar.Instance.JsonStringDFA; readonly TextScanner m_scanner; @@ -31,7 +30,7 @@ public JSONScanner(TextReader reader, int bufferMax, int chunkSize) { Safe.ArgumentNotNull(reader, "reader"); - m_scanner = new ReaderScanner(reader); + m_scanner = new ReaderScanner(reader, bufferMax, chunkSize); } /// <summary> @@ -44,7 +43,7 @@ /// в строках обрабатываются экранированные символы, числа становтся типа double.</remarks> public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { JSONGrammar.TokenType[] tag; - if (m_jsonScanner.Execute(m_scanner, out tag)) { + if (m_jsonContext.Execute(m_scanner, out tag)) { switch (tag[0]) { case JSONGrammar.TokenType.StringBound: tokenValue = ReadString(); @@ -68,12 +67,12 @@ string ReadString() { int pos = 0; - char[] buf = new char[6]; // the buffer for unescaping chars + var buf = new char[6]; // the buffer for unescaping chars JSONGrammar.TokenType[] tag; m_builder.Clear(); - while (m_stringScanner.Execute(m_scanner, out tag)) { + while (m_stringContext.Execute(m_scanner, out tag)) { switch (tag[0]) { case JSONGrammar.TokenType.StringBound: return m_builder.ToString(); @@ -89,13 +88,17 @@ m_scanner.CopyTokenTo(buf, 0); 
m_builder.Append(StringTranslator.TranslateEscapedChar(buf[1])); break; - default: - break; } } throw new ParserException("Unexpected end of data"); } + + protected override void Dispose(bool disposing) { + if (disposing) + Safe.Dispose(m_scanner); + base.Dispose(disposing); + } } }
--- a/Implab/Formats/ScannerContext.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/ScannerContext.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,11 +1,17 @@ -using System; - -namespace Implab.Formats { +namespace Implab.Formats { + /// <summary> + /// Represents a scanner configuration useful to recognize tokens, based on the DFA. + /// </summary> public class ScannerContext<TTag> { + public int[,] Dfa { get; private set; } + public bool[] Final { get; private set; } + public TTag[][] Tags { get; private set; } + public int State { get; private set; } + public int[] Alphabet { get; private set; } public ScannerContext(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet) {
--- a/Implab/Formats/TextScanner.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Formats/TextScanner.cs Wed Mar 23 01:42:00 2016 +0300 @@ -1,9 +1,7 @@ using System; using Implab.Components; -using Implab.Automaton.RegularExpressions; using System.Diagnostics; using Implab.Automaton; -using System.IO; using System.Text; namespace Implab.Formats { @@ -18,7 +16,7 @@ int m_tokenLength; /// <summary> - /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class. + /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. /// </summary> /// <param name="bufferMax">Buffer max.</param> /// <param name="chunkSize">Chunk size.</param> @@ -30,7 +28,7 @@ } /// <summary> - /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner`1"/> class. + /// Initializes a new instance of the <see cref="Implab.Formats.TextScanner"/> class. /// </summary> /// <param name="buffer">Buffer.</param> protected TextScanner(char[] buffer) { @@ -48,7 +46,9 @@ /// <param name="final">Final states of the automaton.</param> /// <param name="tags">Tags.</param> /// <param name="state">The initial state for the automaton.</param> - internal bool ReadToken<TTag>(int[,] dfa, int[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { + /// <param name="alphabet"></param> + /// <param name = "tag"></param> + internal bool ReadToken<TTag>(int[,] dfa, bool[] final, TTag[][] tags, int state, int[] alphabet, out TTag[] tag) { Safe.ArgumentNotNull(); m_tokenLength = 0; @@ -58,10 +58,10 @@ // after the next chunk is read the offset in the buffer may change int pos = m_bufferOffset + m_tokenLength; - while(pos < m_bufferSize) { + while (pos < m_bufferSize) { var ch = m_buffer[pos]; - state = dfa[state,ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; + state = dfa[state, ch > maxSymbol ? 
DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; if (state == DFAConst.UNREACHABLE_STATE) break; @@ -77,16 +77,17 @@ if (final[state]) { tag = tags[state]; return true; - } else { - if (m_bufferOffset == m_bufferSize) { - if (m_tokenLength == 0) //EOF + } + + if (m_bufferOffset == m_bufferSize) { + if (m_tokenLength == 0) //EOF return false; - throw new ParserException(); - } - throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); + throw new ParserException(); + } + + throw new ParserException(String.Format("Unexpected symbol '{0}'", m_buffer[m_bufferOffset])); - } } protected void Feed(char[] buffer, int offset, int length) { @@ -108,7 +109,7 @@ var size = used + free; if (size > m_bufferMax) - throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached"), m_bufferMax/1024); + throw new ParserException(String.Format("The buffer limit ({0} Kb) is reached", m_bufferMax/1024)); var temp = new char[size];
--- a/Implab/Implab.csproj Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Implab.csproj Wed Mar 23 01:42:00 2016 +0300 @@ -160,11 +160,9 @@ <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> <Compile Include="Automaton\DFAConst.cs" /> - <Compile Include="Automaton\RegularExpressions\Grammar.cs" /> <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> - <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> <Compile Include="Automaton\RegularExpressions\Token.cs" /> <Compile Include="Automaton\RegularExpressions\IVisitor.cs" /> <Compile Include="Automaton\AutomatonTransition.cs" /> @@ -192,6 +190,10 @@ <Compile Include="Formats\StringScanner.cs" /> <Compile Include="Formats\ReaderScanner.cs" /> <Compile Include="Formats\ScannerContext.cs" /> + <Compile Include="Formats\Grammar.cs" /> + <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> + <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> + <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <ItemGroup />
--- a/Implab/Safe.cs Tue Mar 22 18:58:40 2016 +0300 +++ b/Implab/Safe.cs Wed Mar 23 01:42:00 2016 +0300 @@ -41,6 +41,11 @@ throw new ArgumentOutOfRangeException(paramName); } + public static void ArgumentOfType(object value, Type type, string paramName) { + if (!type.IsInstanceOfType(value)) + throw new ArgumentException(String.Format("The parameter must be of type {0}", type), paramName); + } + public static void Dispose(params IDisposable[] objects) { foreach (var d in objects) if (d != null)