Mercurial > pub > ImplabNet
changeset 178:d5c5db0335ee ref20160224
working on JSON parser
author | cin |
---|---|
date | Wed, 23 Mar 2016 19:51:45 +0300 |
parents | a0ff6a0e9c44 |
children | 478ef706906a |
files | Implab/Automaton/AutomatonConst.cs Implab/Automaton/DFAConst.cs Implab/Automaton/DFATable.cs Implab/Automaton/MapAlphabet.cs Implab/Automaton/RegularExpressions/EndTokenT.cs Implab/Automaton/RegularExpressions/IVisitorT.cs Implab/Automaton/RegularExpressions/RegularDFA.cs Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Implab/Automaton/RegularExpressions/RegularExpressionVisitorT.cs Implab/Automaton/RegularExpressions/Token.cs Implab/Components/LazyAndWeak.cs Implab/Formats/Grammar.cs Implab/Formats/JSON/JSONElementContext.cs Implab/Formats/JSON/JSONGrammar.cs Implab/Formats/JSON/JSONParser.cs Implab/Formats/TextScanner.cs Implab/Implab.csproj |
diffstat | 17 files changed, 247 insertions(+), 163 deletions(-) [+] |
line wrap: on
line diff
// File: Implab/Automaton/AutomatonConst.cs

namespace Implab.Automaton {
    /// <summary>
    /// Well-known numeric constants shared by the automaton implementation.
    /// </summary>
    /// <remarks>
    /// The class is named after its file (<c>AutomatonConst</c>): every call site
    /// in the code base refers to <c>AutomatonConst.*</c>, so declaring it under
    /// the former name <c>DFAConst</c> leaves those references unresolved.
    /// </remarks>
    public static class AutomatonConst {
        /// <summary>
        /// Value stored in a transition table cell when no transition exists
        /// for the given state and input class.
        /// </summary>
        public const int UNREACHABLE_STATE = -1;

        /// <summary>
        /// Input class assigned to symbols which are not part of the alphabet.
        /// </summary>
        public const int UNCLASSIFIED_INPUT = 0;
    }

    /// <summary>
    /// Legacy name of <see cref="AutomatonConst"/>, kept only so that code still
    /// referring to the old identifier keeps compiling.
    /// </summary>
    [System.Obsolete("Use AutomatonConst instead.")]
    public static class DFAConst {
        /// <summary>Same value as <see cref="AutomatonConst.UNREACHABLE_STATE"/>.</summary>
        public const int UNREACHABLE_STATE = AutomatonConst.UNREACHABLE_STATE;

        /// <summary>Same value as <see cref="AutomatonConst.UNCLASSIFIED_INPUT"/>.</summary>
        public const int UNCLASSIFIED_INPUT = AutomatonConst.UNCLASSIFIED_INPUT;
    }
}
--- a/Implab/Automaton/DFAConst.cs Wed Mar 23 01:42:00 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -using System; - -namespace Implab.Automaton { - public static class DFAConst { - public const int UNREACHABLE_STATE = -1; - - public const int UNCLASSIFIED_INPUT = 0; - } -} -
--- a/Implab/Automaton/DFATable.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/DFATable.cs Wed Mar 23 19:51:45 2016 +0300 @@ -105,7 +105,7 @@ for (int i = 0; i < StateCount; i++) for (int j = 0; i < AlphabetSize; j++) - table[i, j] = DFAConst.UNREACHABLE_STATE; + table[i, j] = AutomatonConst.UNREACHABLE_STATE; foreach (var t in this) table[t.s1,t.edge] = t.s2; @@ -273,11 +273,11 @@ var nextCls = 0; foreach (var item in minClasses) { - if (nextCls == DFAConst.UNCLASSIFIED_INPUT) + if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT) nextCls++; // сохраняем DFAConst.UNCLASSIFIED_INPUT - var cls = item.Contains(DFAConst.UNCLASSIFIED_INPUT) ? DFAConst.UNCLASSIFIED_INPUT : nextCls; + var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls; foreach (var a in item) alphabetMap[a] = cls;
--- a/Implab/Automaton/MapAlphabet.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/MapAlphabet.cs Wed Mar 23 19:51:45 2016 +0300 @@ -54,7 +54,7 @@ return cls; if (!m_supportUnclassified) throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet"); - return DFAConst.UNCLASSIFIED_INPUT; + return AutomatonConst.UNCLASSIFIED_INPUT; } public int Count {
--- a/Implab/Automaton/RegularExpressions/EndTokenT.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/EndTokenT.cs Wed Mar 23 19:51:45 2016 +0300 @@ -1,13 +1,11 @@ -using Implab; - -namespace Implab.Automaton.RegularExpressions { +namespace Implab.Automaton.RegularExpressions { /// <summary> /// Конечный символ расширенного регулярного выражения, при построении ДКА /// используется для определения конечных состояний. /// </summary> - public class EndToken<TTag>: Token { + public class EndToken<TTag>: EndToken { - TTag m_tag; + readonly TTag m_tag; public EndToken(TTag tag) { m_tag = tag; @@ -20,14 +18,6 @@ public TTag Tag { get { return m_tag; } } - - public override void Accept(IVisitor visitor) { - Safe.ArgumentOfType(visitor, typeof(IVisitor<TTag>), "visitor"); - Safe.ArgumentNotNull(visitor, "visitor"); - ((IVisitor<TTag>)visitor).Visit(this); - } - public override string ToString() { - return "#"; - } + } }
--- a/Implab/Automaton/RegularExpressions/IVisitorT.cs Wed Mar 23 01:42:00 2016 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -namespace Implab.Automaton.RegularExpressions { - /// <summary> - /// Интерфейс обходчика синтаксического дерева регулярного выражения - /// </summary> - public interface IVisitor<T> : IVisitor { - void Visit(EndToken<T> token); - } -}
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Wed Mar 23 19:51:45 2016 +0300 @@ -2,12 +2,12 @@ using System.Linq; namespace Implab.Automaton.RegularExpressions { - public class RegularDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { + public class TaggedDFA<TInput, TTag> : DFATable, ITaggedDFABuilder<TTag> { readonly Dictionary<int,TTag[]> m_tags = new Dictionary<int, TTag[]>(); readonly IAlphabet<TInput> m_alphabet; - public RegularDFA(IAlphabet<TInput> alphabet) { + public TaggedDFA(IAlphabet<TInput> alphabet) { Safe.ArgumentNotNull(alphabet, "aplhabet"); m_alphabet = alphabet; @@ -48,10 +48,10 @@ /// Optimize the specified alphabet. /// </summary> /// <param name="alphabet">Пустой алфавит, который будет зполнен в процессе оптимизации.</param> - public RegularDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { + public TaggedDFA<TInput,TTag> Optimize(IAlphabetBuilder<TInput> alphabet) { Safe.ArgumentNotNull(alphabet, "alphabet"); - var dfa = new RegularDFA<TInput, TTag>(alphabet); + var dfa = new TaggedDFA<TInput, TTag>(alphabet); var states = new DummyAlphabet(StateCount); var alphaMap = new Dictionary<int,int>();
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 19:51:45 2016 +0300 @@ -10,7 +10,7 @@ /// регулярное выражение и вычисляет followpos, затем используется метод /// <see cref="BuildDFA(IDFADefinition)"/> для построения автомата. /// </summary> - public class RegularExpressionVisitor<TTag> : IVisitor<TTag> { + public class RegularExpressionVisitor : IVisitor { int m_idx; Token m_root; HashSet<int> m_firstpos; @@ -19,13 +19,23 @@ readonly Dictionary<int, HashSet<int>> m_followpos = new Dictionary<int, HashSet<int>>(); readonly Dictionary<int, int> m_indexes = new Dictionary<int, int>(); readonly HashSet<int> m_ends = new HashSet<int>(); - readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>(); - public Dictionary<int, HashSet<int>> FollowposMap { - get { return m_followpos; } + readonly IDFATableBuilder m_builder; + readonly IAlphabetBuilder<HashSet<int>> m_states = new MapAlphabet<HashSet<int>>( + false, + new CustomEqualityComparer<HashSet<int>>( + (x, y) => x.SetEquals(y), + x => x.Sum(n => n.GetHashCode()) + ) + ); + + public RegularExpressionVisitor(IDFATableBuilder builder) { + Safe.ArgumentNotNull(builder, "builder"); + + m_builder = builder; } - public HashSet<int> Followpos(int pos) { + HashSet<int> Followpos(int pos) { HashSet<int> set; return m_followpos.TryGetValue(pos, out set) ? 
set : m_followpos[pos] = new HashSet<int>(); } @@ -42,6 +52,9 @@ return false; } + protected int Index { + get { return m_idx; } + } public void Visit(AltToken token) { if (m_root == null) @@ -112,45 +125,23 @@ m_lastpos = new HashSet<int>(new[] { m_idx }); } - public void Visit(EndToken<TTag> token) { + public virtual void Visit(EndToken token) { if (m_root == null) m_root = token; m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; - m_firstpos = new HashSet<int>(new[] { m_idx }); - m_lastpos = new HashSet<int>(new[] { m_idx }); - Followpos(m_idx); - m_ends.Add(m_idx); - m_tags.Add(m_idx, token.Tag); - } - - public void Visit(EndToken token) { - if (m_root == null) - m_root = token; - m_idx++; - m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT; + m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT; m_firstpos = new HashSet<int>(new[] { m_idx }); m_lastpos = new HashSet<int>(new[] { m_idx }); Followpos(m_idx); m_ends.Add(m_idx); } - public void BuildDFA(ITaggedDFABuilder<TTag> dfa) { - Safe.ArgumentNotNull(dfa,"dfa"); + public void BuildDFA() { + AddState(m_firstpos); + SetInitialState(m_firstpos); - var states = new MapAlphabet<HashSet<int>>( - false, - new CustomEqualityComparer<HashSet<int>>( - (x, y) => x.SetEquals(y), - x => x.Sum(n => n.GetHashCode()) - )); - - var initialState = states.DefineSymbol(m_firstpos); - dfa.SetInitialState(initialState); - - var tags = GetStateTags(m_firstpos); - if (tags != null && tags.Length > 0) - dfa.MarkFinalState(initialState, tags); + if(IsFinal(m_firstpos)) + MarkFinalState(m_firstpos); var inputMax = m_indexes.Values.Max(); var queue = new Queue<HashSet<int>>(); @@ -158,49 +149,64 @@ queue.Enqueue(m_firstpos); while (queue.Count > 0) { - var state = queue.Dequeue(); - var s1 = states.Translate(state); - Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT); + var s1 = queue.Dequeue(); for (int a = 0; a <= inputMax; a++) { - var next = new HashSet<int>(); - foreach (var p in state) { + var s2 = new HashSet<int>(); + 
foreach (var p in s1) { if (m_indexes[p] == a) { - next.UnionWith(Followpos(p)); + s2.UnionWith(Followpos(p)); } } - if (next.Count > 0) { - int s2; - if (states.Contains(next)) { - s2 = states.Translate(next); - } else { - s2 = states.DefineSymbol(next); + if (s2.Count > 0) { + if (!HasState(s2)) { + AddState(s2); + if (IsFinal(s2)) + MarkFinalState(s2); + + queue.Enqueue(s2); + } - if (IsFinal(next)) { - - dfa.MarkFinalState(s2); - tags = GetStateTags(next); - if (tags != null && tags.Length > 0) - dfa.SetStateTag(s2, tags); - } - - queue.Enqueue(next); - } - dfa.Add(new AutomatonTransition(s1, s2, a)); + DefineTransition(s1, s2, a); } + } } } + protected bool HasState(HashSet<int> state) { + return m_states.Contains(state); + } + + protected void AddState(HashSet<int> state) { + Debug.Assert(!HasState(state)); + + m_states.DefineSymbol(state); + } + + protected int Translate(HashSet<int> state) { + Debug.Assert(HasState(state)); + + return m_states.Translate(state); + } + + protected virtual void SetInitialState(HashSet<int> state) { + m_builder.SetInitialState(Translate(state)); + } + + protected virtual void MarkFinalState(HashSet<int> state) { + m_builder.MarkFinalState(Translate(state)); + } + + protected virtual void DefineTransition(HashSet<int> s1, HashSet<int> s2, int ch) { + + m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch)); + } + bool IsFinal(IEnumerable<int> state) { Debug.Assert(state != null); return state.Any(m_ends.Contains); } - TTag[] GetStateTags(IEnumerable<int> state) { - Debug.Assert(state != null); - return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray(); - } - } }
// File: Implab/Automaton/RegularExpressions/RegularExpressionVisitorT.cs
using Implab;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Implab.Automaton.RegularExpressions {
    /// <summary>
    /// Regular expression visitor which additionally remembers the tag carried by
    /// every <see cref="EndToken{TTag}"/> it visits and passes the collected tags
    /// to the tagged DFA builder whenever a state is marked as final.
    /// </summary>
    /// <typeparam name="TTag">Type of the tags attached to end tokens.</typeparam>
    public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor {
        // position index of an end token -> tag of that token
        readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();

        // the same builder instance which is handed to the base class, kept with
        // its tagged interface so that SetStateTag can be called on it
        readonly ITaggedDFABuilder<TTag> m_builder;

        /// <summary>
        /// Creates a visitor which reports the automaton to the specified builder.
        /// </summary>
        /// <param name="builder">The builder which receives states, transitions and state tags.</param>
        public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) {
            m_builder = builder;
        }

        /// <summary>
        /// Performs the base processing of the end token and then, when the token
        /// is an <see cref="EndToken{TTag}"/>, records its tag under the current
        /// position index.
        /// </summary>
        /// <param name="token">The end token being visited.</param>
        public override void Visit(EndToken token) {
            base.Visit(token);

            var taggedToken = token as EndToken<TTag>;
            if (taggedToken == null)
                return; // an untagged end token contributes no tag

            // Index is read after base.Visit, as in the tag registration order
            // established by this method
            m_tags.Add(Index, taggedToken.Tag);
        }

        /// <summary>
        /// Marks the state as final through the base implementation and assigns
        /// to it the tags of all tagged positions the state contains.
        /// </summary>
        /// <param name="state">The set of positions which forms the state.</param>
        protected override void MarkFinalState(HashSet<int> state) {
            base.MarkFinalState(state);

            var stateTags = GetStateTags(state);
            m_builder.SetStateTag(Translate(state), stateTags);
        }

        /// <summary>
        /// Collects the tags registered for the positions of the specified state,
        /// in the order the positions are enumerated.
        /// </summary>
        /// <param name="state">The positions of the state.</param>
        /// <returns>The array of tags, empty when no position of the state has a tag.</returns>
        TTag[] GetStateTags(IEnumerable<int> state) {
            Debug.Assert(state != null);

            var collected = new List<TTag>();
            foreach (var position in state) {
                TTag tag;
                if (m_tags.TryGetValue(position, out tag))
                    collected.Add(tag);
            }
            return collected.ToArray();
        }

    }
}
--- a/Implab/Automaton/RegularExpressions/Token.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Automaton/RegularExpressions/Token.cs Wed Mar 23 19:51:45 2016 +0300 @@ -6,7 +6,7 @@ public abstract class Token { public abstract void Accept(IVisitor visitor); - public Token Extend() { + public Token End() { return Cat(new EndToken()); }
// File: Implab/Components/LazyAndWeak.cs
using System;
using System.Threading;

namespace Implab.Components {
    /// <summary>
    /// Creates a value on demand through a factory and holds it only by a weak
    /// reference, so the value is created again on the next access after it has
    /// been garbage collected.
    /// </summary>
    /// <typeparam name="T">Type of the value, must be a reference type.</typeparam>
    public class LazyAndWeak<T> where T : class {

        readonly Func<T> m_factory;
        // null when the lock-free strategy is used
        readonly object m_lock;
        WeakReference m_reference;


        /// <summary>
        /// Creates a new instance.
        /// </summary>
        /// <param name="factory">The factory used to create the value.</param>
        /// <param name="useLock">
        /// <c>true</c> to create the value under an exclusive lock, so only one
        /// thread at a time runs the factory; <c>false</c> to use the lock-free
        /// strategy in which several threads may run the factory concurrently and
        /// only one result is published.
        /// </param>
        public LazyAndWeak(Func<T> factory, bool useLock) {
            // NOTE(review): Safe.ArgumentNotNull is a project helper, presumably
            // it throws when the argument is null - confirm against its definition
            Safe.ArgumentNotNull(factory, "factory");
            m_factory = factory;
            m_lock = useLock ? new object() : null;
        }

        /// <summary>
        /// Creates a new instance which uses the lock-free strategy.
        /// </summary>
        /// <param name="factory">The factory used to create the value.</param>
        public LazyAndWeak(Func<T> factory) : this(factory, false) {
        }

        /// <summary>
        /// Gets the value, creating it with the factory when it either hasn't
        /// been created yet or isn't alive anymore.
        /// </summary>
        public T Value {
            get {
                while (true) {
                    var weak = m_reference;
                    var value = Dereference(weak);
                    if (value != null)
                        return value;

                    if (m_lock == null) {
                        value = m_factory();

                        // publish the value only if nobody has replaced the reference
                        // since it was read above, otherwise retry and try to pick up
                        // the value published by the other thread
                        if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak)
                            return value;
                    } else {
                        // this branch used to be empty, which made the getter spin
                        // forever without ever calling the factory
                        lock (m_lock) {
                            // another thread may have published the value while
                            // this one was waiting for the lock
                            value = Dereference(m_reference);
                            if (value != null)
                                return value;

                            value = m_factory();
                            // Interlocked is used to publish the reference because it
                            // is read outside of the lock at the top of the loop
                            Interlocked.Exchange(ref m_reference, new WeakReference(value));
                            return value;
                        }
                    }
                }
            }
        }

        // Returns the target of the weak reference or null when there is no
        // reference or its target isn't alive.
        static T Dereference(WeakReference weak) {
            return weak != null ? weak.Target as T : null;
        }
    }
}
--- a/Implab/Formats/Grammar.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Formats/Grammar.cs Wed Mar 23 19:51:45 2016 +0300 @@ -9,14 +9,14 @@ /// <summary> /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа <c>char</c>. /// </summary> - public abstract class Grammar<TSymbol, TTag> { + public abstract class Grammar<TSymbol> { protected abstract IAlphabetBuilder<TSymbol> AlphabetBuilder { get; } - protected SymbolToken<TTag> UnclassifiedToken() { - return new SymbolToken<TTag>(DFAConst.UNCLASSIFIED_INPUT); + protected SymbolToken UnclassifiedToken() { + return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT); } protected void DefineAlphabet(IEnumerable<TSymbol> alphabet) { @@ -26,23 +26,23 @@ AlphabetBuilder.DefineSymbol(ch); } - protected Token<TTag> SymbolToken(TSymbol symbol) { - return Token<TTag>.New(TranslateOrAdd(symbol)); + protected Token SymbolToken(TSymbol symbol) { + return Token.New(TranslateOrAdd(symbol)); } - protected Token<TTag> SymbolToken(IEnumerable<TSymbol> symbols) { + protected Token SymbolToken(IEnumerable<TSymbol> symbols) { Safe.ArgumentNotNull(symbols, "symbols"); - return Token<TTag>.New(TranslateOrAdd(symbols).ToArray()); + return Token.New(TranslateOrAdd(symbols).ToArray()); } - protected Token<TTag> SymbolSetToken(params TSymbol[] set) { + protected Token SymbolSetToken(params TSymbol[] set) { return SymbolToken(set); } int TranslateOrAdd(TSymbol ch) { var t = AlphabetBuilder.Translate(ch); - if (t == DFAConst.UNCLASSIFIED_INPUT) + if (t == AutomatonConst.UNCLASSIFIED_INPUT) t = AlphabetBuilder.DefineSymbol(ch); return t; } @@ -53,7 +53,7 @@ int TranslateOrDie(TSymbol ch) { var t = AlphabetBuilder.Translate(ch); - if (t == DFAConst.UNCLASSIFIED_INPUT) + if (t == AutomatonConst.UNCLASSIFIED_INPUT) throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); return t; } @@ -62,22 +62,21 @@ return symbols.Distinct().Select(TranslateOrDie); } - protected Token<TTag> 
SymbolTokenExcept(IEnumerable<TSymbol> symbols) { + protected Token SymbolTokenExcept(IEnumerable<TSymbol> symbols) { Safe.ArgumentNotNull(symbols, "symbols"); - return Token<TTag>.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); + return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() ); } protected abstract IndexedAlphabetBase<TSymbol> CreateAlphabet(); - protected ScannerContext<TTag> BuildScannerContext(Token<TTag> regexp) { + protected ScannerContext<TTag> BuildScannerContext<TTag>(Token regexp) { var dfa = new RegularDFA<TSymbol, TTag>(AlphabetBuilder); - var visitor = new RegularExpressionVisitor<TTag>(); - regexp.Accept( visitor ); - - visitor.BuildDFA(dfa); + var visitor = new RegularExpressionVisitor<TTag>(dfa); + regexp.Accept(visitor); + visitor.BuildDFA(); if (dfa.IsFinalState(dfa.InitialState)) throw new ApplicationException("The specified language contains empty token");
--- a/Implab/Formats/JSON/JSONElementContext.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Formats/JSON/JSONElementContext.cs Wed Mar 23 19:51:45 2016 +0300 @@ -5,7 +5,6 @@ enum JSONElementContext { None, Object, - Array, - Closed + Array } }
--- a/Implab/Formats/JSON/JSONGrammar.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Formats/JSON/JSONGrammar.cs Wed Mar 23 19:51:45 2016 +0300 @@ -4,7 +4,7 @@ using Implab.Automaton; namespace Implab.Formats.JSON { - class JSONGrammar : Grammar<char,JSONGrammar.TokenType> { + class JSONGrammar : Grammar<char> { public enum TokenType { None, BeginObject, @@ -29,8 +29,8 @@ get { return _instance.Value; } } - readonly ScannerContext<TokenType> m_jsonDFA; - readonly ScannerContext<TokenType> m_stringDFA; + readonly ScannerContext<TokenType> m_jsonExpression; + readonly ScannerContext<TokenType> m_stringExpression; public JSONGrammar() { DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); @@ -81,23 +81,25 @@ .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); - m_jsonDFA = BuildScannerContext(jsonExpression); - m_stringDFA = BuildScannerContext(jsonStringExpression); + m_jsonExpression = BuildScannerContext<TokenType>(jsonExpression); + m_stringExpression = BuildScannerContext<TokenType>(jsonStringExpression); + + } - public ScannerContext<TokenType> JsonDFA { + public ScannerContext<TokenType> JsonExpression { get { - return m_jsonDFA; + return m_jsonExpression; } } - public ScannerContext<TokenType> JsonStringDFA { + public ScannerContext<TokenType> JsonStringExpression { get { - return m_stringDFA; + return m_stringExpression; } } - Token<TokenType> SymbolRangeToken(char start, char stop) { + Token SymbolRangeToken(char start, char stop) { return SymbolToken(Enumerable.Range(start,stop - start).Cast<char>()); }
--- a/Implab/Formats/JSON/JSONParser.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Formats/JSON/JSONParser.cs Wed Mar 23 19:51:45 2016 +0300 @@ -38,17 +38,30 @@ MemberValue } + #region Parser rules struct ParserContext { - DFAStateDescriptior<object> - } + readonly int[,] m_dfa; + int m_state; + + readonly JSONElementContext m_elementContext; - static readonly EnumAlphabet<JsonTokenType> _alphabet = EnumAlphabet<JsonTokenType>.FullAlphabet; - static readonly DFAStateDescriptior<object>[] _jsonDFA; - static readonly int _jsonDFAInitialState; - static readonly DFAStateDescriptior<object>[] _objectDFA; - static readonly int _objectDFAInitialState; - static readonly DFAStateDescriptior<object>[] _arrayDFA; - static readonly int _arrayDFAInitialState; + public ParserContext(int[,] dfa, int state, JSONElementContext context) { + m_dfa = dfa; + m_state = state; + m_elementContext = context; + } + + public bool Move(JsonTokenType token) { + var next = m_dfa[m_state, token]; + if (next == AutomatonConst.UNREACHABLE_STATE) + return false; + m_state = next; + } + + public JSONElementContext ElementContext { + get { return m_elementContext; } + } + } static JSONParser() { @@ -64,7 +77,8 @@ ) .Optional() .Cat(Token(JsonTokenType.EndObject)) - .Tag(null); + .End(); + var arrayExpression = valueExpression .Cat( Token(JsonTokenType.ValueSeparator) @@ -73,29 +87,31 @@ ) .Optional() .Cat(Token(JsonTokenType.EndArray)) - .Tag(null); + .End(); - var jsonExpression = valueExpression.Tag(null); + var jsonExpression = valueExpression.End(); - _jsonDFA = CreateDFA(jsonExpression).GetTransitionTable(); - _objectDFA = CreateDFA(objectExpression).GetTransitionTable(); - _arrayDFA = CreateDFA(arrayExpression).GetTransitionTable(); + _jsonDFA = CreateParserContext(jsonExpression, JSONElementContext.None); + _objectDFA = CreateParserContext(objectExpression, JSONElementContext.Object); + _arrayDFA = CreateParserContext(arrayExpression, JSONElementContext.Array); } - static Token<object> 
Token(params JsonTokenType[] input) { - return Token<object>.New(input.Select(t => _alphabet.Translate(t)).ToArray()); + static Token Token(params JsonTokenType[] input) { + return Token.New( input.Select(t => (int)t).ToArray() ); } - static RegularDFA<JsonTokenType,object> CreateDFA(Token<object> expr) { - var builder = new RegularExpressionVisitor<object>(); - var dfa = new RegularDFA<JsonTokenType,object>(_alphabet); - + static ParserContext CreateParserContext(Token expr, JSONElementContext context) { + + var dfa = new DFATable(); + var builder = new RegularExpressionVisitor(dfa); expr.Accept(builder); + builder.BuildDFA(); - builder.BuildDFA(dfa); - return dfa; + return new ParserContext(dfa.CreateTransitionTable(), dfa.InitialState, context); } + #endregion + JSONScanner m_scanner; MemberContext m_memberContext; @@ -117,8 +133,7 @@ /// Создает новый экземпляр парсера, на основе текстового потока. /// </summary> /// <param name="reader">Текстовый поток.</param> - /// <param name="dispose">Признак того, что парсер должен конролировать время жизни входного потока.</param> - public JSONParser(TextReader reader, bool dispose) + public JSONParser(TextReader reader) : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty }) { Safe.ArgumentNotNull(reader, "reader"); m_scanner = new JSONScanner();
--- a/Implab/Formats/TextScanner.cs Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Formats/TextScanner.cs Wed Mar 23 19:51:45 2016 +0300 @@ -61,15 +61,15 @@ while (pos < m_bufferSize) { var ch = m_buffer[pos]; - state = dfa[state, ch > maxSymbol ? DFAConst.UNCLASSIFIED_INPUT : alphabet[ch]]; - if (state == DFAConst.UNREACHABLE_STATE) + state = dfa[state, ch > maxSymbol ? AutomatonConst.UNCLASSIFIED_INPUT : alphabet[ch]]; + if (state == AutomatonConst.UNREACHABLE_STATE) break; pos++; } m_tokenLength = pos - m_bufferOffset; - } while (state != DFAConst.UNREACHABLE_STATE && Feed()); + } while (state != AutomatonConst.UNREACHABLE_STATE && Feed()); m_tokenOffset = m_bufferOffset; m_bufferOffset += m_tokenLength;
--- a/Implab/Implab.csproj Wed Mar 23 01:42:00 2016 +0300 +++ b/Implab/Implab.csproj Wed Mar 23 19:51:45 2016 +0300 @@ -159,7 +159,6 @@ <Compile Include="Automaton\RegularExpressions\AltToken.cs" /> <Compile Include="Automaton\RegularExpressions\BinaryToken.cs" /> <Compile Include="Automaton\RegularExpressions\CatToken.cs" /> - <Compile Include="Automaton\DFAConst.cs" /> <Compile Include="Automaton\RegularExpressions\StarToken.cs" /> <Compile Include="Automaton\RegularExpressions\SymbolToken.cs" /> <Compile Include="Automaton\RegularExpressions\EmptyToken.cs" /> @@ -183,7 +182,6 @@ <Compile Include="Automaton\IDFATable.cs" /> <Compile Include="Automaton\IDFATableBuilder.cs" /> <Compile Include="Automaton\DFATable.cs" /> - <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitor.cs" /> <Compile Include="Automaton\RegularExpressions\ITaggedDFABuilder.cs" /> <Compile Include="Formats\TextScanner.cs" /> @@ -193,7 +191,10 @@ <Compile Include="Formats\Grammar.cs" /> <Compile Include="Automaton\RegularExpressions\EndTokenT.cs" /> <Compile Include="Automaton\RegularExpressions\EndToken.cs" /> - <Compile Include="Automaton\RegularExpressions\IVisitorT.cs" /> + <Compile Include="Automaton\RegularExpressions\RegularExpressionVisitorT.cs" /> + <Compile Include="Automaton\AutomatonConst.cs" /> + <Compile Include="Automaton\RegularExpressions\RegularDFA.cs" /> + <Compile Include="Components\LazyAndWeak.cs" /> </ItemGroup> <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" /> <ItemGroup />