# HG changeset patch
# User cin
# Date 1458751905 -10800
# Node ID d5c5db0335ee6b92778df82f0340b28fa2060c14
# Parent a0ff6a0e9c447ff1b0006e6424a785bc39f971f7
working on JSON parser
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/AutomatonConst.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/AutomatonConst.cs Wed Mar 23 19:51:45 2016 +0300
@@ -0,0 +1,22 @@
+
+namespace Implab.Automaton {
+    /// <summary>
+    /// Well-known integer values shared by the automaton classes.
+    /// </summary>
+    public static class AutomatonConst {
+        /// <summary>
+        /// Transition table entry meaning that no transition is defined for the
+        /// (state, input) pair. DFATable fills its table with this value before
+        /// writing the known transitions.
+        /// </summary>
+        public const int UNREACHABLE_STATE = -1;
+
+        /// <summary>
+        /// Input class reserved for symbols that are not part of the alphabet.
+        /// An alphabet that supports unclassified input returns it instead of
+        /// throwing for an unknown symbol.
+        /// </summary>
+        public const int UNCLASSIFIED_INPUT = 0;
+    }
+}
+
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/DFAConst.cs
--- a/Implab/Automaton/DFAConst.cs Wed Mar 23 01:42:00 2016 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-using System;
-
-namespace Implab.Automaton {
- public static class DFAConst {
- public const int UNREACHABLE_STATE = -1;
-
- public const int UNCLASSIFIED_INPUT = 0;
- }
-}
-
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/DFATable.cs
--- a/Implab/Automaton/DFATable.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/DFATable.cs Wed Mar 23 19:51:45 2016 +0300
@@ -105,7 +105,7 @@
for (int i = 0; i < StateCount; i++)
- for (int j = 0; i < AlphabetSize; j++)
- table[i, j] = DFAConst.UNREACHABLE_STATE;
+ for (int j = 0; j < AlphabetSize; j++) // bound the column index j, not the row index i
+ table[i, j] = AutomatonConst.UNREACHABLE_STATE;
foreach (var t in this)
table[t.s1,t.edge] = t.s2;
@@ -273,11 +273,11 @@
var nextCls = 0;
foreach (var item in minClasses) {
- if (nextCls == DFAConst.UNCLASSIFIED_INPUT)
+ if (nextCls == AutomatonConst.UNCLASSIFIED_INPUT)
nextCls++;
// сохраняем DFAConst.UNCLASSIFIED_INPUT
- var cls = item.Contains(DFAConst.UNCLASSIFIED_INPUT) ? DFAConst.UNCLASSIFIED_INPUT : nextCls;
+ var cls = item.Contains(AutomatonConst.UNCLASSIFIED_INPUT) ? AutomatonConst.UNCLASSIFIED_INPUT : nextCls;
foreach (var a in item)
alphabetMap[a] = cls;
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/MapAlphabet.cs
--- a/Implab/Automaton/MapAlphabet.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/MapAlphabet.cs Wed Mar 23 19:51:45 2016 +0300
@@ -54,7 +54,7 @@
return cls;
if (!m_supportUnclassified)
throw new ArgumentOutOfRangeException("symbol", "The specified symbol isn't in the alphabet");
- return DFAConst.UNCLASSIFIED_INPUT;
+ return AutomatonConst.UNCLASSIFIED_INPUT;
}
public int Count {
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/RegularExpressions/EndTokenT.cs
--- a/Implab/Automaton/RegularExpressions/EndTokenT.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/EndTokenT.cs Wed Mar 23 19:51:45 2016 +0300
@@ -1,13 +1,11 @@
-using Implab;
-
-namespace Implab.Automaton.RegularExpressions {
+namespace Implab.Automaton.RegularExpressions {
///
/// Конечный символ расширенного регулярного выражения, при построении ДКА
/// используется для определения конечных состояний.
///
- public class EndToken: Token {
+ public class EndToken: EndToken {
- TTag m_tag;
+ readonly TTag m_tag;
public EndToken(TTag tag) {
m_tag = tag;
@@ -20,14 +18,6 @@
public TTag Tag {
get { return m_tag; }
}
-
- public override void Accept(IVisitor visitor) {
- Safe.ArgumentOfType(visitor, typeof(IVisitor), "visitor");
- Safe.ArgumentNotNull(visitor, "visitor");
- ((IVisitor)visitor).Visit(this);
- }
- public override string ToString() {
- return "#";
- }
+
}
}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/RegularExpressions/IVisitorT.cs
--- a/Implab/Automaton/RegularExpressions/IVisitorT.cs Wed Mar 23 01:42:00 2016 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-namespace Implab.Automaton.RegularExpressions {
- ///
- /// Интерфейс обходчика синтаксического дерева регулярного выражения
- ///
- public interface IVisitor : IVisitor {
- void Visit(EndToken token);
- }
-}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/RegularExpressions/RegularDFA.cs
--- a/Implab/Automaton/RegularExpressions/RegularDFA.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularDFA.cs Wed Mar 23 19:51:45 2016 +0300
@@ -2,12 +2,12 @@
using System.Linq;
namespace Implab.Automaton.RegularExpressions {
- public class RegularDFA : DFATable, ITaggedDFABuilder {
+ public class TaggedDFA : DFATable, ITaggedDFABuilder {
readonly Dictionary m_tags = new Dictionary();
readonly IAlphabet m_alphabet;
- public RegularDFA(IAlphabet alphabet) {
+ public TaggedDFA(IAlphabet alphabet) {
Safe.ArgumentNotNull(alphabet, "aplhabet");
m_alphabet = alphabet;
@@ -48,10 +48,10 @@
/// Optimize the specified alphabet.
///
/// Пустой алфавит, который будет зполнен в процессе оптимизации.
- public RegularDFA Optimize(IAlphabetBuilder alphabet) {
+ public TaggedDFA Optimize(IAlphabetBuilder alphabet) {
Safe.ArgumentNotNull(alphabet, "alphabet");
- var dfa = new RegularDFA(alphabet);
+ var dfa = new TaggedDFA(alphabet);
var states = new DummyAlphabet(StateCount);
var alphaMap = new Dictionary();
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs
--- a/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitor.cs Wed Mar 23 19:51:45 2016 +0300
@@ -10,7 +10,7 @@
/// регулярное выражение и вычисляет followpos, затем используется метод
/// для построения автомата.
///
- public class RegularExpressionVisitor : IVisitor {
+ public class RegularExpressionVisitor : IVisitor {
int m_idx;
Token m_root;
HashSet m_firstpos;
@@ -19,13 +19,23 @@
readonly Dictionary> m_followpos = new Dictionary>();
readonly Dictionary m_indexes = new Dictionary();
readonly HashSet m_ends = new HashSet();
- readonly Dictionary m_tags = new Dictionary();
- public Dictionary> FollowposMap {
- get { return m_followpos; }
+ readonly IDFATableBuilder m_builder;
+ readonly IAlphabetBuilder> m_states = new MapAlphabet>(
+ false,
+ new CustomEqualityComparer>(
+ (x, y) => x.SetEquals(y),
+ x => x.Sum(n => n.GetHashCode())
+ )
+ );
+
+ public RegularExpressionVisitor(IDFATableBuilder builder) {
+ Safe.ArgumentNotNull(builder, "builder");
+
+ m_builder = builder;
}
- public HashSet Followpos(int pos) {
+ HashSet Followpos(int pos) {
HashSet set;
return m_followpos.TryGetValue(pos, out set) ? set : m_followpos[pos] = new HashSet();
}
@@ -42,6 +52,9 @@
return false;
}
+ protected int Index {
+ get { return m_idx; }
+ }
public void Visit(AltToken token) {
if (m_root == null)
@@ -112,45 +125,23 @@
m_lastpos = new HashSet(new[] { m_idx });
}
- public void Visit(EndToken token) {
+ public virtual void Visit(EndToken token) {
if (m_root == null)
m_root = token;
m_idx++;
- m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
- m_firstpos = new HashSet(new[] { m_idx });
- m_lastpos = new HashSet(new[] { m_idx });
- Followpos(m_idx);
- m_ends.Add(m_idx);
- m_tags.Add(m_idx, token.Tag);
- }
-
- public void Visit(EndToken token) {
- if (m_root == null)
- m_root = token;
- m_idx++;
- m_indexes[m_idx] = DFAConst.UNCLASSIFIED_INPUT;
+ m_indexes[m_idx] = AutomatonConst.UNCLASSIFIED_INPUT;
m_firstpos = new HashSet(new[] { m_idx });
m_lastpos = new HashSet(new[] { m_idx });
Followpos(m_idx);
m_ends.Add(m_idx);
}
- public void BuildDFA(ITaggedDFABuilder dfa) {
- Safe.ArgumentNotNull(dfa,"dfa");
+ public void BuildDFA() {
+ AddState(m_firstpos);
+ SetInitialState(m_firstpos);
- var states = new MapAlphabet>(
- false,
- new CustomEqualityComparer>(
- (x, y) => x.SetEquals(y),
- x => x.Sum(n => n.GetHashCode())
- ));
-
- var initialState = states.DefineSymbol(m_firstpos);
- dfa.SetInitialState(initialState);
-
- var tags = GetStateTags(m_firstpos);
- if (tags != null && tags.Length > 0)
- dfa.MarkFinalState(initialState, tags);
+ if(IsFinal(m_firstpos))
+ MarkFinalState(m_firstpos);
var inputMax = m_indexes.Values.Max();
var queue = new Queue>();
@@ -158,49 +149,64 @@
queue.Enqueue(m_firstpos);
while (queue.Count > 0) {
- var state = queue.Dequeue();
- var s1 = states.Translate(state);
- Debug.Assert(s1 != DFAConst.UNCLASSIFIED_INPUT);
+ var s1 = queue.Dequeue();
for (int a = 0; a <= inputMax; a++) {
- var next = new HashSet();
- foreach (var p in state) {
+ var s2 = new HashSet();
+ foreach (var p in s1) {
if (m_indexes[p] == a) {
- next.UnionWith(Followpos(p));
+ s2.UnionWith(Followpos(p));
}
}
- if (next.Count > 0) {
- int s2;
- if (states.Contains(next)) {
- s2 = states.Translate(next);
- } else {
- s2 = states.DefineSymbol(next);
+ if (s2.Count > 0) {
+ if (!HasState(s2)) {
+ AddState(s2);
+ if (IsFinal(s2))
+ MarkFinalState(s2);
+
+ queue.Enqueue(s2);
+ }
- if (IsFinal(next)) {
-
- dfa.MarkFinalState(s2);
- tags = GetStateTags(next);
- if (tags != null && tags.Length > 0)
- dfa.SetStateTag(s2, tags);
- }
-
- queue.Enqueue(next);
- }
- dfa.Add(new AutomatonTransition(s1, s2, a));
+ DefineTransition(s1, s2, a);
}
+
}
}
}
+ protected bool HasState(HashSet state) {
+ return m_states.Contains(state);
+ }
+
+ protected void AddState(HashSet state) {
+ Debug.Assert(!HasState(state));
+
+ m_states.DefineSymbol(state);
+ }
+
+ protected int Translate(HashSet state) {
+ Debug.Assert(HasState(state));
+
+ return m_states.Translate(state);
+ }
+
+ protected virtual void SetInitialState(HashSet state) {
+ m_builder.SetInitialState(Translate(state));
+ }
+
+ protected virtual void MarkFinalState(HashSet state) {
+ m_builder.MarkFinalState(Translate(state));
+ }
+
+ protected virtual void DefineTransition(HashSet s1, HashSet s2, int ch) {
+
+ m_builder.Add(new AutomatonTransition(Translate(s1), Translate(s2), ch));
+ }
+
bool IsFinal(IEnumerable state) {
Debug.Assert(state != null);
return state.Any(m_ends.Contains);
}
- TTag[] GetStateTags(IEnumerable state) {
- Debug.Assert(state != null);
- return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
- }
-
}
}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/RegularExpressions/RegularExpressionVisitorT.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Automaton/RegularExpressions/RegularExpressionVisitorT.cs Wed Mar 23 19:51:45 2016 +0300
@@ -0,0 +1,56 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Implab.Automaton.RegularExpressions {
+    /// <summary>
+    /// Regular expression visitor that additionally remembers the tag of every
+    /// tagged end token it visits and attaches the collected tags to the final
+    /// states it reports to the builder.
+    /// </summary>
+    /// <typeparam name="TTag">Type of the tags carried by tagged end tokens.</typeparam>
+    public class RegularExpressionVisitor<TTag> : RegularExpressionVisitor {
+        // Position index of a tagged end token -> the tag of that token.
+        readonly Dictionary<int, TTag> m_tags = new Dictionary<int, TTag>();
+
+        // The builder also passed to the base class, kept here with its tagged interface.
+        readonly ITaggedDFABuilder<TTag> m_builder;
+
+        /// <summary>
+        /// Creates a visitor that reports the automaton and its state tags to the builder.
+        /// </summary>
+        /// <param name="builder">Target builder; the base constructor checks it with Safe.ArgumentNotNull.</param>
+        public RegularExpressionVisitor(ITaggedDFABuilder<TTag> builder) : base(builder) {
+            m_builder = builder;
+        }
+
+        /// <summary>
+        /// Lets the base visitor register the end token, then, if the token is a
+        /// tagged end token, stores its tag under the position index just assigned.
+        /// </summary>
+        public override void Visit(EndToken token) {
+            base.Visit(token);
+            var tagged = token as EndToken<TTag>;
+            if (tagged != null)
+                m_tags.Add(Index, tagged.Tag);
+        }
+
+        /// <summary>
+        /// Marks the state as final and attaches the tags of the tagged end
+        /// positions contained in it.
+        /// </summary>
+        protected override void MarkFinalState(HashSet<int> state) {
+            base.MarkFinalState(state);
+            m_builder.SetStateTag(Translate(state), GetStateTags(state));
+        }
+
+        // Tags of those positions in the state that belong to tagged end tokens.
+        TTag[] GetStateTags(IEnumerable<int> state) {
+            Debug.Assert(state != null);
+            return state.Where(m_tags.ContainsKey).Select(pos => m_tags[pos]).ToArray();
+        }
+
+    }
+}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Automaton/RegularExpressions/Token.cs
--- a/Implab/Automaton/RegularExpressions/Token.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Automaton/RegularExpressions/Token.cs Wed Mar 23 19:51:45 2016 +0300
@@ -6,7 +6,7 @@
public abstract class Token {
public abstract void Accept(IVisitor visitor);
- public Token Extend() {
+ public Token End() {
return Cat(new EndToken());
}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Components/LazyAndWeak.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Components/LazyAndWeak.cs Wed Mar 23 19:51:45 2016 +0300
@@ -0,0 +1,79 @@
+using System;
+using System.Threading;
+
+namespace Implab.Components {
+    /// <summary>
+    /// Creates a value on demand with a factory and keeps it only through a
+    /// <see cref="WeakReference"/>; when the target is no longer available the
+    /// factory is called again on the next access.
+    /// </summary>
+    /// <typeparam name="T">Type of the value; must be a reference type.</typeparam>
+    public class LazyAndWeak<T> where T : class {
+
+        readonly Func<T> m_factory;
+        // Null when the instance was created with useLock == false.
+        readonly object m_lock;
+        WeakReference m_reference;
+
+
+        /// <summary>
+        /// Creates the holder.
+        /// </summary>
+        /// <param name="factory">Produces the value whenever it has to be (re)created.</param>
+        /// <param name="useLock">
+        /// When true the factory is invoked under a private lock; when false the
+        /// factory may run concurrently and the result is published with a compare-and-swap.
+        /// </param>
+        public LazyAndWeak(Func<T> factory, bool useLock) {
+            Safe.ArgumentNotNull(factory, "factory");
+            m_factory = factory;
+            m_lock = useLock ? new object() : null;
+        }
+
+        /// <summary>
+        /// Creates the holder without a lock.
+        /// </summary>
+        public LazyAndWeak(Func<T> factory) : this(factory, false) {
+        }
+
+        /// <summary>
+        /// Gets the current value, creating it with the factory when there is no live target.
+        /// </summary>
+        public T Value {
+            get {
+                while (true) {
+                    var weak = m_reference;
+                    T value;
+                    if (weak != null) {
+                        value = weak.Target as T;
+                        if (value != null)
+                            return value;
+                    }
+
+                    if (m_lock == null) {
+                        value = m_factory();
+
+                        // Publish only if nobody replaced the reference meanwhile; otherwise retry.
+                        if (Interlocked.CompareExchange(ref m_reference, new WeakReference(value), weak) == weak)
+                            return value;
+                    } else {
+                        lock (m_lock) {
+                            // Another thread may have created the value while we waited for the lock.
+                            weak = m_reference;
+                            if (weak != null) {
+                                value = weak.Target as T;
+                                if (value != null)
+                                    return value;
+                            }
+
+                            value = m_factory();
+                            m_reference = new WeakReference(value);
+                            return value;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Formats/Grammar.cs
--- a/Implab/Formats/Grammar.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Formats/Grammar.cs Wed Mar 23 19:51:45 2016 +0300
@@ -9,14 +9,14 @@
///
/// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа char.
///
- public abstract class Grammar {
+ public abstract class Grammar {
protected abstract IAlphabetBuilder AlphabetBuilder {
get;
}
- protected SymbolToken UnclassifiedToken() {
- return new SymbolToken(DFAConst.UNCLASSIFIED_INPUT);
+ protected SymbolToken UnclassifiedToken() {
+ return new SymbolToken(AutomatonConst.UNCLASSIFIED_INPUT);
}
protected void DefineAlphabet(IEnumerable alphabet) {
@@ -26,23 +26,23 @@
AlphabetBuilder.DefineSymbol(ch);
}
- protected Token SymbolToken(TSymbol symbol) {
- return Token.New(TranslateOrAdd(symbol));
+ protected Token SymbolToken(TSymbol symbol) {
+ return Token.New(TranslateOrAdd(symbol));
}
- protected Token SymbolToken(IEnumerable symbols) {
+ protected Token SymbolToken(IEnumerable symbols) {
Safe.ArgumentNotNull(symbols, "symbols");
- return Token.New(TranslateOrAdd(symbols).ToArray());
+ return Token.New(TranslateOrAdd(symbols).ToArray());
}
- protected Token SymbolSetToken(params TSymbol[] set) {
+ protected Token SymbolSetToken(params TSymbol[] set) {
return SymbolToken(set);
}
int TranslateOrAdd(TSymbol ch) {
var t = AlphabetBuilder.Translate(ch);
- if (t == DFAConst.UNCLASSIFIED_INPUT)
+ if (t == AutomatonConst.UNCLASSIFIED_INPUT)
t = AlphabetBuilder.DefineSymbol(ch);
return t;
}
@@ -53,7 +53,7 @@
int TranslateOrDie(TSymbol ch) {
var t = AlphabetBuilder.Translate(ch);
- if (t == DFAConst.UNCLASSIFIED_INPUT)
+ if (t == AutomatonConst.UNCLASSIFIED_INPUT)
throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
return t;
}
@@ -62,22 +62,21 @@
return symbols.Distinct().Select(TranslateOrDie);
}
- protected Token SymbolTokenExcept(IEnumerable symbols) {
+ protected Token SymbolTokenExcept(IEnumerable symbols) {
Safe.ArgumentNotNull(symbols, "symbols");
- return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
+ return Token.New( Enumerable.Range(0, AlphabetBuilder.Count).Except(TranslateOrDie(symbols)).ToArray() );
}
protected abstract IndexedAlphabetBase CreateAlphabet();
- protected ScannerContext BuildScannerContext(Token regexp) {
+ protected ScannerContext BuildScannerContext(Token regexp) {
var dfa = new RegularDFA(AlphabetBuilder);
- var visitor = new RegularExpressionVisitor();
- regexp.Accept( visitor );
-
- visitor.BuildDFA(dfa);
+ var visitor = new RegularExpressionVisitor(dfa);
+ regexp.Accept(visitor);
+ visitor.BuildDFA();
if (dfa.IsFinalState(dfa.InitialState))
throw new ApplicationException("The specified language contains empty token");
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Formats/JSON/JSONElementContext.cs
--- a/Implab/Formats/JSON/JSONElementContext.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Formats/JSON/JSONElementContext.cs Wed Mar 23 19:51:45 2016 +0300
@@ -5,7 +5,6 @@
enum JSONElementContext {
None,
Object,
- Array,
- Closed
+ Array
}
}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Formats/JSON/JSONGrammar.cs
--- a/Implab/Formats/JSON/JSONGrammar.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Formats/JSON/JSONGrammar.cs Wed Mar 23 19:51:45 2016 +0300
@@ -4,7 +4,7 @@
using Implab.Automaton;
namespace Implab.Formats.JSON {
- class JSONGrammar : Grammar {
+ class JSONGrammar : Grammar {
public enum TokenType {
None,
BeginObject,
@@ -29,8 +29,8 @@
get { return _instance.Value; }
}
- readonly ScannerContext m_jsonDFA;
- readonly ScannerContext m_stringDFA;
+ readonly ScannerContext m_jsonExpression;
+ readonly ScannerContext m_stringExpression;
public JSONGrammar() {
DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
@@ -81,23 +81,25 @@
.Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
- m_jsonDFA = BuildScannerContext(jsonExpression);
- m_stringDFA = BuildScannerContext(jsonStringExpression);
+ m_jsonExpression = BuildScannerContext(jsonExpression);
+ m_stringExpression = BuildScannerContext(jsonStringExpression);
+
+
}
- public ScannerContext JsonDFA {
+ public ScannerContext JsonExpression {
get {
- return m_jsonDFA;
+ return m_jsonExpression;
}
}
- public ScannerContext JsonStringDFA {
+ public ScannerContext JsonStringExpression {
get {
- return m_stringDFA;
+ return m_stringExpression;
}
}
- Token SymbolRangeToken(char start, char stop) {
+ Token SymbolRangeToken(char start, char stop) {
return SymbolToken(Enumerable.Range(start,stop - start).Cast());
}
diff -r a0ff6a0e9c44 -r d5c5db0335ee Implab/Formats/JSON/JSONParser.cs
--- a/Implab/Formats/JSON/JSONParser.cs Wed Mar 23 01:42:00 2016 +0300
+++ b/Implab/Formats/JSON/JSONParser.cs Wed Mar 23 19:51:45 2016 +0300
@@ -38,17 +38,30 @@
MemberValue
}
+ #region Parser rules
struct ParserContext {
- DFAStateDescriptior