# HG changeset patch
# User cin
# Date 1402846751 -14400
# Node ID c0bf853aa04fd8e9e34f9d64bc1a70b8dc602d5d
# Parent 2c332a9c64c013d386c1617c4983cc8d28913b09
Added initial JSON support
+JSONParser
+JSONWriter
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/CustomEqualityComparer.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/CustomEqualityComparer.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,49 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab {
+ ///
+ /// Обертка для создания IEqualityComparer с использованием делегатов или лямда-выражений.
+ ///
+ /// Тип сравниваемых значений
+ public class CustomEqualityComparer : IEqualityComparer {
+ Func m_equals;
+ Func m_hash;
+
+ ///
+ /// Создает новый объект с указанными функциями сравнения на раветво и получения хеш-кода.
+ ///
+ /// Функция проверки на равенство
+ /// Функция получения хешкода
+ public CustomEqualityComparer(Func equality, Func hash) {
+ Safe.ArgumentNotNull(equality, "equality");
+ Safe.ArgumentNotNull(hash, "hash");
+ m_hash = hash;
+ m_equals = equality;
+ }
+
+ ///
+ /// Сравнивает два знаечния на ревенство.
+ ///
+ ///
+ ///
+ /// Результат сравнения на равенство
+ public bool Equals(T x, T y) {
+ return m_equals(x,y);
+ }
+
+ ///
+ /// Получает хеш-код для указанного значения.
+ ///
+ ///
+ /// Равные знаечния *должны* иметь одинаковый хеш-код.
+ /// Хеш-код
+ public int GetHashCode(T obj) {
+ return m_hash(obj);
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Implab.csproj
--- a/Implab/Implab.csproj Sat Apr 26 23:36:00 2014 +0400
+++ b/Implab/Implab.csproj Sun Jun 15 19:39:11 2014 +0400
@@ -33,6 +33,7 @@
+
@@ -52,10 +53,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JSONElementContext.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JSONElementContext.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,16 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ ///
+ /// internal
+ ///
+ public enum JSONElementContext {
+ None,
+ Object,
+ Array
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JSONElementType.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JSONElementType.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,34 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ ///
+ /// Тип элемента на котором находится парсер
+ ///
+ public enum JSONElementType {
+ None,
+ ///
+ /// Начало объекта
+ ///
+ BeginObject,
+ ///
+ /// Конец объекта
+ ///
+ EndObject,
+ ///
+ /// Начало массива
+ ///
+ BeginArray,
+ ///
+ /// Конец массива
+ ///
+ EndArray,
+ ///
+ /// Простое значение
+ ///
+ Value
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JSONGrammar.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JSONGrammar.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,113 @@
+using Implab.Parsing;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ internal class JSONGrammar : Grammar {
+ public enum TokenType : int{
+ None,
+ BeginObject,
+ EndObject,
+ BeginArray,
+ EndArray,
+ String,
+ Number,
+ Literal,
+ NameSeparator,
+ ValueSeparator,
+
+ StringBound,
+ EscapedChar,
+ UnescapedChar,
+ EscapedUnicode,
+
+ Minus,
+ Plus,
+ Sign,
+ Integer,
+ Dot,
+ Exp
+ }
+
+ readonly CDFADefinition m_jsonDFA;
+ readonly CDFADefinition m_stringDFA;
+
+ public JSONGrammar() {
+ DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x));
+ var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9'));
+ var digit9 = SymbolRangeToken('1', '9');
+ var zero = SymbolToken('0');
+ var digit = zero.Or(digit9);
+ var dot = SymbolToken('.');
+ var minus = SymbolToken('-');
+ var sign = SymbolSetToken('-', '+');
+ var expSign = SymbolSetToken('e', 'E');
+ var letters = SymbolRangeToken('a', 'z');
+ var integer = zero.Or(digit9.Cat(digit.EClosure()));
+ var frac = dot.Cat(digit.Closure());
+ var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure());
+ var quote = SymbolToken('"');
+ var backSlash = SymbolToken('\\');
+ var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r');
+ var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4));
+ var escape = backSlash.Cat(specialEscapeChars.Or(unicodeEspace));
+ var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure();
+ var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace);
+ var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace);
+ var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace);
+ var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace);
+ var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace);
+ var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace);
+
+ var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional());
+ var literal = letters.Closure();
+ var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x));
+ var character = unescaped.Or(escape);
+ var str = quote.Cat(character.EClosure()).Cat(quote);
+
+
+ var jsonExpression =
+ number.Tag(TokenType.Number)
+ .Or(literal.Tag(TokenType.Literal))
+ .Or(quote.Tag(TokenType.StringBound))
+ .Or(beginObject.Tag(TokenType.BeginObject))
+ .Or(endObject.Tag(TokenType.EndObject))
+ .Or(beginArray.Tag(TokenType.BeginArray))
+ .Or(endArray.Tag(TokenType.EndArray))
+ .Or(nameSep.Tag(TokenType.NameSeparator))
+ .Or(valueSep.Tag(TokenType.ValueSeparator));
+
+
+ var jsonStringExpression =
+ quote.Tag(TokenType.StringBound)
+ .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar))
+ .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode))
+ .Or(unescaped.Closure().Tag(TokenType.UnescapedChar));
+
+ var jsonNumberExpression =
+ minus.Tag(TokenType.Minus)
+ .Or(SymbolToken('+').Tag(TokenType.Plus))
+ .Or(digit.Closure().Tag(TokenType.Integer))
+ .Or(dot.Tag(TokenType.Dot))
+ .Or(expSign.Tag(TokenType.Exp));
+
+ m_jsonDFA = BuildDFA(jsonExpression);
+ m_stringDFA = BuildDFA(jsonStringExpression);
+ }
+
+ public CDFADefinition JsonDFA {
+ get {
+ return m_jsonDFA;
+ }
+ }
+
+ public CDFADefinition JsonStringDFA {
+ get {
+ return m_stringDFA;
+ }
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JSONParser.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JSONParser.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,197 @@
+using Implab;
+using Implab.Parsing;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ ///
+ /// internal
+ ///
+ public struct JSONParserContext {
+ public string memberName;
+ public JSONElementContext elementContext;
+ }
+
+ ///
+ /// Pull парсер JSON данных.
+ ///
+ public class JSONParser : DFAutomaton {
+
+ enum MemberContext {
+ MemberName,
+ MemberValue
+ }
+
+ static readonly EnumAlphabet _alphabet = EnumAlphabet.FullAlphabet;
+ static readonly DFAStateDescriptior[] _jsonDFA;
+ static readonly DFAStateDescriptior[] _objectDFA;
+ static readonly DFAStateDescriptior[] _arrayDFA;
+
+ static JSONParser() {
+ var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0);
+
+ var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String);
+ var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression);
+ var objectExpression = memberExpression
+ .Cat(
+ Token.New(JsonTokenType.ValueSeparator)
+ .Cat(memberExpression)
+ .EClosure()
+ )
+ .Optional()
+ .Cat(Token.New(JsonTokenType.EndObject))
+ .Tag(0);
+ var arrayExpression = valueExpression
+ .Cat(
+ Token.New(JsonTokenType.ValueSeparator)
+ .Cat(valueExpression)
+ .EClosure()
+ )
+ .Optional()
+ .Cat(Token.New(JsonTokenType.EndArray))
+ .Tag(0);
+
+ _jsonDFA = BuildDFA(jsonExpression).States;
+ _objectDFA = BuildDFA(objectExpression).States;
+ _arrayDFA = BuildDFA(arrayExpression).States;
+ }
+
+ static EDFADefinition BuildDFA(Token expr) {
+ var builder = new DFABuilder();
+ var dfa = new EDFADefinition(_alphabet);
+ expr.Accept(builder);
+
+ builder.BuildDFA(dfa);
+ return dfa;
+ }
+
+ JSONScanner m_scanner;
+ MemberContext m_memberContext;
+
+ JSONElementType m_elementType;
+ object m_elementValue;
+
+ public JSONParser(string text)
+ : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty } ) {
+ Safe.ArgumentNotEmpty(text, "text");
+ m_scanner = new JSONScanner();
+ m_scanner.Feed(text.ToCharArray());
+ }
+
+ public JSONElementType ElementType {
+ get { return m_elementType; }
+ }
+
+ public string ElementName {
+ get { return m_context.info.memberName; }
+ }
+
+ public object ElementValue {
+ get { return m_elementValue; }
+ }
+
+ public bool Read() {
+ if (m_context.current == UNREACHEBLE_STATE)
+ throw new InvalidOperationException("The parser is in invalid state");
+ object tokenValue;
+ JsonTokenType tokenType;
+ m_context.info.memberName = String.Empty;
+ while (m_scanner.ReadToken(out tokenValue, out tokenType)) {
+ Move((int)tokenType);
+ if (m_context.current == UNREACHEBLE_STATE)
+ UnexpectedToken(tokenValue, tokenType);
+ switch (tokenType) {
+ case JsonTokenType.BeginObject:
+ Switch(
+ _objectDFA,
+ INITIAL_STATE,
+ new JSONParserContext {
+ memberName = m_context.info.memberName,
+ elementContext = JSONElementContext.Object
+ }
+ );
+ m_elementValue = null;
+ m_memberContext = MemberContext.MemberName;
+ m_elementType = JSONElementType.BeginObject;
+ return true;
+ case JsonTokenType.EndObject:
+ Restore();
+ m_elementValue = null;
+ m_elementType = JSONElementType.EndObject;
+ return true;
+ case JsonTokenType.BeginArray:
+ Switch(
+ _arrayDFA,
+ INITIAL_STATE,
+ new JSONParserContext {
+ memberName = m_context.info.memberName,
+ elementContext = JSONElementContext.Array
+ }
+ );
+ m_elementValue = null;
+ m_memberContext = MemberContext.MemberValue;
+ m_elementType = JSONElementType.BeginArray;
+ return true;
+ case JsonTokenType.EndArray:
+ Restore();
+ m_elementValue = null;
+ m_elementType = JSONElementType.EndArray;
+ return true;
+ case JsonTokenType.String:
+ if (m_memberContext == MemberContext.MemberName) {
+ m_context.info.memberName = (string)tokenValue;
+ break;
+ } else {
+ m_elementType = JSONElementType.Value;
+ m_elementValue = tokenValue;
+ return true;
+ }
+ case JsonTokenType.Number:
+ m_elementType = JSONElementType.Value;
+ m_elementValue = tokenValue;
+ return true;
+ case JsonTokenType.Literal:
+ m_elementType = JSONElementType.Value;
+ m_elementValue = ParseLiteral((string)tokenValue);
+ return true;
+ case JsonTokenType.NameSeparator:
+ m_memberContext = MemberContext.MemberValue;
+ break;
+ case JsonTokenType.ValueSeparator:
+ m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue;
+ break;
+ default:
+ UnexpectedToken(tokenValue, tokenType);
+ break;
+ }
+ }
+ if (m_context.info.elementContext != JSONElementContext.None)
+ throw new ParserException("Unexpedted end of data");
+ return false;
+ }
+
+ object ParseLiteral(string literal) {
+ switch (literal) {
+ case "null":
+ return null;
+ case "false" :
+ return false;
+ case "true":
+ return true;
+ default:
+ UnexpectedToken(literal, JsonTokenType.Literal);
+ return null; // avoid compliler error
+ }
+ }
+
+ void UnexpectedToken(object value, JsonTokenType tokenType) {
+ throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value));
+ }
+
+ }
+
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JSONScanner.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JSONScanner.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,89 @@
+using Implab.Parsing;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ ///
+ /// Сканнер, разбивающий поток символов на токены JSON.
+ ///
+ public class JSONScanner : Scanner {
+ char[] m_stringBuffer;
+ DFAStateDescriptior[] m_stringDFA;
+ int[] m_stringAlphabet;
+
+ public JSONScanner()
+ : base(JSONGrammar.Instance.JsonDFA) {
+ m_stringBuffer = new char[1024];
+ var dfa = JSONGrammar.Instance.JsonStringDFA;
+ m_stringAlphabet = dfa.Alphabet.GetTranslationMap();
+ m_stringDFA = dfa.States;
+ }
+
+ public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
+ if (ReadTokenInternal()) {
+ switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
+ case JSONGrammar.TokenType.StringBound:
+ tokenValue = ReadString();
+ tokenType = JsonTokenType.String;
+ break;
+ case JSONGrammar.TokenType.Number:
+ tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture);
+ tokenType = JsonTokenType.Number;
+ break;
+ default:
+ tokenType = (JsonTokenType)m_currentState.tag[0];
+ tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen);
+ break;
+ }
+ return true;
+ }
+ tokenValue = null;
+ tokenType = JsonTokenType.None;
+ return false;
+ }
+
+ string ReadString() {
+ int pos = 0;
+ Switch(m_stringDFA, m_stringAlphabet);
+ while (ReadTokenInternal()) {
+ switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
+ case JSONGrammar.TokenType.StringBound:
+ Restore();
+ return new String(m_stringBuffer, 0, pos);
+ case JSONGrammar.TokenType.UnescapedChar:
+ EnsureStringBufferSize(pos + m_tokenLen);
+ Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen);
+ pos += m_tokenLen;
+ break;
+ case JSONGrammar.TokenType.EscapedUnicode:
+ EnsureStringBufferSize(pos + 1);
+ m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2);
+ pos++;
+ break;
+ case JSONGrammar.TokenType.EscapedChar:
+ EnsureStringBufferSize(pos + 1);
+ m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]);
+ pos++;
+ break;
+ default:
+ break;
+ }
+
+ }
+
+ throw new ParserException("Unexpected end of data");
+ }
+
+ void EnsureStringBufferSize(int size) {
+ if (size > m_stringBuffer.Length) {
+ var newBuffer = new char[size];
+ m_stringBuffer.CopyTo(newBuffer, 0);
+ m_stringBuffer = newBuffer;
+ }
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JSONWriter.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JSONWriter.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,227 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ public class JSONWriter {
+ struct Context {
+ public bool needComma;
+ public JSONElementContext element;
+ }
+ Stack m_contextStack = new Stack();
+ Context m_context;
+
+ TextWriter m_writer;
+ bool m_indent;
+
+ static readonly char [] _escapeBKS,
+ _escapeFWD,
+ _escapeCR,
+ _escapeNL,
+ _escapeTAB,
+ _escapeSLASH,
+ _escapeBSLASH,
+ _escapeQ;
+
+ static JSONWriter() {
+ _escapeBKS = "\\b".ToCharArray();
+ _escapeFWD = "\\f".ToCharArray();
+ _escapeCR = "\\r".ToCharArray();
+ _escapeNL = "\\n".ToCharArray();
+ _escapeTAB = "\\t".ToCharArray();
+ _escapeBSLASH = "\\\\".ToCharArray();
+ _escapeSLASH = "\\/".ToCharArray();
+ _escapeQ = "\\\"".ToCharArray();
+ }
+
+ public JSONWriter(TextWriter writer) {
+ Safe.ArgumentNotNull(writer, "writer");
+
+ m_writer = writer;
+ }
+
+ void WriteMemberName(string name) {
+ Safe.ArgumentNotEmpty(name, "name");
+ if (m_context.element != JSONElementContext.Object)
+ OperationNotApplicable("WriteMember");
+ if (m_context.needComma)
+ m_writer.Write(", ");
+ // TODO indent
+ m_context.needComma = true;
+ Write(name);
+ m_writer.Write(" : ");
+ }
+
+ public void WriteValue(string name, string value) {
+ WriteMemberName(name);
+ Write(value);
+ }
+
+ public void WriteValue(string name, bool value) {
+ WriteMemberName(name);
+ Write(value);
+ }
+
+ public void WriteValue(string name, double value) {
+ WriteMemberName(name);
+ Write(value);
+ }
+
+
+
+ public void WriteValue(string value) {
+ if (m_context.element != JSONElementContext.Array)
+ OperationNotApplicable("WriteValue");
+ if (m_context.needComma)
+ m_writer.Write(", ");
+ m_context.needComma = true;
+
+ Write(value);
+ }
+
+ public void WriteValue(bool value) {
+ if (m_context.element != JSONElementContext.Array)
+ OperationNotApplicable("WriteValue");
+ if (m_context.needComma)
+ m_writer.Write(", ");
+ m_context.needComma = true;
+
+ Write(value);
+ }
+
+ public void WriteValue(double value) {
+ if (m_context.element != JSONElementContext.Array)
+ OperationNotApplicable("WriteValue");
+ if (m_context.needComma)
+ m_writer.Write(", ");
+ m_context.needComma = true;
+
+ Write(value);
+ }
+
+ public void BeginObject() {
+ if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array)
+ OperationNotApplicable("BeginObject");
+ if (m_context.needComma)
+ m_writer.Write(", ");
+ m_context.needComma = true;
+
+ m_contextStack.Push(m_context);
+
+ m_context = new Context { element = JSONElementContext.Object, needComma = false };
+ m_writer.Write("{ ");
+ }
+
+ public void BeginObject(string name) {
+ WriteMemberName(name);
+
+ m_contextStack.Push(m_context);
+
+ m_context = new Context { element = JSONElementContext.Object, needComma = false };
+ m_writer.Write("{ ");
+ }
+
+ public void EndObject() {
+ if (m_context.element != JSONElementContext.Object)
+ OperationNotApplicable("EndArray");
+
+ m_writer.Write(" }");
+ m_context = m_contextStack.Pop();
+ }
+
+ public void BeginArray() {
+ if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array)
+ throw new InvalidOperationException();
+ if (m_context.needComma)
+ m_writer.Write(", ");
+ m_context.needComma = true;
+
+ m_contextStack.Push(m_context);
+
+ m_context = new Context { element = JSONElementContext.Array, needComma = false };
+ m_writer.Write("[ ");
+ }
+
+ public void BeginArray(string name) {
+ WriteMemberName(name);
+
+ m_contextStack.Push(m_context);
+
+ m_context = new Context { element = JSONElementContext.Array, needComma = false };
+ m_writer.Write("[ ");
+ }
+
+ public void EndArray() {
+ if (m_context.element != JSONElementContext.Array)
+ OperationNotApplicable("EndArray");
+
+ m_writer.Write(" ]");
+ m_context = m_contextStack.Pop();
+ }
+
+ void Write(bool value) {
+ m_writer.Write(value ? "true" : "false");
+ }
+
+
+ void Write(string value) {
+ if (value == null)
+ m_writer.Write("null");
+
+ var chars = value.ToCharArray();
+ m_writer.Write('"');
+
+ for (int i = 0; i < chars.Length; i++) {
+ var ch = chars[i];
+
+ switch (ch) {
+ case '\b':
+ m_writer.Write(_escapeBKS);
+ break;
+ case '\f':
+ m_writer.Write(_escapeFWD);
+ break;
+ case '\r':
+ m_writer.Write(_escapeCR);
+ break;
+ case '\n':
+ m_writer.Write(_escapeNL);
+ break;
+ case '\t':
+ m_writer.Write(_escapeTAB);
+ break;
+ case '\\':
+ m_writer.Write(_escapeBSLASH);
+ break;
+ case '/':
+ m_writer.Write(_escapeSLASH);
+ break;
+ case '"':
+ m_writer.Write(_escapeQ);
+ break;
+ default:
+ if (ch < 0x20) {
+ m_writer.Write("\\u00{0:x2}",(int)ch);
+ } else {
+ m_writer.Write(ch);
+ }
+ break;
+ }
+ }
+
+ m_writer.Write('"');
+ }
+
+ void Write(double value) {
+ m_writer.Write(value);
+ }
+
+ void OperationNotApplicable(string opName) {
+ throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of '{1}'", opName, m_context.element ));
+ }
+
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/JsonTokenType.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/JsonTokenType.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,50 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ ///
+ /// Тип токенов, возвращаемых .
+ ///
+ public enum JsonTokenType : int {
+ None = 0,
+ ///
+ /// Начало объекта
+ ///
+ BeginObject,
+ ///
+ /// Конец объекта
+ ///
+ EndObject,
+ ///
+ /// Начало массива
+ ///
+ BeginArray,
+ ///
+ /// Конец массива
+ ///
+ EndArray,
+ ///
+ /// Строка
+ ///
+ String,
+ ///
+ /// Число
+ ///
+ Number,
+ ///
+ /// Литерал
+ ///
+ Literal,
+ ///
+ /// Разделитель имени :
+ ///
+ NameSeparator,
+ ///
+ /// Разделитель имени ,
+ ///
+ ValueSeparator
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/JSON/StringTranslator.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/JSON/StringTranslator.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,96 @@
+using Implab;
+using Implab.Parsing;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.JSON {
+ ///
+ /// Класс для преобразования экранированной строки JSON
+ ///
+ public class StringTranslator : Scanner {
+ static readonly char[] _escMap;
+ static readonly int[] _hexMap;
+
+ static StringTranslator() {
+ var chars = new char[] { 'b', 'f', 't', 'r', 'n', '\\', '/' };
+ var vals = new char[] { '\b', '\f', '\t', '\r', '\n', '\\', '/' };
+
+ _escMap = new char[chars.Max() + 1];
+
+ for (int i = 0; i < chars.Length; i++)
+ _escMap[chars[i]] = vals[i];
+
+ var hexs = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' };
+ var ints = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15 };
+
+ _hexMap = new int[hexs.Max() + 1];
+
+ for (int i = 0; i < hexs.Length; i++)
+ _hexMap[hexs[i]] = ints[i];
+
+ }
+
+ public StringTranslator()
+ : base(JSONGrammar.Instance.JsonStringDFA) {
+ }
+
+ public string Translate(string data) {
+ Safe.ArgumentNotNull(data, "data");
+ return Translate(data.ToCharArray());
+ }
+
+ public string Translate(char[] data) {
+ Safe.ArgumentNotNull(data, "data");
+ return Translate(data, data.Length);
+ }
+
+ public string Translate(char[] data, int length) {
+ Safe.ArgumentNotNull(data, "data");
+ Safe.ArgumentInRange(length, 0, data.Length, "length");
+
+ var translated = new char[length];
+
+ Feed(data,length);
+
+ int pos = 0;
+
+ while (ReadTokenInternal()) {
+ switch ((JSONGrammar.TokenType)TokenTags[0]) {
+ case JSONGrammar.TokenType.UnescapedChar:
+ Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen);
+ pos += m_tokenLen;
+ break;
+ case JSONGrammar.TokenType.EscapedChar:
+ translated[pos] = _escMap[m_buffer[m_tokenOffset + 1]];
+ pos++;
+ break;
+ case JSONGrammar.TokenType.EscapedUnicode:
+ translated[pos] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2);
+ pos++;
+ break;
+ }
+ }
+
+ return new String(translated, 0, pos);
+ }
+
+ internal static char TranslateEscapedChar(char symbol) {
+ return _escMap[symbol];
+ }
+
+ internal static char TranslateHexUnicode(char[] symbols, int offset) {
+ Debug.Assert(symbols != null);
+ Debug.Assert(symbols.Length - offset >= 4);
+
+ int value = (_hexMap[symbols[offset]] << 12)
+ | (_hexMap[symbols[offset + 1]] << 8)
+ | (_hexMap[symbols[offset + 2]] << 4)
+ | (_hexMap[symbols[offset + 3]]);
+ return (char)value;
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/Alphabet.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/Alphabet.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,23 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public class Alphabet: AlphabetBase {
+
+ public override int GetSymbolIndex(char symbol) {
+ return symbol;
+ }
+
+ public override IEnumerable InputSymbols {
+ get { return Enumerable.Range(char.MinValue, char.MaxValue).Select(x => (char)x); }
+ }
+
+ protected override int MapSize {
+ get { return char.MaxValue + 1; }
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/AlphabetBase.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/AlphabetBase.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,103 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public abstract class AlphabetBase : IAlphabet {
+ public const int UNCLASSIFIED = 0;
+
+ int m_nextId = 1;
+ int[] m_map;
+
+ public int Count {
+ get { return m_nextId; }
+ }
+
+ protected AlphabetBase() {
+ m_map = new int[MapSize];
+ }
+
+ protected AlphabetBase(int[] map) {
+ Debug.Assert(map != null);
+ Debug.Assert(map.Length == MapSize);
+
+ m_map = map;
+ m_nextId = map.Max() + 1;
+ }
+
+ public int DefineSymbol(T symbol) {
+ var index = GetSymbolIndex(symbol);
+ if (m_map[index] == UNCLASSIFIED)
+ m_map[index] = m_nextId++;
+ return m_map[index];
+ }
+
+ public int DefineClass(IEnumerable symbols) {
+ Safe.ArgumentNotNull(symbols, "symbols");
+ symbols = symbols.Distinct();
+
+ foreach (var symbol in symbols) {
+ var index = GetSymbolIndex(symbol);
+ if (m_map[index] == UNCLASSIFIED)
+ m_map[GetSymbolIndex(symbol)] = m_nextId;
+ else
+ throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol));
+ }
+ return m_nextId++;
+ }
+
+ public List[] CreateReverseMap() {
+ return
+ Enumerable.Range(UNCLASSIFIED, Count)
+ .Select(
+ i => InputSymbols
+ .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i)
+ .ToList()
+ )
+ .ToArray();
+ }
+
+ public int[] Reclassify(IAlphabet newAlphabet, IEnumerable> classes) {
+ Safe.ArgumentNotNull(newAlphabet, "newAlphabet");
+ Safe.ArgumentNotNull(classes, "classes");
+ var reverseMap = CreateReverseMap();
+
+ int[] translationMap = new int[Count];
+
+ foreach (var scl in classes) {
+ // skip if the supper class contains the unclassified element
+ if (scl.Contains(UNCLASSIFIED))
+ continue;
+ var range = new List();
+ foreach (var cl in scl) {
+ if (cl < 0 || cl >= reverseMap.Length)
+ throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl));
+ range.AddRange(reverseMap[cl]);
+ }
+ var newClass = newAlphabet.DefineClass(range);
+ foreach (var cl in scl)
+ translationMap[cl] = newClass;
+ }
+
+ return translationMap;
+ }
+
+ public int Translate(T symbol) {
+ return m_map[GetSymbolIndex(symbol)];
+ }
+
+ public abstract int GetSymbolIndex(T symbol);
+
+ public abstract IEnumerable InputSymbols { get; }
+
+ protected abstract int MapSize { get; }
+
+ public int[] GetTranslationMap() {
+ return m_map;
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/AltToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/AltToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,22 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public class AltToken: BinaryToken {
+ public AltToken(Token left, Token right)
+ : base(left, right) {
+ }
+
+ public override void Accept(IVisitor visitor) {
+ Safe.ArgumentNotNull(visitor, "visitor");
+ visitor.Visit(this);
+ }
+ public override string ToString() {
+ return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right);
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/BinaryToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/BinaryToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,26 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public abstract class BinaryToken : Token {
+ Token m_left;
+ Token m_right;
+
+ public Token Left {
+ get { return m_left; }
+ }
+
+ public Token Right {
+ get { return m_right; }
+ }
+
+ protected BinaryToken(Token left, Token right) {
+ Safe.ArgumentNotNull(m_left = left, "left");
+ Safe.ArgumentNotNull(m_right = right, "right");
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/CDFADefinition.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/CDFADefinition.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,36 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public class CDFADefinition : DFADefinitionBase {
+ Alphabet m_alphabet;
+
+ public Alphabet Alphabet {
+ get { return m_alphabet; }
+ }
+
+ public override int AlphabetSize {
+ get { return m_alphabet.Count; }
+ }
+
+ public CDFADefinition(Alphabet alphabet): base() {
+ Safe.ArgumentNotNull(alphabet, "alphabet");
+ m_alphabet = alphabet;
+ }
+
+ public CDFADefinition Optimize() {
+ var optimized = new CDFADefinition(new Alphabet());
+
+ Optimize(optimized, m_alphabet, optimized.Alphabet);
+ return optimized;
+ }
+
+ public void PrintDFA() {
+ PrintDFA(m_alphabet);
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/CatToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/CatToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,27 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public class CatToken : BinaryToken {
+ public CatToken(Token left, Token right)
+ : base(left, right) {
+ }
+
+ public override void Accept(IVisitor visitor) {
+ Safe.ArgumentNotNull(visitor, "visitor");
+ visitor.Visit(this);
+ }
+
+ public override string ToString() {
+ return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right));
+ }
+
+ string FormatToken(Token token) {
+ return String.Format(token is AltToken ? "({0})" : "{0}", token);
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/DFABuilder.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/DFABuilder.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,182 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Используется для построения ДКА по регулярному выражению, сначала обходит
+ /// регулярное выражение и вычисляет followpos, затем используется метод
+ /// для построения автомата.
+ ///
+ public class DFABuilder : IVisitor {
+ int m_idx = 0;
+ Token m_root;
+ HashSet m_firstpos;
+ HashSet m_lastpos;
+
+ Dictionary> m_followpos = new Dictionary>();
+ Dictionary m_indexes = new Dictionary();
+ Dictionary m_ends = new Dictionary();
+
+ public Dictionary> FollowposMap {
+ get { return m_followpos; }
+ }
+
+ public HashSet Followpos(int pos) {
+ HashSet set;
+ if (m_followpos.TryGetValue(pos, out set))
+ return set;
+ return m_followpos[pos] = new HashSet();
+ }
+
+ bool Nullable(object n) {
+ if (n is EmptyToken || n is StarToken)
+ return true;
+ if (n is AltToken)
+ return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right);
+ if (n is CatToken)
+ return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right);
+ return false;
+ }
+
+
+ public void Visit(AltToken token) {
+ if (m_root == null)
+ m_root = token;
+ var firtspos = new HashSet();
+ var lastpos = new HashSet();
+
+ token.Left.Accept(this);
+ firtspos.UnionWith(m_firstpos);
+ lastpos.UnionWith(m_lastpos);
+
+ token.Right.Accept(this);
+ firtspos.UnionWith(m_firstpos);
+ lastpos.UnionWith(m_lastpos);
+
+ m_firstpos = firtspos;
+ m_lastpos = lastpos;
+ }
+
+ public void Visit(StarToken token) {
+ if (m_root == null)
+ m_root = token;
+ token.Token.Accept(this);
+
+ foreach (var i in m_lastpos)
+ Followpos(i).UnionWith(m_firstpos);
+ }
+
+ public void Visit(CatToken token) {
+ if (m_root == null)
+ m_root = token;
+
+ var firtspos = new HashSet();
+ var lastpos = new HashSet();
+ token.Left.Accept(this);
+ firtspos.UnionWith(m_firstpos);
+ var leftLastpos = m_lastpos;
+
+ token.Right.Accept(this);
+ lastpos.UnionWith(m_lastpos);
+ var rightFirstpos = m_firstpos;
+
+ if (Nullable(token.Left))
+ firtspos.UnionWith(rightFirstpos);
+
+ if (Nullable(token.Right))
+ lastpos.UnionWith(leftLastpos);
+
+ m_firstpos = firtspos;
+ m_lastpos = lastpos;
+
+ foreach (var i in leftLastpos)
+ Followpos(i).UnionWith(rightFirstpos);
+
+ }
+
+ public void Visit(EmptyToken token) {
+ if (m_root == null)
+ m_root = token;
+ ;
+ }
+
+ public void Visit(SymbolToken token) {
+ if (m_root == null)
+ m_root = token;
+ m_idx++;
+ m_indexes[m_idx] = token.Value;
+ m_firstpos = new HashSet(new[] { m_idx });
+ m_lastpos = new HashSet(new[] { m_idx });
+ }
+
+ public void Visit(EndToken token) {
+ if (m_root == null)
+ m_root = token;
+ m_idx++;
+ m_indexes[m_idx] = Alphabet.UNCLASSIFIED;
+ m_firstpos = new HashSet(new[] { m_idx });
+ m_lastpos = new HashSet(new[] { m_idx });
+ Followpos(m_idx);
+ m_ends.Add(m_idx, token.Tag);
+ }
+
+ public void BuildDFA(IDFADefinition dfa) {
+ Safe.ArgumentNotNull(dfa,"dfa");
+
+ var stateMap = new Dictionary, int>(new CustomEqualityComparer>(
+ (x, y) => x.SetEquals(y),
+ (x) => x.Sum(n => n.GetHashCode())
+ ));
+
+ stateMap[m_firstpos] = DefineState( dfa, m_firstpos);
+ Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE);
+
+ var queue = new Queue>();
+
+ queue.Enqueue(m_firstpos);
+
+ while (queue.Count > 0) {
+ var state = queue.Dequeue();
+ var s1 = stateMap[state];
+
+ for (int a = 0; a < dfa.AlphabetSize; a++) {
+ var next = new HashSet();
+ foreach (var p in state) {
+ if (m_indexes[p] == a) {
+ next.UnionWith(Followpos(p));
+ }
+ }
+ if (next.Count > 0) {
+ int s2;
+ if (!stateMap.TryGetValue(next, out s2)) {
+ stateMap[next] = s2 = DefineState(dfa, next);
+ queue.Enqueue(next);
+ }
+ dfa.DefineTransition(s1, s2, a);
+ }
+ }
+
+ }
+ }
+
+ int[] GetStateTags(HashSet state) {
+ Debug.Assert(state != null);
+ return state.Where(pos => m_ends.ContainsKey(pos)).Select(pos => m_ends[pos]).ToArray();
+ }
+
+ int DefineState(IDFADefinition automa, HashSet state) {
+ Debug.Assert(automa != null);
+ Debug.Assert(state != null);
+
+ var tags = GetStateTags(state);
+
+ return tags.Length > 0 ? automa.AddState(tags) : automa.AddState();
+ }
+
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/DFADefinitionBase.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/DFADefinitionBase.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,262 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public abstract class DFADefinitionBase : IDFADefinition {
+ readonly List m_states;
+
+ public const int INITIAL_STATE = 1;
+ public const int UNREACHEBLE_STATE = 0;
+
+ DFAStateDescriptior[] m_statesArray;
+
+ public DFADefinitionBase() {
+ m_states = new List();
+
+ m_states.Add(new DFAStateDescriptior());
+ }
+
+ public DFAStateDescriptior[] States {
+ get {
+ if (m_statesArray == null)
+ m_statesArray = m_states.ToArray();
+ return m_statesArray;
+ }
+ }
+
+ public bool InitialStateIsFinal {
+ get {
+ return m_states[INITIAL_STATE].final;
+ }
+ }
+
+ public int AddState() {
+ var index = m_states.Count;
+ m_states.Add(new DFAStateDescriptior {
+ final = false,
+ transitions = new int[AlphabetSize]
+ });
+
+ return index;
+ }
+
+ public int AddState(int[] tag) {
+ var index = m_states.Count;
+ bool final = tag == null || tag.Length == 0 ? false : true;
+ m_states.Add(new DFAStateDescriptior {
+ final = final,
+ transitions = new int[AlphabetSize],
+ tag = final ? tag : null
+ });
+ return index;
+ }
+
+ public void DefineTransition(int s1,int s2, int symbol) {
+ Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1");
+ Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2");
+ Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol");
+
+ m_states[s1].transitions[symbol] = s2;
+ }
+
+ protected void Optimize(IDFADefinition minimalDFA,IAlphabet sourceAlphabet, IAlphabet minimalAlphabet) {
+ Safe.ArgumentNotNull(minimalDFA, "minimalDFA");
+ Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet");
+
+ var setComparer = new CustomEqualityComparer>(
+ (x, y) => x.SetEquals(y),
+ (s) => s.Sum(x => x.GetHashCode())
+ );
+
+ var arrayComparer = new CustomEqualityComparer(
+ (x,y) => (new HashSet(x)).SetEquals(new HashSet(y)),
+ (a) => a.Sum(x => x.GetHashCode())
+ );
+
+ var optimalStates = new HashSet>(setComparer);
+ var queue = new HashSet>(setComparer);
+
+ foreach (var g in Enumerable
+ .Range(INITIAL_STATE, m_states.Count-1)
+ .Select(i => new {
+ index = i,
+ descriptor = m_states[i]
+ })
+ .Where(x => x.descriptor.final)
+ .GroupBy(x => x.descriptor.tag, arrayComparer)
+ ) {
+ optimalStates.Add(new HashSet(g.Select(x => x.index)));
+ }
+
+ var state = new HashSet(
+ Enumerable
+ .Range(INITIAL_STATE, m_states.Count - 1)
+ .Where(i => !m_states[i].final)
+ );
+ optimalStates.Add(state);
+ queue.Add(state);
+
+ while (queue.Count > 0) {
+ var stateA = queue.First();
+ queue.Remove(stateA);
+
+ for (int c = 0; c < AlphabetSize; c++) {
+ var stateX = new HashSet();
+
+ for(int s = 1; s < m_states.Count; s++) {
+ if (stateA.Contains(m_states[s].transitions[c]))
+ stateX.Add(s);
+ }
+
+ foreach (var stateY in optimalStates.ToArray()) {
+ if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) {
+ var stateR1 = new HashSet(stateY);
+ var stateR2 = new HashSet(stateY);
+
+ stateR1.IntersectWith(stateX);
+ stateR2.ExceptWith(stateX);
+
+ optimalStates.Remove(stateY);
+ optimalStates.Add(stateR1);
+ optimalStates.Add(stateR2);
+
+ if (queue.Contains(stateY)) {
+ queue.Remove(stateY);
+ queue.Add(stateR1);
+ queue.Add(stateR2);
+ } else {
+ queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2);
+ }
+ }
+ }
+ }
+ }
+
+ // строим карты соотвествия оптимальных состояний с оригинальными
+
+ var initialState = optimalStates.Where(x => x.Contains(INITIAL_STATE)).Single();
+
+ // карта получения оптимального состояния по соотвествующему ему простому состоянию
+ int[] reveseOptimalMap = new int[m_states.Count];
+ // карта с индексами оптимальных состояний
+ HashSet[] optimalMap = new HashSet[optimalStates.Count + 1];
+ {
+ optimalMap[0] = new HashSet(); // unreachable state
+ optimalMap[1] = initialState; // initial state
+ foreach (var ss in initialState)
+ reveseOptimalMap[ss] = 1;
+
+ int i = 2;
+ foreach (var s in optimalStates) {
+ if (s.SetEquals(initialState))
+ continue;
+ optimalMap[i] = s;
+ foreach (var ss in s)
+ reveseOptimalMap[ss] = i;
+ i++;
+ }
+ }
+
+ // получаем минимальный алфавит
+
+ var minClasses = new HashSet>(setComparer);
+ var alphaQueue = new Queue>();
+ alphaQueue.Enqueue(new HashSet(Enumerable.Range(0,AlphabetSize)));
+
+ for (int s = 1 ; s < optimalMap.Length; s++) {
+ var newQueue = new Queue>();
+
+ foreach (var A in alphaQueue) {
+ if (A.Count == 1) {
+ minClasses.Add(A);
+ continue;
+ }
+
+ // различаем классы символов, которые переводят в различные оптимальные состояния
+ // optimalState -> alphaClass
+ var classes = new Dictionary>();
+
+ foreach (var term in A) {
+ // ищем все переходы класса по символу term
+ var s2 = reveseOptimalMap[
+ optimalMap[s].Select(x => m_states[x].transitions[term]) // все элементарные состояния, куда переходит класс s
+ .Where(x => x != 0) // только допустимые
+ .FirstOrDefault() // первое допустимое элементарное состояние, если есть
+ ];
+
+ HashSet A2;
+ if (!classes.TryGetValue(s2, out A2)) {
+ A2 = new HashSet();
+ newQueue.Enqueue(A2);
+ classes[s2] = A2;
+ }
+ A2.Add(term);
+ }
+ }
+
+ if (newQueue.Count == 0)
+ break;
+ alphaQueue = newQueue;
+ }
+
+ foreach (var A in alphaQueue)
+ minClasses.Add(A);
+
+ var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses);
+
+ // построение автомата
+
+ var states = new int[ optimalMap.Length ];
+ states[0] = UNREACHEBLE_STATE;
+
+ for(var s = INITIAL_STATE; s < states.Length; s++) {
+ var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty()).Distinct().ToArray();
+ if (tags.Length > 0)
+ states[s] = minimalDFA.AddState(tags);
+ else
+ states[s] = minimalDFA.AddState();
+ }
+
+ Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE);
+
+ for (int s1 = 1; s1 < m_states.Count; s1++) {
+ for (int c = 0; c < AlphabetSize; c++) {
+ var s2 = m_states[s1].transitions[c];
+ if (s2 != UNREACHEBLE_STATE) {
+ minimalDFA.DefineTransition(
+ reveseOptimalMap[s1],
+ reveseOptimalMap[s2],
+ alphabetMap[c]
+ );
+ }
+ }
+ }
+
+ }
+
+ protected void PrintDFA(IAlphabet alphabet) {
+
+ var reverseMap = alphabet.CreateReverseMap();
+
+ for (int i = 1; i < reverseMap.Length; i++) {
+ Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i]));
+ }
+
+ for (int i = 1; i < m_states.Count; i++) {
+ var s = m_states[i];
+ for (int c = 0; c < AlphabetSize; c++)
+ if (s.transitions[c] != UNREACHEBLE_STATE)
+ Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : "");
+ }
+ }
+
+ public abstract int AlphabetSize {
+ get;
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/DFAStateDescriptor.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/DFAStateDescriptor.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,13 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public struct DFAStateDescriptior {
+ public bool final;
+ public int[] tag;
+ public int[] transitions;
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/DFAutomaton.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/DFAutomaton.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,56 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public abstract class DFAutomaton {
+ protected struct ContextFrame {
+ public DFAStateDescriptior[] states;
+ public int current;
+ public T info;
+ }
+
+ public const int INITIAL_STATE = DFADefinitionBase.INITIAL_STATE;
+ public const int UNREACHEBLE_STATE = DFADefinitionBase.UNREACHEBLE_STATE;
+
+ protected ContextFrame m_context;
+ Stack m_contextStack = new Stack();
+
+ public int Level {
+ get { return m_contextStack.Count; }
+ }
+
+ protected DFAutomaton(DFAStateDescriptior[] states, int startState, T info) {
+ Safe.ArgumentNotNull(states, "states");
+ Safe.ArgumentInRange(startState, 0, states.Length - 1, "startState");
+
+ m_context.states = states;
+ m_context.current = startState;
+ m_context.info = info;
+ }
+
+ protected void Switch(DFAStateDescriptior[] states, int current, T info) {
+ Debug.Assert(states != null);
+ Debug.Assert(current >= 0 && current < states.Length);
+ m_contextStack.Push(m_context);
+ m_context. states = states;
+ m_context.current = current;
+ m_context.info = info;
+ }
+
+ protected void Restore() {
+ Debug.Assert(m_contextStack.Count > 0);
+
+ m_context = m_contextStack.Pop();
+ }
+
+ protected void Move(int input) {
+ Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length);
+ m_context.current = m_context.states[m_context.current].transitions[input];
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/EDFADefinition.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/EDFADefinition.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,37 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public class EDFADefinition : DFADefinitionBase where T : struct, IConvertible {
+ EnumAlphabet m_alphabet;
+
+ public EnumAlphabet Alphabet {
+ get { return m_alphabet; }
+ }
+
+ public EDFADefinition(EnumAlphabet alphabet)
+ : base() {
+ Safe.ArgumentNotNull(alphabet, "alphabet");
+ m_alphabet = alphabet;
+ }
+
+ public override int AlphabetSize {
+ get { return m_alphabet.Count; }
+ }
+
+ public EDFADefinition Optimize() {
+ var optimized = new EDFADefinition(new EnumAlphabet());
+ Optimize(optimized, m_alphabet, optimized.Alphabet);
+
+ return optimized;
+ }
+
+ public void PrintDFA() {
+ PrintDFA(m_alphabet);
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/EmptyToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/EmptyToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,18 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public class EmptyToken : Token {
+ public override void Accept(IVisitor visitor) {
+ Safe.ArgumentNotNull(visitor, "visitor");
+ visitor.Visit(this);
+ }
+ public override string ToString() {
+ return "$";
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/EndToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/EndToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,37 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Конечный символ расширенного регулярного выражения, при построении ДКА
+ /// используется для определения конечных состояний.
+ ///
+ public class EndToken: Token {
+
+ int m_tag;
+
+ public EndToken(int tag) {
+ m_tag = tag;
+ }
+
+ public EndToken()
+ : this(0) {
+ }
+
+ public int Tag {
+ get { return m_tag; }
+ }
+
+ public override void Accept(IVisitor visitor) {
+ Safe.ArgumentNotNull(visitor, "visitor");
+ visitor.Visit(this);
+ }
+ public override string ToString() {
+ return "#";
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/EnumAlphabet.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/EnumAlphabet.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,68 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Алфавит символами которого являются элементы перечислений.
+ ///
+ /// Тип перечислений
+ public class EnumAlphabet : AlphabetBase where T : struct, IConvertible {
+ static readonly T[] _symbols;
+ static readonly EnumAlphabet _fullAlphabet;
+
+ static EnumAlphabet() {
+ if (!typeof(T).IsEnum)
+ throw new InvalidOperationException("Invalid generic parameter, enumeration is required");
+
+ if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32))
+ throw new InvalidOperationException("Only enums based on Int32 are supported");
+
+ _symbols = ((T[])Enum.GetValues(typeof(T)))
+ .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture))
+ .ToArray();
+
+ if (
+ _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length
+ || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0
+ )
+ throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered");
+
+ _fullAlphabet = new EnumAlphabet(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray());
+ }
+
+
+
+ public static EnumAlphabet FullAlphabet {
+ get {
+ return _fullAlphabet;
+ }
+ }
+
+
+ public EnumAlphabet()
+ : base() {
+ }
+
+ public EnumAlphabet(int[] map)
+ : base(map) {
+ }
+
+
+ public override int GetSymbolIndex(T symbol) {
+ return symbol.ToInt32(CultureInfo.InvariantCulture);
+ }
+
+ public override IEnumerable InputSymbols {
+ get { return _symbols; }
+ }
+
+ protected override int MapSize {
+ get { return _symbols.Length; }
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/Grammar.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/Grammar.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,103 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа char.
+ ///
+ ///
+ public abstract class Grammar where TGrammar: Grammar, new() {
+ Alphabet m_alphabet = new Alphabet();
+ static TGrammar _instance;
+
+ public static TGrammar Instance{
+ get {
+ if (_instance == null)
+ _instance = new TGrammar();
+ return _instance;
+ }
+ }
+
+ public SymbolToken UnclassifiedToken() {
+ return new SymbolToken(Alphabet.UNCLASSIFIED);
+ }
+
+ public void DefineAlphabet(IEnumerable alphabet) {
+ Safe.ArgumentNotNull(alphabet, "alphabet");
+
+ foreach (var ch in alphabet)
+ m_alphabet.DefineSymbol(ch);
+ }
+ public Token SymbolRangeToken(char start, char end) {
+ return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x));
+ }
+
+ public Token SymbolToken(char symbol) {
+ return Token.New(TranslateOrAdd(symbol));
+ }
+
+ public Token SymbolToken(IEnumerable symbols) {
+ Safe.ArgumentNotNull(symbols, "symbols");
+
+ return Token.New(TranslateOrAdd(symbols).ToArray());
+ }
+
+ public Token SymbolSetToken(params char[] set) {
+ return SymbolToken(set);
+ }
+
+ int TranslateOrAdd(char ch) {
+ var t = m_alphabet.Translate(ch);
+ if (t == Alphabet.UNCLASSIFIED)
+ t = m_alphabet.DefineSymbol(ch);
+ return t;
+ }
+
+ IEnumerable TranslateOrAdd(IEnumerable symbols) {
+ return symbols.Distinct().Select(TranslateOrAdd);
+ }
+
+ int TranslateOrDie(char ch) {
+ var t = m_alphabet.Translate(ch);
+ if (t == Alphabet.UNCLASSIFIED)
+ throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch));
+ return t;
+ }
+
+ IEnumerable TranslateOrDie(IEnumerable symbols) {
+ return symbols.Distinct().Select(TranslateOrDie);
+ }
+
+ public Token SymbolTokenExcept(IEnumerable symbols) {
+ Safe.ArgumentNotNull(symbols, "symbols");
+
+ return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray());
+ }
+
+ protected CDFADefinition BuildDFA(Token lang) {
+ Safe.ArgumentNotNull(lang, "lang");
+
+ var dfa = new CDFADefinition(m_alphabet);
+
+ var builder = new DFABuilder();
+
+ lang.Accept( builder );
+
+ builder.BuildDFA(dfa);
+ if (dfa.InitialStateIsFinal)
+ throw new ApplicationException("The specified language contains empty token");
+
+ return dfa.Optimize();
+ }
+
+
+
+ //protected abstract TGrammar CreateInstance();
+ }
+
+
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/IAlphabet.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/IAlphabet.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,56 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Алфавит. Множество символов, которые разбиты на классы, при этом классы имеют непрерывную нумерацию,
+ /// что позволяет использовать их в качестве индексов массивов.
+ ///
+ /// Далее вимволами алфавита будем называть классы исходных символов.
+ /// Тип символов.
+ public interface IAlphabet {
+ ///
+ /// Количество символов в алфавите.
+ ///
+ int Count { get; }
+ ///
+ /// Добавляет новый символ в алфавит, если символ уже был добавлен, то
+ /// возвращается ранее сопоставленный с символом класс.
+ ///
+ /// Символ для добавления.
+ /// Индекс класса, который попоставлен с символом.
+ int DefineSymbol(TSymbol symbol);
+ ///
+ /// Доабвляем класс символов. Множеству указанных исходных символов
+ /// будет сопоставлен символ в алфавите.
+ ///
+ /// Множестов исходных символов
+ /// Идентификатор символа алфавита.
+ int DefineClass(IEnumerable symbols);
+ ///
+ /// Создает карту обратного сопоставления символа алфавита и сопоставленным
+ /// ему исходным символам.
+ ///
+ ///
+ List[] CreateReverseMap();
+ ///
+ /// Создает новый алфавит на основе текущего, горппируя его сиволы в более
+ /// крупные непересекающиеся классы символов.
+ ///
+ /// Новый, пустой алфавит, в котором быдут определены классы.
+ /// Множество классов символов текущего алфавита.
+ /// Карта для перехода символов текущего
+ /// алфавита к символам нового.
+ int[] Reclassify(IAlphabet newAlphabet, IEnumerable> classes);
+
+ ///
+ /// Преобразует входной символ в индекс символа из алфавита.
+ ///
+ /// Исходный символ
+ /// Индекс в алфавите
+ int Translate(TSymbol symobl);
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/IDFADefinition.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/IDFADefinition.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,36 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Интерфейс для определения ДКА, позволяет добавить состояния и определить переходы.
+ ///
+ public interface IDFADefinition {
+ ///
+ /// Добавляет состояние в автомат.
+ ///
+ /// Индекс добавленного состояния.
+ int AddState();
+ ///
+ /// Добавляет конечное состояние с указанными метками, если метки не заданы, то
+ /// добавленное состояние не будет конечным.
+ ///
+ /// Метки состояния.
+ /// Индекс добавленного состояния.
+ int AddState(int[] tags);
+ ///
+ /// Определяет переход между состояниями.
+ ///
+ /// Исходное состояние.
+ /// Конечное состояние.
+ /// Входной символ.
+ void DefineTransition(int s1, int s2, int input);
+ ///
+ /// Размер входного алфавита.
+ ///
+ int AlphabetSize { get; }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/IVisitor.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/IVisitor.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,19 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Интерфейс обходчика синтаксического дерева регулярного выражения
+ ///
+ public interface IVisitor {
+ void Visit(AltToken token);
+ void Visit(StarToken token);
+ void Visit(CatToken token);
+ void Visit(EmptyToken token);
+ void Visit(EndToken token);
+ void Visit(SymbolToken token);
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/ParserException.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/ParserException.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,17 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Implab.Parsing {
+ [Serializable]
+ public class ParserException : Exception {
+ public ParserException() { }
+ public ParserException(string message) : base(message) { }
+ public ParserException(string message, Exception inner) : base(message, inner) { }
+ protected ParserException(
+ System.Runtime.Serialization.SerializationInfo info,
+ System.Runtime.Serialization.StreamingContext context)
+ : base(info, context) { }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/Scanner.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/Scanner.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,207 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Базовый класс для разбора потока входных символов на токены.
+ ///
+ ///
+ /// Сканнер имеет внутри буффер с симолами входного текста, по которому перемещаются два
+ /// указателя, начала и конца токена, при перемещении искользуется ДКА для определения
+ /// конца токена и допустимости текущего символа.
+ ///
+ public class Scanner {
+ struct ScannerConfig {
+ public DFAStateDescriptior[] states;
+ public int[] alphabetMap;
+ }
+
+ Stack m_defs = new Stack();
+
+ DFAStateDescriptior[] m_states;
+ int[] m_alphabetMap;
+
+ protected DFAStateDescriptior m_currentState;
+ int m_previewCode;
+
+ protected int m_tokenLen = 0;
+ protected int m_tokenOffset;
+
+ protected char[] m_buffer;
+ protected int m_bufferSize;
+ protected int m_pointer;
+
+ public Scanner(CDFADefinition definition, string text) {
+ Safe.ArgumentNotNull(definition, "definition");
+ Safe.ArgumentNotEmpty(text, "text");
+
+ m_states = definition.States;
+ m_alphabetMap = definition.Alphabet.GetTranslationMap();
+
+ Feed(text.ToCharArray());
+ }
+
+ public Scanner(CDFADefinition definition) {
+ Safe.ArgumentNotNull(definition, "definition");
+
+ m_states = definition.States;
+ m_alphabetMap = definition.Alphabet.GetTranslationMap();
+
+ Feed(new char[0]);
+ }
+
+ ///
+ /// Заполняет входными данными буффер.
+ ///
+ /// Данные для обработки.
+ /// Копирование данных не происходит, переданный массив используется в
+ /// качестве входного буффера.
+ public void Feed(char[] data) {
+ Safe.ArgumentNotNull(data, "data");
+
+ Feed(data, data.Length);
+ }
+
+ ///
+ /// Заполняет буффур чтения входными данными.
+ ///
+ /// Данные для обработки.
+ /// Длина данных для обработки.
+ /// Копирование данных не происходит, переданный массив используется в
+ /// качестве входного буффера.
+ public void Feed(char[] data, int length) {
+ Safe.ArgumentNotNull(data, "data");
+ Safe.ArgumentInRange(length, 0, data.Length, "length");
+
+ m_pointer = -1;
+ m_buffer = data;
+ m_bufferSize = length;
+ Shift();
+ }
+
+ ///
+ /// Получает текущий токен в виде строки.
+ ///
+ ///
+ public string GetTokenValue() {
+ return new String(m_buffer, m_tokenOffset, m_tokenLen);
+ }
+
+ ///
+ /// Метки текущего токена, которые были назначены в регулярном выражении.
+ ///
+ public int[] TokenTags {
+ get {
+ return m_currentState.tag;
+ }
+ }
+
+ ///
+ /// Читает следующий токен, при этом указывает на начало токена,
+ /// на длину токена, - массив символов, в
+ /// котором находится токен.
+ ///
+ /// false - достигнут конец данных, токен не прочитан.
+ protected bool ReadTokenInternal() {
+ if (m_pointer >= m_bufferSize)
+ return false;
+
+ m_currentState = m_states[CDFADefinition.INITIAL_STATE];
+ m_tokenLen = 0;
+ m_tokenOffset = m_pointer;
+ int nextState = CDFADefinition.UNREACHEBLE_STATE;
+ do {
+ nextState = m_currentState.transitions[m_previewCode];
+ if (nextState == CDFADefinition.UNREACHEBLE_STATE) {
+ if (m_currentState.final)
+ return true;
+ else
+ throw new ParserException(
+ String.Format(
+ "Unexpected symbol '{0}', at pos {1}",
+ m_buffer[m_pointer],
+ Position
+ )
+ );
+ } else {
+ m_currentState = m_states[nextState];
+ m_tokenLen++;
+ }
+
+ } while (Shift());
+
+ // END OF DATA
+ if (!m_currentState.final)
+ throw new ParserException("Unexpected end of data");
+
+ return true;
+ }
+
+
+ bool Shift() {
+ m_pointer++;
+
+ if (m_pointer >= m_bufferSize) {
+ return ReadNextChunk();
+ }
+
+ m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+
+ return true;
+ }
+
+ ///
+ /// Вызывается по достижению конца входного буффера для получения
+ /// новых данных.
+ ///
+ /// true - новые двнные получены, можно продолжать обработку.
+ protected virtual bool ReadNextChunk() {
+ return false;
+ }
+
+ ///
+ /// Позиция сканнера во входном буфере
+ ///
+ public int Position {
+ get {
+ return m_pointer + 1;
+ }
+ }
+
+ ///
+ /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей
+ /// группировки.
+ ///
+ /// Таблица состояний нового ДКА
+ /// Таблица входных символов для нового ДКА
+ protected void Switch(DFAStateDescriptior[] states, int[] alphabet) {
+ Safe.ArgumentNotNull(states, "dfa");
+
+ m_defs.Push(new ScannerConfig {
+ states = m_states,
+ alphabetMap = m_alphabetMap
+ });
+
+ m_states = states;
+ m_alphabetMap = alphabet;
+
+ m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+ }
+
+ ///
+ /// Восстанавливает предыдущей ДКА сканнера.
+ ///
+ protected void Restore() {
+ if (m_defs.Count == 0)
+ throw new InvalidOperationException();
+ var prev = m_defs.Pop();
+ m_states = prev.states;
+ m_alphabetMap = prev.alphabetMap;
+ m_previewCode = m_alphabetMap[m_buffer[m_pointer]];
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/StarToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/StarToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,34 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Замыкание выражения с 0 и более повторов.
+ ///
+ public class StarToken: Token {
+
+ Token m_token;
+
+ public Token Token {
+ get { return m_token; }
+ }
+
+ public StarToken(Token token) {
+ Safe.ArgumentNotNull(token, "token");
+ m_token = token;
+ }
+
+ public override void Accept(IVisitor visitor) {
+ Safe.ArgumentNotNull(visitor, "visitor");
+ visitor.Visit(this);
+ }
+
+ public override string ToString() {
+ return String.Format("({0})*", Token.ToString());
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/SymbolToken.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/SymbolToken.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,33 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ ///
+ /// Выражение, соответсвующее одному символу.
+ ///
+ public class SymbolToken : Token {
+ int m_value;
+
+ public int Value {
+ get { return m_value; }
+ }
+
+ public SymbolToken(int value) {
+ m_value = value;
+ }
+ public override void Accept(IVisitor visitor) {
+ Safe.ArgumentNotNull(visitor, "visitor");
+
+ visitor.Visit(this);
+
+ }
+
+ public override string ToString() {
+ return Value.ToString();
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Parsing/Token.cs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Implab/Parsing/Token.cs Sun Jun 15 19:39:11 2014 +0400
@@ -0,0 +1,67 @@
+using Implab;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Implab.Parsing {
+ public abstract class Token {
+ public abstract void Accept(IVisitor visitor);
+
+ public Token Extend() {
+ return new CatToken(this, new EndToken());
+ }
+
+ public Token Tag(T tag) where T : IConvertible {
+ return new CatToken(this, new EndToken(tag.ToInt32(CultureInfo.InvariantCulture)));
+ }
+
+ public Token Cat(Token right) {
+ return new CatToken(this, right);
+ }
+
+ public Token Or(Token right) {
+ return new AltToken(this, right);
+ }
+
+ public Token Optional() {
+ return Or(new EmptyToken());
+ }
+
+ public Token EClosure() {
+ return new StarToken(this);
+ }
+
+ public Token Closure() {
+ return new CatToken(this, new StarToken(this));
+ }
+
+ public Token Repeat(int count) {
+ Token token = null;
+
+ for (int i = 0; i < count; i++)
+ token = token != null ? token.Cat(this) : this;
+ return token ?? new EmptyToken();
+ }
+
+ public Token Repeat(int min, int max) {
+ if (min > max || min < 1)
+ throw new ArgumentOutOfRangeException();
+ var token = Repeat(min);
+
+ for (int i = min; i < max; i++)
+ token = token.Cat( this.Optional() );
+ return token;
+ }
+
+ public static Token New(params T[] set) where T : struct, IConvertible {
+ Safe.ArgumentNotNull(set, "set");
+ Token token = null;
+ foreach(var c in set.Distinct())
+ token = token == null ? new SymbolToken(c.ToInt32(CultureInfo.InvariantCulture)) : token.Or(new SymbolToken(c.ToInt32(CultureInfo.InvariantCulture)));
+ return token;
+ }
+ }
+}
diff -r 2c332a9c64c0 -r c0bf853aa04f Implab/Safe.cs
--- a/Implab/Safe.cs Sat Apr 26 23:36:00 2014 +0400
+++ b/Implab/Safe.cs Sun Jun 15 19:39:11 2014 +0400
@@ -25,6 +25,11 @@
throw new ArgumentNullException(name);
}
+ public static void ArgumentInRange(int arg, int min, int max, string name) {
+ if (arg < min || arg > max)
+ throw new ArgumentOutOfRangeException(name);
+ }
+
public static void Dispose(T obj) where T : class
{
var disp = obj as IDisposable;