# HG changeset patch # User cin # Date 1402846806 -14400 # Node ID 4fcbe7a4b36b8b929dcef4b00b303942482b0a5e # Parent c0bf853aa04fd8e9e34f9d64bc1a70b8dc602d5d# Parent 037c2897f1612e4e4f41afe7c93564cf657c0c12 Merge diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/CustomEqualityComparer.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/CustomEqualityComparer.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,49 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab { + /// + /// Обертка для создания IEqualityComparer с использованием делегатов или лямда-выражений. + /// + /// Тип сравниваемых значений + public class CustomEqualityComparer : IEqualityComparer { + Func m_equals; + Func m_hash; + + /// + /// Создает новый объект с указанными функциями сравнения на раветво и получения хеш-кода. + /// + /// Функция проверки на равенство + /// Функция получения хешкода + public CustomEqualityComparer(Func equality, Func hash) { + Safe.ArgumentNotNull(equality, "equality"); + Safe.ArgumentNotNull(hash, "hash"); + m_hash = hash; + m_equals = equality; + } + + /// + /// Сравнивает два знаечния на ревенство. + /// + /// + /// + /// Результат сравнения на равенство + public bool Equals(T x, T y) { + return m_equals(x,y); + } + + /// + /// Получает хеш-код для указанного значения. + /// + /// + /// Равные знаечния *должны* иметь одинаковый хеш-код. + /// Хеш-код + public int GetHashCode(T obj) { + return m_hash(obj); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Implab.csproj --- a/Implab/Implab.csproj Tue May 20 01:18:46 2014 +0400 +++ b/Implab/Implab.csproj Sun Jun 15 19:40:06 2014 +0400 @@ -33,6 +33,7 @@ + @@ -52,10 +53,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JSONElementContext.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JSONElementContext.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,16 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// internal + /// + public enum JSONElementContext { + None, + Object, + Array + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JSONElementType.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JSONElementType.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,34 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Тип элемента на котором находится парсер + /// + public enum JSONElementType { + None, + /// + /// Начало объекта + /// + BeginObject, + /// + /// Конец объекта + /// + EndObject, + /// + /// Начало массива + /// + BeginArray, + /// + /// Конец массива + /// + EndArray, + /// + /// Простое значение + /// + Value + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JSONGrammar.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JSONGrammar.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,113 @@ +using Implab.Parsing; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + internal class JSONGrammar : Grammar { + public enum TokenType : int{ + None, + BeginObject, + EndObject, + BeginArray, + EndArray, + String, + Number, + Literal, + NameSeparator, + ValueSeparator, + + StringBound, + EscapedChar, + UnescapedChar, + EscapedUnicode, + + Minus, + Plus, + Sign, + Integer, + Dot, + Exp + } + + readonly CDFADefinition m_jsonDFA; + readonly CDFADefinition m_stringDFA; + + public JSONGrammar() { + DefineAlphabet(Enumerable.Range(0, 0x20).Select(x => (char)x)); + var hexDigit = SymbolRangeToken('a','f').Or(SymbolRangeToken('A','F')).Or(SymbolRangeToken('0','9')); + var digit9 = SymbolRangeToken('1', '9'); + var zero = SymbolToken('0'); + var digit = zero.Or(digit9); + var dot = SymbolToken('.'); + var minus = SymbolToken('-'); + var sign = SymbolSetToken('-', '+'); + var expSign = SymbolSetToken('e', 'E'); + var letters = SymbolRangeToken('a', 'z'); + var integer = zero.Or(digit9.Cat(digit.EClosure())); + var frac = dot.Cat(digit.Closure()); + var exp = expSign.Cat(sign.Optional()).Cat(digit.Closure()); + var quote = SymbolToken('"'); + var backSlash = SymbolToken('\\'); + var specialEscapeChars = SymbolSetToken('\\', '"', '/', 'b', 'f', 't', 'n', 'r'); + var unicodeEspace = SymbolToken('u').Cat(hexDigit.Repeat(4)); + var escape = backSlash.Cat(specialEscapeChars.Or(unicodeEspace)); + var whitespace = SymbolSetToken('\n', '\r', '\t', ' ').EClosure(); + var beginObject = whitespace.Cat(SymbolToken('{')).Cat(whitespace); + var endObject = whitespace.Cat(SymbolToken('}')).Cat(whitespace); + var beginArray = whitespace.Cat(SymbolToken('[')).Cat(whitespace); + var endArray = whitespace.Cat(SymbolToken(']')).Cat(whitespace); + var nameSep = whitespace.Cat(SymbolToken(':')).Cat(whitespace); + var valueSep = whitespace.Cat(SymbolToken(',')).Cat(whitespace); + + var number = minus.Optional().Cat(integer).Cat(frac.Optional()).Cat(exp.Optional()); + var literal = letters.Closure(); + var unescaped = SymbolTokenExcept(Enumerable.Range(0, 0x20).Union(new int[] { '\\', '"' }).Select(x => (char)x)); + var character = unescaped.Or(escape); + var str = quote.Cat(character.EClosure()).Cat(quote); + + + var jsonExpression = + number.Tag(TokenType.Number) + .Or(literal.Tag(TokenType.Literal)) + .Or(quote.Tag(TokenType.StringBound)) + .Or(beginObject.Tag(TokenType.BeginObject)) + .Or(endObject.Tag(TokenType.EndObject)) + .Or(beginArray.Tag(TokenType.BeginArray)) + .Or(endArray.Tag(TokenType.EndArray)) + .Or(nameSep.Tag(TokenType.NameSeparator)) + .Or(valueSep.Tag(TokenType.ValueSeparator)); + + + var jsonStringExpression = + quote.Tag(TokenType.StringBound) + .Or(backSlash.Cat(specialEscapeChars).Tag(TokenType.EscapedChar)) + .Or(backSlash.Cat(unicodeEspace).Tag(TokenType.EscapedUnicode)) + .Or(unescaped.Closure().Tag(TokenType.UnescapedChar)); + + var jsonNumberExpression = + minus.Tag(TokenType.Minus) + .Or(SymbolToken('+').Tag(TokenType.Plus)) + .Or(digit.Closure().Tag(TokenType.Integer)) + .Or(dot.Tag(TokenType.Dot)) + .Or(expSign.Tag(TokenType.Exp)); + + m_jsonDFA = BuildDFA(jsonExpression); + m_stringDFA = BuildDFA(jsonStringExpression); + } + + public CDFADefinition JsonDFA { + get { + return m_jsonDFA; + } + } + + public CDFADefinition JsonStringDFA { + get { + return m_stringDFA; + } + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JSONParser.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JSONParser.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,197 @@ +using Implab; +using Implab.Parsing; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// internal + /// + public struct JSONParserContext { + public string memberName; + public JSONElementContext elementContext; + } + + /// + /// Pull парсер JSON данных. + /// + public class JSONParser : DFAutomaton { + + enum MemberContext { + MemberName, + MemberValue + } + + static readonly EnumAlphabet _alphabet = EnumAlphabet.FullAlphabet; + static readonly DFAStateDescriptior[] _jsonDFA; + static readonly DFAStateDescriptior[] _objectDFA; + static readonly DFAStateDescriptior[] _arrayDFA; + + static JSONParser() { + var jsonExpression = Token.New(JsonTokenType.BeginObject, JsonTokenType.BeginArray).Tag(0); + + var valueExpression = Token.New(JsonTokenType.BeginArray, JsonTokenType.BeginObject, JsonTokenType.Literal, JsonTokenType.Number, JsonTokenType.String); + var memberExpression = Token.New(JsonTokenType.String).Cat(Token.New(JsonTokenType.NameSeparator)).Cat(valueExpression); + var objectExpression = memberExpression + .Cat( + Token.New(JsonTokenType.ValueSeparator) + .Cat(memberExpression) + .EClosure() + ) + .Optional() + .Cat(Token.New(JsonTokenType.EndObject)) + .Tag(0); + var arrayExpression = valueExpression + .Cat( + Token.New(JsonTokenType.ValueSeparator) + .Cat(valueExpression) + .EClosure() + ) + .Optional() + .Cat(Token.New(JsonTokenType.EndArray)) + .Tag(0); + + _jsonDFA = BuildDFA(jsonExpression).States; + _objectDFA = BuildDFA(objectExpression).States; + _arrayDFA = BuildDFA(arrayExpression).States; + } + + static EDFADefinition BuildDFA(Token expr) { + var builder = new DFABuilder(); + var dfa = new EDFADefinition(_alphabet); + expr.Accept(builder); + + builder.BuildDFA(dfa); + return dfa; + } + + JSONScanner m_scanner; + MemberContext m_memberContext; + + JSONElementType m_elementType; + object m_elementValue; + + public JSONParser(string text) + : base(_jsonDFA, INITIAL_STATE, new JSONParserContext { elementContext = JSONElementContext.None, memberName = String.Empty } ) { + Safe.ArgumentNotEmpty(text, "text"); + m_scanner = new JSONScanner(); + m_scanner.Feed(text.ToCharArray()); + } + + public JSONElementType ElementType { + get { return m_elementType; } + } + + public string ElementName { + get { return m_context.info.memberName; } + } + + public object ElementValue { + get { return m_elementValue; } + } + + public bool Read() { + if (m_context.current == UNREACHEBLE_STATE) + throw new InvalidOperationException("The parser is in invalid state"); + object tokenValue; + JsonTokenType tokenType; + m_context.info.memberName = String.Empty; + while (m_scanner.ReadToken(out tokenValue, out tokenType)) { + Move((int)tokenType); + if (m_context.current == UNREACHEBLE_STATE) + UnexpectedToken(tokenValue, tokenType); + switch (tokenType) { + case JsonTokenType.BeginObject: + Switch( + _objectDFA, + INITIAL_STATE, + new JSONParserContext { + memberName = m_context.info.memberName, + elementContext = JSONElementContext.Object + } + ); + m_elementValue = null; + m_memberContext = MemberContext.MemberName; + m_elementType = JSONElementType.BeginObject; + return true; + case JsonTokenType.EndObject: + Restore(); + m_elementValue = null; + m_elementType = JSONElementType.EndObject; + return true; + case JsonTokenType.BeginArray: + Switch( + _arrayDFA, + INITIAL_STATE, + new JSONParserContext { + memberName = m_context.info.memberName, + elementContext = JSONElementContext.Array + } + ); + m_elementValue = null; + m_memberContext = MemberContext.MemberValue; + m_elementType = JSONElementType.BeginArray; + return true; + case JsonTokenType.EndArray: + Restore(); + m_elementValue = null; + m_elementType = JSONElementType.EndArray; + return true; + case JsonTokenType.String: + if (m_memberContext == MemberContext.MemberName) { + m_context.info.memberName = (string)tokenValue; + break; + } else { + m_elementType = JSONElementType.Value; + m_elementValue = tokenValue; + return true; + } + case JsonTokenType.Number: + m_elementType = JSONElementType.Value; + m_elementValue = tokenValue; + return true; + case JsonTokenType.Literal: + m_elementType = JSONElementType.Value; + m_elementValue = ParseLiteral((string)tokenValue); + return true; + case JsonTokenType.NameSeparator: + m_memberContext = MemberContext.MemberValue; + break; + case JsonTokenType.ValueSeparator: + m_memberContext = m_context.info.elementContext == JSONElementContext.Object ? MemberContext.MemberName : MemberContext.MemberValue; + break; + default: + UnexpectedToken(tokenValue, tokenType); + break; + } + } + if (m_context.info.elementContext != JSONElementContext.None) + throw new ParserException("Unexpedted end of data"); + return false; + } + + object ParseLiteral(string literal) { + switch (literal) { + case "null": + return null; + case "false" : + return false; + case "true": + return true; + default: + UnexpectedToken(literal, JsonTokenType.Literal); + return null; // avoid compliler error + } + } + + void UnexpectedToken(object value, JsonTokenType tokenType) { + throw new ParserException(String.Format("Unexpected token {0}: '{1}'", tokenType, value)); + } + + } + +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JSONScanner.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JSONScanner.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,89 @@ +using Implab.Parsing; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Сканнер, разбивающий поток символов на токены JSON. + /// + public class JSONScanner : Scanner { + char[] m_stringBuffer; + DFAStateDescriptior[] m_stringDFA; + int[] m_stringAlphabet; + + public JSONScanner() + : base(JSONGrammar.Instance.JsonDFA) { + m_stringBuffer = new char[1024]; + var dfa = JSONGrammar.Instance.JsonStringDFA; + m_stringAlphabet = dfa.Alphabet.GetTranslationMap(); + m_stringDFA = dfa.States; + } + + public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) { + if (ReadTokenInternal()) { + switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { + case JSONGrammar.TokenType.StringBound: + tokenValue = ReadString(); + tokenType = JsonTokenType.String; + break; + case JSONGrammar.TokenType.Number: + tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture); + tokenType = JsonTokenType.Number; + break; + default: + tokenType = (JsonTokenType)m_currentState.tag[0]; + tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen); + break; + } + return true; + } + tokenValue = null; + tokenType = JsonTokenType.None; + return false; + } + + string ReadString() { + int pos = 0; + Switch(m_stringDFA, m_stringAlphabet); + while (ReadTokenInternal()) { + switch ((JSONGrammar.TokenType)m_currentState.tag[0]) { + case JSONGrammar.TokenType.StringBound: + Restore(); + return new String(m_stringBuffer, 0, pos); + case JSONGrammar.TokenType.UnescapedChar: + EnsureStringBufferSize(pos + m_tokenLen); + Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen); + pos += m_tokenLen; + break; + case JSONGrammar.TokenType.EscapedUnicode: + EnsureStringBufferSize(pos + 1); + m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2); + pos++; + break; + case JSONGrammar.TokenType.EscapedChar: + EnsureStringBufferSize(pos + 1); + m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]); + pos++; + break; + default: + break; + } + + } + + throw new ParserException("Unexpected end of data"); + } + + void EnsureStringBufferSize(int size) { + if (size > m_stringBuffer.Length) { + var newBuffer = new char[size]; + m_stringBuffer.CopyTo(newBuffer, 0); + m_stringBuffer = newBuffer; + } + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JSONWriter.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JSONWriter.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,227 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + public class JSONWriter { + struct Context { + public bool needComma; + public JSONElementContext element; + } + Stack m_contextStack = new Stack(); + Context m_context; + + TextWriter m_writer; + bool m_indent; + + static readonly char [] _escapeBKS, + _escapeFWD, + _escapeCR, + _escapeNL, + _escapeTAB, + _escapeSLASH, + _escapeBSLASH, + _escapeQ; + + static JSONWriter() { + _escapeBKS = "\\b".ToCharArray(); + _escapeFWD = "\\f".ToCharArray(); + _escapeCR = "\\r".ToCharArray(); + _escapeNL = "\\n".ToCharArray(); + _escapeTAB = "\\t".ToCharArray(); + _escapeBSLASH = "\\\\".ToCharArray(); + _escapeSLASH = "\\/".ToCharArray(); + _escapeQ = "\\\"".ToCharArray(); + } + + public JSONWriter(TextWriter writer) { + Safe.ArgumentNotNull(writer, "writer"); + + m_writer = writer; + } + + void WriteMemberName(string name) { + Safe.ArgumentNotEmpty(name, "name"); + if (m_context.element != JSONElementContext.Object) + OperationNotApplicable("WriteMember"); + if (m_context.needComma) + m_writer.Write(", "); + // TODO indent + m_context.needComma = true; + Write(name); + m_writer.Write(" : "); + } + + public void WriteValue(string name, string value) { + WriteMemberName(name); + Write(value); + } + + public void WriteValue(string name, bool value) { + WriteMemberName(name); + Write(value); + } + + public void WriteValue(string name, double value) { + WriteMemberName(name); + Write(value); + } + + + + public void WriteValue(string value) { + if (m_context.element != JSONElementContext.Array) + OperationNotApplicable("WriteValue"); + if (m_context.needComma) + m_writer.Write(", "); + m_context.needComma = true; + + Write(value); + } + + public void WriteValue(bool value) { + if (m_context.element != JSONElementContext.Array) + OperationNotApplicable("WriteValue"); + if (m_context.needComma) + m_writer.Write(", "); + m_context.needComma = true; + + Write(value); + } + + public void WriteValue(double value) { + if (m_context.element != JSONElementContext.Array) + OperationNotApplicable("WriteValue"); + if (m_context.needComma) + m_writer.Write(", "); + m_context.needComma = true; + + Write(value); + } + + public void BeginObject() { + if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) + OperationNotApplicable("BeginObject"); + if (m_context.needComma) + m_writer.Write(", "); + m_context.needComma = true; + + m_contextStack.Push(m_context); + + m_context = new Context { element = JSONElementContext.Object, needComma = false }; + m_writer.Write("{ "); + } + + public void BeginObject(string name) { + WriteMemberName(name); + + m_contextStack.Push(m_context); + + m_context = new Context { element = JSONElementContext.Object, needComma = false }; + m_writer.Write("{ "); + } + + public void EndObject() { + if (m_context.element != JSONElementContext.Object) + OperationNotApplicable("EndArray"); + + m_writer.Write(" }"); + m_context = m_contextStack.Pop(); + } + + public void BeginArray() { + if (m_context.element != JSONElementContext.None && m_context.element != JSONElementContext.Array) + throw new InvalidOperationException(); + if (m_context.needComma) + m_writer.Write(", "); + m_context.needComma = true; + + m_contextStack.Push(m_context); + + m_context = new Context { element = JSONElementContext.Array, needComma = false }; + m_writer.Write("[ "); + } + + public void BeginArray(string name) { + WriteMemberName(name); + + m_contextStack.Push(m_context); + + m_context = new Context { element = JSONElementContext.Array, needComma = false }; + m_writer.Write("[ "); + } + + public void EndArray() { + if (m_context.element != JSONElementContext.Array) + OperationNotApplicable("EndArray"); + + m_writer.Write(" ]"); + m_context = m_contextStack.Pop(); + } + + void Write(bool value) { + m_writer.Write(value ? "true" : "false"); + } + + + void Write(string value) { + if (value == null) + m_writer.Write("null"); + + var chars = value.ToCharArray(); + m_writer.Write('"'); + + for (int i = 0; i < chars.Length; i++) { + var ch = chars[i]; + + switch (ch) { + case '\b': + m_writer.Write(_escapeBKS); + break; + case '\f': + m_writer.Write(_escapeFWD); + break; + case '\r': + m_writer.Write(_escapeCR); + break; + case '\n': + m_writer.Write(_escapeNL); + break; + case '\t': + m_writer.Write(_escapeTAB); + break; + case '\\': + m_writer.Write(_escapeBSLASH); + break; + case '/': + m_writer.Write(_escapeSLASH); + break; + case '"': + m_writer.Write(_escapeQ); + break; + default: + if (ch < 0x20) { + m_writer.Write("\\u00{0:x2}",(int)ch); + } else { + m_writer.Write(ch); + } + break; + } + } + + m_writer.Write('"'); + } + + void Write(double value) { + m_writer.Write(value); + } + + void OperationNotApplicable(string opName) { + throw new InvalidOperationException(String.Format("The operation '{0}' isn't applicable in the context of '{1}'", opName, m_context.element )); + } + + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/JsonTokenType.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/JsonTokenType.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Тип токенов, возвращаемых . + /// + public enum JsonTokenType : int { + None = 0, + /// + /// Начало объекта + /// + BeginObject, + /// + /// Конец объекта + /// + EndObject, + /// + /// Начало массива + /// + BeginArray, + /// + /// Конец массива + /// + EndArray, + /// + /// Строка + /// + String, + /// + /// Число + /// + Number, + /// + /// Литерал + /// + Literal, + /// + /// Разделитель имени : + /// + NameSeparator, + /// + /// Разделитель имени , + /// + ValueSeparator + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/JSON/StringTranslator.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/JSON/StringTranslator.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,96 @@ +using Implab; +using Implab.Parsing; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.JSON { + /// + /// Класс для преобразования экранированной строки JSON + /// + public class StringTranslator : Scanner { + static readonly char[] _escMap; + static readonly int[] _hexMap; + + static StringTranslator() { + var chars = new char[] { 'b', 'f', 't', 'r', 'n', '\\', '/' }; + var vals = new char[] { '\b', '\f', '\t', '\r', '\n', '\\', '/' }; + + _escMap = new char[chars.Max() + 1]; + + for (int i = 0; i < chars.Length; i++) + _escMap[chars[i]] = vals[i]; + + var hexs = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' }; + var ints = new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15 }; + + _hexMap = new int[hexs.Max() + 1]; + + for (int i = 0; i < hexs.Length; i++) + _hexMap[hexs[i]] = ints[i]; + + } + + public StringTranslator() + : base(JSONGrammar.Instance.JsonStringDFA) { + } + + public string Translate(string data) { + Safe.ArgumentNotNull(data, "data"); + return Translate(data.ToCharArray()); + } + + public string Translate(char[] data) { + Safe.ArgumentNotNull(data, "data"); + return Translate(data, data.Length); + } + + public string Translate(char[] data, int length) { + Safe.ArgumentNotNull(data, "data"); + Safe.ArgumentInRange(length, 0, data.Length, "length"); + + var translated = new char[length]; + + Feed(data,length); + + int pos = 0; + + while (ReadTokenInternal()) { + switch ((JSONGrammar.TokenType)TokenTags[0]) { + case JSONGrammar.TokenType.UnescapedChar: + Array.Copy(m_buffer,m_tokenOffset,translated,pos,m_tokenLen); + pos += m_tokenLen; + break; + case JSONGrammar.TokenType.EscapedChar: + translated[pos] = _escMap[m_buffer[m_tokenOffset + 1]]; + pos++; + break; + case JSONGrammar.TokenType.EscapedUnicode: + translated[pos] = TranslateHexUnicode(m_buffer,m_tokenOffset + 2); + pos++; + break; + } + } + + return new String(translated, 0, pos); + } + + internal static char TranslateEscapedChar(char symbol) { + return _escMap[symbol]; + } + + internal static char TranslateHexUnicode(char[] symbols, int offset) { + Debug.Assert(symbols != null); + Debug.Assert(symbols.Length - offset >= 4); + + int value = (_hexMap[symbols[offset]] << 12) + | (_hexMap[symbols[offset + 1]] << 8) + | (_hexMap[symbols[offset + 2]] << 4) + | (_hexMap[symbols[offset + 3]]); + return (char)value; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/Alphabet.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/Alphabet.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,23 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public class Alphabet: AlphabetBase { + + public override int GetSymbolIndex(char symbol) { + return symbol; + } + + public override IEnumerable InputSymbols { + get { return Enumerable.Range(char.MinValue, char.MaxValue).Select(x => (char)x); } + } + + protected override int MapSize { + get { return char.MaxValue + 1; } + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/AlphabetBase.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/AlphabetBase.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,103 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public abstract class AlphabetBase : IAlphabet { + public const int UNCLASSIFIED = 0; + + int m_nextId = 1; + int[] m_map; + + public int Count { + get { return m_nextId; } + } + + protected AlphabetBase() { + m_map = new int[MapSize]; + } + + protected AlphabetBase(int[] map) { + Debug.Assert(map != null); + Debug.Assert(map.Length == MapSize); + + m_map = map; + m_nextId = map.Max() + 1; + } + + public int DefineSymbol(T symbol) { + var index = GetSymbolIndex(symbol); + if (m_map[index] == UNCLASSIFIED) + m_map[index] = m_nextId++; + return m_map[index]; + } + + public int DefineClass(IEnumerable symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + symbols = symbols.Distinct(); + + foreach (var symbol in symbols) { + var index = GetSymbolIndex(symbol); + if (m_map[index] == UNCLASSIFIED) + m_map[GetSymbolIndex(symbol)] = m_nextId; + else + throw new InvalidOperationException(String.Format("Symbol '{0}' already in use", symbol)); + } + return m_nextId++; + } + + public List[] CreateReverseMap() { + return + Enumerable.Range(UNCLASSIFIED, Count) + .Select( + i => InputSymbols + .Where(x => i != UNCLASSIFIED && m_map[GetSymbolIndex(x)] == i) + .ToList() + ) + .ToArray(); + } + + public int[] Reclassify(IAlphabet newAlphabet, IEnumerable> classes) { + Safe.ArgumentNotNull(newAlphabet, "newAlphabet"); + Safe.ArgumentNotNull(classes, "classes"); + var reverseMap = CreateReverseMap(); + + int[] translationMap = new int[Count]; + + foreach (var scl in classes) { + // skip if the supper class contains the unclassified element + if (scl.Contains(UNCLASSIFIED)) + continue; + var range = new List(); + foreach (var cl in scl) { + if (cl < 0 || cl >= reverseMap.Length) + throw new ArgumentOutOfRangeException(String.Format("Class {0} is not valid for the current alphabet", cl)); + range.AddRange(reverseMap[cl]); + } + var newClass = newAlphabet.DefineClass(range); + foreach (var cl in scl) + translationMap[cl] = newClass; + } + + return translationMap; + } + + public int Translate(T symbol) { + return m_map[GetSymbolIndex(symbol)]; + } + + public abstract int GetSymbolIndex(T symbol); + + public abstract IEnumerable InputSymbols { get; } + + protected abstract int MapSize { get; } + + public int[] GetTranslationMap() { + return m_map; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/AltToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/AltToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,22 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public class AltToken: BinaryToken { + public AltToken(Token left, Token right) + : base(left, right) { + } + + public override void Accept(IVisitor visitor) { + Safe.ArgumentNotNull(visitor, "visitor"); + visitor.Visit(this); + } + public override string ToString() { + return String.Format(Right is BinaryToken ? "{0}|({1})" : "{0}|{1}", Left, Right); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/BinaryToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/BinaryToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,26 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public abstract class BinaryToken : Token { + Token m_left; + Token m_right; + + public Token Left { + get { return m_left; } + } + + public Token Right { + get { return m_right; } + } + + protected BinaryToken(Token left, Token right) { + Safe.ArgumentNotNull(m_left = left, "left"); + Safe.ArgumentNotNull(m_right = right, "right"); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/CDFADefinition.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/CDFADefinition.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,36 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public class CDFADefinition : DFADefinitionBase { + Alphabet m_alphabet; + + public Alphabet Alphabet { + get { return m_alphabet; } + } + + public override int AlphabetSize { + get { return m_alphabet.Count; } + } + + public CDFADefinition(Alphabet alphabet): base() { + Safe.ArgumentNotNull(alphabet, "alphabet"); + m_alphabet = alphabet; + } + + public CDFADefinition Optimize() { + var optimized = new CDFADefinition(new Alphabet()); + + Optimize(optimized, m_alphabet, optimized.Alphabet); + return optimized; + } + + public void PrintDFA() { + PrintDFA(m_alphabet); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/CatToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/CatToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,27 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public class CatToken : BinaryToken { + public CatToken(Token left, Token right) + : base(left, right) { + } + + public override void Accept(IVisitor visitor) { + Safe.ArgumentNotNull(visitor, "visitor"); + visitor.Visit(this); + } + + public override string ToString() { + return String.Format("{0}{1}", FormatToken(Left), FormatToken(Right)); + } + + string FormatToken(Token token) { + return String.Format(token is AltToken ? "({0})" : "{0}", token); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/DFABuilder.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/DFABuilder.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,182 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Используется для построения ДКА по регулярному выражению, сначала обходит + /// регулярное выражение и вычисляет followpos, затем используется метод + /// для построения автомата. + /// + public class DFABuilder : IVisitor { + int m_idx = 0; + Token m_root; + HashSet m_firstpos; + HashSet m_lastpos; + + Dictionary> m_followpos = new Dictionary>(); + Dictionary m_indexes = new Dictionary(); + Dictionary m_ends = new Dictionary(); + + public Dictionary> FollowposMap { + get { return m_followpos; } + } + + public HashSet Followpos(int pos) { + HashSet set; + if (m_followpos.TryGetValue(pos, out set)) + return set; + return m_followpos[pos] = new HashSet(); + } + + bool Nullable(object n) { + if (n is EmptyToken || n is StarToken) + return true; + if (n is AltToken) + return Nullable(((AltToken)n).Left) || Nullable(((AltToken)n).Right); + if (n is CatToken) + return Nullable(((CatToken)n).Left) && Nullable(((CatToken)n).Right); + return false; + } + + + public void Visit(AltToken token) { + if (m_root == null) + m_root = token; + var firtspos = new HashSet(); + var lastpos = new HashSet(); + + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + token.Right.Accept(this); + firtspos.UnionWith(m_firstpos); + lastpos.UnionWith(m_lastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + } + + public void Visit(StarToken token) { + if (m_root == null) + m_root = token; + token.Token.Accept(this); + + foreach (var i in m_lastpos) + Followpos(i).UnionWith(m_firstpos); + } + + public void Visit(CatToken token) { + if (m_root == null) + m_root = token; + + var firtspos = new HashSet(); + var lastpos = new HashSet(); + token.Left.Accept(this); + firtspos.UnionWith(m_firstpos); + var leftLastpos = m_lastpos; + + token.Right.Accept(this); + lastpos.UnionWith(m_lastpos); + var rightFirstpos = m_firstpos; + + if (Nullable(token.Left)) + firtspos.UnionWith(rightFirstpos); + + if (Nullable(token.Right)) + lastpos.UnionWith(leftLastpos); + + m_firstpos = firtspos; + m_lastpos = lastpos; + + foreach (var i in leftLastpos) + Followpos(i).UnionWith(rightFirstpos); + + } + + public void Visit(EmptyToken token) { + if (m_root == null) + m_root = token; + ; + } + + public void Visit(SymbolToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = token.Value; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + } + + public void Visit(EndToken token) { + if (m_root == null) + m_root = token; + m_idx++; + m_indexes[m_idx] = Alphabet.UNCLASSIFIED; + m_firstpos = new HashSet(new[] { m_idx }); + m_lastpos = new HashSet(new[] { m_idx }); + Followpos(m_idx); + m_ends.Add(m_idx, token.Tag); + } + + public void BuildDFA(IDFADefinition dfa) { + Safe.ArgumentNotNull(dfa,"dfa"); + + var stateMap = new Dictionary, int>(new CustomEqualityComparer>( + (x, y) => x.SetEquals(y), + (x) => x.Sum(n => n.GetHashCode()) + )); + + stateMap[m_firstpos] = DefineState( dfa, m_firstpos); + Debug.Assert(stateMap[m_firstpos] == DFADefinitionBase.INITIAL_STATE); + + var queue = new Queue>(); + + queue.Enqueue(m_firstpos); + + while (queue.Count > 0) { + var state = queue.Dequeue(); + var s1 = stateMap[state]; + + for (int a = 0; a < dfa.AlphabetSize; a++) { + var next = new HashSet(); + foreach (var p in state) { + if (m_indexes[p] == a) { + next.UnionWith(Followpos(p)); + } + } + if (next.Count > 0) { + int s2; + if (!stateMap.TryGetValue(next, out s2)) { + stateMap[next] = s2 = DefineState(dfa, next); + queue.Enqueue(next); + } + dfa.DefineTransition(s1, s2, a); + } + } + + } + } + + int[] GetStateTags(HashSet state) { + Debug.Assert(state != null); + return state.Where(pos => m_ends.ContainsKey(pos)).Select(pos => m_ends[pos]).ToArray(); + } + + int DefineState(IDFADefinition automa, HashSet state) { + Debug.Assert(automa != null); + Debug.Assert(state != null); + + var tags = GetStateTags(state); + + return tags.Length > 0 ? automa.AddState(tags) : automa.AddState(); + } + + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/DFADefinitionBase.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/DFADefinitionBase.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,262 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public abstract class DFADefinitionBase : IDFADefinition { + readonly List m_states; + + public const int INITIAL_STATE = 1; + public const int UNREACHEBLE_STATE = 0; + + DFAStateDescriptior[] m_statesArray; + + public DFADefinitionBase() { + m_states = new List(); + + m_states.Add(new DFAStateDescriptior()); + } + + public DFAStateDescriptior[] States { + get { + if (m_statesArray == null) + m_statesArray = m_states.ToArray(); + return m_statesArray; + } + } + + public bool InitialStateIsFinal { + get { + return m_states[INITIAL_STATE].final; + } + } + + public int AddState() { + var index = m_states.Count; + m_states.Add(new DFAStateDescriptior { + final = false, + transitions = new int[AlphabetSize] + }); + + return index; + } + + public int AddState(int[] tag) { + var index = m_states.Count; + bool final = tag == null || tag.Length == 0 ? false : true; + m_states.Add(new DFAStateDescriptior { + final = final, + transitions = new int[AlphabetSize], + tag = final ? tag : null + }); + return index; + } + + public void DefineTransition(int s1,int s2, int symbol) { + Safe.ArgumentInRange(s1, 0, m_states.Count-1, "s1"); + Safe.ArgumentInRange(s2, 0, m_states.Count-1, "s2"); + Safe.ArgumentInRange(symbol, 0, AlphabetSize-1, "symbol"); + + m_states[s1].transitions[symbol] = s2; + } + + protected void Optimize(IDFADefinition minimalDFA,IAlphabet sourceAlphabet, IAlphabet minimalAlphabet) { + Safe.ArgumentNotNull(minimalDFA, "minimalDFA"); + Safe.ArgumentNotNull(minimalAlphabet, "minimalAlphabet"); + + var setComparer = new CustomEqualityComparer>( + (x, y) => x.SetEquals(y), + (s) => s.Sum(x => x.GetHashCode()) + ); + + var arrayComparer = new CustomEqualityComparer( + (x,y) => (new HashSet(x)).SetEquals(new HashSet(y)), + (a) => a.Sum(x => x.GetHashCode()) + ); + + var optimalStates = new HashSet>(setComparer); + var queue = new HashSet>(setComparer); + + foreach (var g in Enumerable + .Range(INITIAL_STATE, m_states.Count-1) + .Select(i => new { + index = i, + descriptor = m_states[i] + }) + .Where(x => x.descriptor.final) + .GroupBy(x => x.descriptor.tag, arrayComparer) + ) { + optimalStates.Add(new HashSet(g.Select(x => x.index))); + } + + var state = new HashSet( + Enumerable + .Range(INITIAL_STATE, m_states.Count - 1) + .Where(i => !m_states[i].final) + ); + optimalStates.Add(state); + queue.Add(state); + + while (queue.Count > 0) { + var stateA = queue.First(); + queue.Remove(stateA); + + for (int c = 0; c < AlphabetSize; c++) { + var stateX = new HashSet(); + + for(int s = 1; s < m_states.Count; s++) { + if (stateA.Contains(m_states[s].transitions[c])) + stateX.Add(s); + } + + foreach (var stateY in optimalStates.ToArray()) { + if (stateX.Overlaps(stateY) && !stateY.IsSubsetOf(stateX)) { + var stateR1 = new HashSet(stateY); + var stateR2 = new HashSet(stateY); + + stateR1.IntersectWith(stateX); + stateR2.ExceptWith(stateX); + + optimalStates.Remove(stateY); + optimalStates.Add(stateR1); + optimalStates.Add(stateR2); + + if (queue.Contains(stateY)) { + queue.Remove(stateY); + queue.Add(stateR1); + queue.Add(stateR2); + } else { + queue.Add(stateR1.Count <= stateR2.Count ? stateR1 : stateR2); + } + } + } + } + } + + // строим карты соотвествия оптимальных состояний с оригинальными + + var initialState = optimalStates.Where(x => x.Contains(INITIAL_STATE)).Single(); + + // карта получения оптимального состояния по соотвествующему ему простому состоянию + int[] reveseOptimalMap = new int[m_states.Count]; + // карта с индексами оптимальных состояний + HashSet[] optimalMap = new HashSet[optimalStates.Count + 1]; + { + optimalMap[0] = new HashSet(); // unreachable state + optimalMap[1] = initialState; // initial state + foreach (var ss in initialState) + reveseOptimalMap[ss] = 1; + + int i = 2; + foreach (var s in optimalStates) { + if (s.SetEquals(initialState)) + continue; + optimalMap[i] = s; + foreach (var ss in s) + reveseOptimalMap[ss] = i; + i++; + } + } + + // получаем минимальный алфавит + + var minClasses = new HashSet>(setComparer); + var alphaQueue = new Queue>(); + alphaQueue.Enqueue(new HashSet(Enumerable.Range(0,AlphabetSize))); + + for (int s = 1 ; s < optimalMap.Length; s++) { + var newQueue = new Queue>(); + + foreach (var A in alphaQueue) { + if (A.Count == 1) { + minClasses.Add(A); + continue; + } + + // различаем классы символов, которые переводят в различные оптимальные состояния + // optimalState -> alphaClass + var classes = new Dictionary>(); + + foreach (var term in A) { + // ищем все переходы класса по символу term + var s2 = reveseOptimalMap[ + optimalMap[s].Select(x => m_states[x].transitions[term]) // все элементарные состояния, куда переходит класс s + .Where(x => x != 0) // только допустимые + .FirstOrDefault() // первое допустимое элементарное состояние, если есть + ]; + + HashSet A2; + if (!classes.TryGetValue(s2, out A2)) { + A2 = new HashSet(); + newQueue.Enqueue(A2); + classes[s2] = A2; + } + A2.Add(term); + } + } + + if (newQueue.Count == 0) + break; + alphaQueue = newQueue; + } + + foreach (var A in alphaQueue) + minClasses.Add(A); + + var alphabetMap = sourceAlphabet.Reclassify(minimalAlphabet, minClasses); + + // построение автомата + + var states = new int[ optimalMap.Length ]; + states[0] = UNREACHEBLE_STATE; + + for(var s = INITIAL_STATE; s < states.Length; s++) { + var tags = optimalMap[s].SelectMany(x => m_states[x].tag ?? Enumerable.Empty()).Distinct().ToArray(); + if (tags.Length > 0) + states[s] = minimalDFA.AddState(tags); + else + states[s] = minimalDFA.AddState(); + } + + Debug.Assert(states[INITIAL_STATE] == INITIAL_STATE); + + for (int s1 = 1; s1 < m_states.Count; s1++) { + for (int c = 0; c < AlphabetSize; c++) { + var s2 = m_states[s1].transitions[c]; + if (s2 != UNREACHEBLE_STATE) { + minimalDFA.DefineTransition( + reveseOptimalMap[s1], + reveseOptimalMap[s2], + alphabetMap[c] + ); + } + } + } + + } + + protected void PrintDFA(IAlphabet alphabet) { + + var reverseMap = alphabet.CreateReverseMap(); + + for (int i = 1; i < reverseMap.Length; i++) { + Console.WriteLine("C{0}: {1}", i, String.Join(",", reverseMap[i])); + } + + for (int i = 1; i < m_states.Count; i++) { + var s = m_states[i]; + for (int c = 0; c < AlphabetSize; c++) + if (s.transitions[c] != UNREACHEBLE_STATE) + Console.WriteLine("S{0} -{1}-> S{2}{3}", i, String.Join(",", reverseMap[c]), s.transitions[c], m_states[s.transitions[c]].final ? "$" : ""); + } + } + + public abstract int AlphabetSize { + get; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/DFAStateDescriptor.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/DFAStateDescriptor.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public struct DFAStateDescriptior { + public bool final; + public int[] tag; + public int[] transitions; + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/DFAutomaton.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/DFAutomaton.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,56 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public abstract class DFAutomaton { + protected struct ContextFrame { + public DFAStateDescriptior[] states; + public int current; + public T info; + } + + public const int INITIAL_STATE = DFADefinitionBase.INITIAL_STATE; + public const int UNREACHEBLE_STATE = DFADefinitionBase.UNREACHEBLE_STATE; + + protected ContextFrame m_context; + Stack m_contextStack = new Stack(); + + public int Level { + get { return m_contextStack.Count; } + } + + protected DFAutomaton(DFAStateDescriptior[] states, int startState, T info) { + Safe.ArgumentNotNull(states, "states"); + Safe.ArgumentInRange(startState, 0, states.Length - 1, "startState"); + + m_context.states = states; + m_context.current = startState; + m_context.info = info; + } + + protected void Switch(DFAStateDescriptior[] states, int current, T info) { + Debug.Assert(states != null); + Debug.Assert(current >= 0 && current < states.Length); + m_contextStack.Push(m_context); + m_context. states = states; + m_context.current = current; + m_context.info = info; + } + + protected void Restore() { + Debug.Assert(m_contextStack.Count > 0); + + m_context = m_contextStack.Pop(); + } + + protected void Move(int input) { + Debug.Assert(input > 0 && input < m_context.states[m_context.current].transitions.Length); + m_context.current = m_context.states[m_context.current].transitions[input]; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/EDFADefinition.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/EDFADefinition.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,37 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public class EDFADefinition : DFADefinitionBase where T : struct, IConvertible { + EnumAlphabet m_alphabet; + + public EnumAlphabet Alphabet { + get { return m_alphabet; } + } + + public EDFADefinition(EnumAlphabet alphabet) + : base() { + Safe.ArgumentNotNull(alphabet, "alphabet"); + m_alphabet = alphabet; + } + + public override int AlphabetSize { + get { return m_alphabet.Count; } + } + + public EDFADefinition Optimize() { + var optimized = new EDFADefinition(new EnumAlphabet()); + Optimize(optimized, m_alphabet, optimized.Alphabet); + + return optimized; + } + + public void PrintDFA() { + PrintDFA(m_alphabet); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/EmptyToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/EmptyToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,18 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public class EmptyToken : Token { + public override void Accept(IVisitor visitor) { + Safe.ArgumentNotNull(visitor, "visitor"); + visitor.Visit(this); + } + public override string ToString() { + return "$"; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/EndToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/EndToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,37 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Конечный символ расширенного регулярного выражения, при построении ДКА + /// используется для определения конечных состояний. + /// + public class EndToken: Token { + + int m_tag; + + public EndToken(int tag) { + m_tag = tag; + } + + public EndToken() + : this(0) { + } + + public int Tag { + get { return m_tag; } + } + + public override void Accept(IVisitor visitor) { + Safe.ArgumentNotNull(visitor, "visitor"); + visitor.Visit(this); + } + public override string ToString() { + return "#"; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/EnumAlphabet.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/EnumAlphabet.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,68 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Алфавит символами которого являются элементы перечислений. + /// + /// Тип перечислений + public class EnumAlphabet : AlphabetBase where T : struct, IConvertible { + static readonly T[] _symbols; + static readonly EnumAlphabet _fullAlphabet; + + static EnumAlphabet() { + if (!typeof(T).IsEnum) + throw new InvalidOperationException("Invalid generic parameter, enumeration is required"); + + if (Enum.GetUnderlyingType(typeof(T)) != typeof(Int32)) + throw new InvalidOperationException("Only enums based on Int32 are supported"); + + _symbols = ((T[])Enum.GetValues(typeof(T))) + .OrderBy(x => x.ToInt32(CultureInfo.InvariantCulture)) + .ToArray(); + + if ( + _symbols[_symbols.Length - 1].ToInt32(CultureInfo.InvariantCulture) >= _symbols.Length + || _symbols[0].ToInt32(CultureInfo.InvariantCulture) != 0 + ) + throw new InvalidOperationException("The specified enumeration must be zero-based and continuously numbered"); + + _fullAlphabet = new EnumAlphabet(_symbols.Select(x => x.ToInt32(CultureInfo.InvariantCulture)).ToArray()); + } + + + + public static EnumAlphabet FullAlphabet { + get { + return _fullAlphabet; + } + } + + + public EnumAlphabet() + : base() { + } + + public EnumAlphabet(int[] map) + : base(map) { + } + + + public override int GetSymbolIndex(T symbol) { + return symbol.ToInt32(CultureInfo.InvariantCulture); + } + + public override IEnumerable InputSymbols { + get { return _symbols; } + } + + protected override int MapSize { + get { return _symbols.Length; } + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/Grammar.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/Grammar.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,103 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Базовый абстрактный класс. Грамматика, позволяет формулировать выражения над алфавитом типа char. + /// + /// + public abstract class Grammar where TGrammar: Grammar, new() { + Alphabet m_alphabet = new Alphabet(); + static TGrammar _instance; + + public static TGrammar Instance{ + get { + if (_instance == null) + _instance = new TGrammar(); + return _instance; + } + } + + public SymbolToken UnclassifiedToken() { + return new SymbolToken(Alphabet.UNCLASSIFIED); + } + + public void DefineAlphabet(IEnumerable alphabet) { + Safe.ArgumentNotNull(alphabet, "alphabet"); + + foreach (var ch in alphabet) + m_alphabet.DefineSymbol(ch); + } + public Token SymbolRangeToken(char start, char end) { + return SymbolToken(Enumerable.Range(start, end - start + 1).Select(x => (char)x)); + } + + public Token SymbolToken(char symbol) { + return Token.New(TranslateOrAdd(symbol)); + } + + public Token SymbolToken(IEnumerable symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New(TranslateOrAdd(symbols).ToArray()); + } + + public Token SymbolSetToken(params char[] set) { + return SymbolToken(set); + } + + int TranslateOrAdd(char ch) { + var t = m_alphabet.Translate(ch); + if (t == Alphabet.UNCLASSIFIED) + t = m_alphabet.DefineSymbol(ch); + return t; + } + + IEnumerable TranslateOrAdd(IEnumerable symbols) { + return symbols.Distinct().Select(TranslateOrAdd); + } + + int TranslateOrDie(char ch) { + var t = m_alphabet.Translate(ch); + if (t == Alphabet.UNCLASSIFIED) + throw new ApplicationException(String.Format("Symbol '{0}' is UNCLASSIFIED", ch)); + return t; + } + + IEnumerable TranslateOrDie(IEnumerable symbols) { + return symbols.Distinct().Select(TranslateOrDie); + } + + public Token SymbolTokenExcept(IEnumerable symbols) { + Safe.ArgumentNotNull(symbols, "symbols"); + + return Token.New( Enumerable.Range(0, m_alphabet.Count).Except(TranslateOrDie(symbols)).ToArray()); + } + + protected CDFADefinition BuildDFA(Token lang) { + Safe.ArgumentNotNull(lang, "lang"); + + var dfa = new CDFADefinition(m_alphabet); + + var builder = new DFABuilder(); + + lang.Accept( builder ); + + builder.BuildDFA(dfa); + if (dfa.InitialStateIsFinal) + throw new ApplicationException("The specified language contains empty token"); + + return dfa.Optimize(); + } + + + + //protected abstract TGrammar CreateInstance(); + } + + +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/IAlphabet.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/IAlphabet.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,56 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Алфавит. Множество символов, которые разбиты на классы, при этом классы имеют непрерывную нумерацию, + /// что позволяет использовать их в качестве индексов массивов. + /// + /// Далее вимволами алфавита будем называть классы исходных символов. + /// Тип символов. + public interface IAlphabet { + /// + /// Количество символов в алфавите. + /// + int Count { get; } + /// + /// Добавляет новый символ в алфавит, если символ уже был добавлен, то + /// возвращается ранее сопоставленный с символом класс. + /// + /// Символ для добавления. + /// Индекс класса, который попоставлен с символом. + int DefineSymbol(TSymbol symbol); + /// + /// Доабвляем класс символов. Множеству указанных исходных символов + /// будет сопоставлен символ в алфавите. + /// + /// Множестов исходных символов + /// Идентификатор символа алфавита. + int DefineClass(IEnumerable symbols); + /// + /// Создает карту обратного сопоставления символа алфавита и сопоставленным + /// ему исходным символам. + /// + /// + List[] CreateReverseMap(); + /// + /// Создает новый алфавит на основе текущего, горппируя его сиволы в более + /// крупные непересекающиеся классы символов. + /// + /// Новый, пустой алфавит, в котором быдут определены классы. + /// Множество классов символов текущего алфавита. + /// Карта для перехода символов текущего + /// алфавита к символам нового. + int[] Reclassify(IAlphabet newAlphabet, IEnumerable> classes); + + /// + /// Преобразует входной символ в индекс символа из алфавита. + /// + /// Исходный символ + /// Индекс в алфавите + int Translate(TSymbol symobl); + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/IDFADefinition.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/IDFADefinition.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,36 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Интерфейс для определения ДКА, позволяет добавить состояния и определить переходы. + /// + public interface IDFADefinition { + /// + /// Добавляет состояние в автомат. + /// + /// Индекс добавленного состояния. + int AddState(); + /// + /// Добавляет конечное состояние с указанными метками, если метки не заданы, то + /// добавленное состояние не будет конечным. + /// + /// Метки состояния. + /// Индекс добавленного состояния. + int AddState(int[] tags); + /// + /// Определяет переход между состояниями. + /// + /// Исходное состояние. + /// Конечное состояние. + /// Входной символ. + void DefineTransition(int s1, int s2, int input); + /// + /// Размер входного алфавита. + /// + int AlphabetSize { get; } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/IVisitor.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/IVisitor.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Интерфейс обходчика синтаксического дерева регулярного выражения + /// + public interface IVisitor { + void Visit(AltToken token); + void Visit(StarToken token); + void Visit(CatToken token); + void Visit(EmptyToken token); + void Visit(EndToken token); + void Visit(SymbolToken token); + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/ParserException.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/ParserException.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace Implab.Parsing { + [Serializable] + public class ParserException : Exception { + public ParserException() { } + public ParserException(string message) : base(message) { } + public ParserException(string message, Exception inner) : base(message, inner) { } + protected ParserException( + System.Runtime.Serialization.SerializationInfo info, + System.Runtime.Serialization.StreamingContext context) + : base(info, context) { } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/Scanner.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/Scanner.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,207 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Базовый класс для разбора потока входных символов на токены. + /// + /// + /// Сканнер имеет внутри буффер с симолами входного текста, по которому перемещаются два + /// указателя, начала и конца токена, при перемещении искользуется ДКА для определения + /// конца токена и допустимости текущего символа. + /// + public class Scanner { + struct ScannerConfig { + public DFAStateDescriptior[] states; + public int[] alphabetMap; + } + + Stack m_defs = new Stack(); + + DFAStateDescriptior[] m_states; + int[] m_alphabetMap; + + protected DFAStateDescriptior m_currentState; + int m_previewCode; + + protected int m_tokenLen = 0; + protected int m_tokenOffset; + + protected char[] m_buffer; + protected int m_bufferSize; + protected int m_pointer; + + public Scanner(CDFADefinition definition, string text) { + Safe.ArgumentNotNull(definition, "definition"); + Safe.ArgumentNotEmpty(text, "text"); + + m_states = definition.States; + m_alphabetMap = definition.Alphabet.GetTranslationMap(); + + Feed(text.ToCharArray()); + } + + public Scanner(CDFADefinition definition) { + Safe.ArgumentNotNull(definition, "definition"); + + m_states = definition.States; + m_alphabetMap = definition.Alphabet.GetTranslationMap(); + + Feed(new char[0]); + } + + /// + /// Заполняет входными данными буффер. + /// + /// Данные для обработки. + /// Копирование данных не происходит, переданный массив используется в + /// качестве входного буффера. + public void Feed(char[] data) { + Safe.ArgumentNotNull(data, "data"); + + Feed(data, data.Length); + } + + /// + /// Заполняет буффур чтения входными данными. + /// + /// Данные для обработки. + /// Длина данных для обработки. + /// Копирование данных не происходит, переданный массив используется в + /// качестве входного буффера. + public void Feed(char[] data, int length) { + Safe.ArgumentNotNull(data, "data"); + Safe.ArgumentInRange(length, 0, data.Length, "length"); + + m_pointer = -1; + m_buffer = data; + m_bufferSize = length; + Shift(); + } + + /// + /// Получает текущий токен в виде строки. + /// + /// + public string GetTokenValue() { + return new String(m_buffer, m_tokenOffset, m_tokenLen); + } + + /// + /// Метки текущего токена, которые были назначены в регулярном выражении. + /// + public int[] TokenTags { + get { + return m_currentState.tag; + } + } + + /// + /// Читает следующий токен, при этом указывает на начало токена, + /// на длину токена, - массив символов, в + /// котором находится токен. + /// + /// false - достигнут конец данных, токен не прочитан. + protected bool ReadTokenInternal() { + if (m_pointer >= m_bufferSize) + return false; + + m_currentState = m_states[CDFADefinition.INITIAL_STATE]; + m_tokenLen = 0; + m_tokenOffset = m_pointer; + int nextState = CDFADefinition.UNREACHEBLE_STATE; + do { + nextState = m_currentState.transitions[m_previewCode]; + if (nextState == CDFADefinition.UNREACHEBLE_STATE) { + if (m_currentState.final) + return true; + else + throw new ParserException( + String.Format( + "Unexpected symbol '{0}', at pos {1}", + m_buffer[m_pointer], + Position + ) + ); + } else { + m_currentState = m_states[nextState]; + m_tokenLen++; + } + + } while (Shift()); + + // END OF DATA + if (!m_currentState.final) + throw new ParserException("Unexpected end of data"); + + return true; + } + + + bool Shift() { + m_pointer++; + + if (m_pointer >= m_bufferSize) { + return ReadNextChunk(); + } + + m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + + return true; + } + + /// + /// Вызывается по достижению конца входного буффера для получения + /// новых данных. + /// + /// true - новые двнные получены, можно продолжать обработку. + protected virtual bool ReadNextChunk() { + return false; + } + + /// + /// Позиция сканнера во входном буфере + /// + public int Position { + get { + return m_pointer + 1; + } + } + + /// + /// Преключает внутренний ДКА на указанный, позволяет реализовать подобие захватывающей + /// группировки. + /// + /// Таблица состояний нового ДКА + /// Таблица входных символов для нового ДКА + protected void Switch(DFAStateDescriptior[] states, int[] alphabet) { + Safe.ArgumentNotNull(states, "dfa"); + + m_defs.Push(new ScannerConfig { + states = m_states, + alphabetMap = m_alphabetMap + }); + + m_states = states; + m_alphabetMap = alphabet; + + m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + } + + /// + /// Восстанавливает предыдущей ДКА сканнера. + /// + protected void Restore() { + if (m_defs.Count == 0) + throw new InvalidOperationException(); + var prev = m_defs.Pop(); + m_states = prev.states; + m_alphabetMap = prev.alphabetMap; + m_previewCode = m_alphabetMap[m_buffer[m_pointer]]; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/StarToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/StarToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,34 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Замыкание выражения с 0 и более повторов. + /// + public class StarToken: Token { + + Token m_token; + + public Token Token { + get { return m_token; } + } + + public StarToken(Token token) { + Safe.ArgumentNotNull(token, "token"); + m_token = token; + } + + public override void Accept(IVisitor visitor) { + Safe.ArgumentNotNull(visitor, "visitor"); + visitor.Visit(this); + } + + public override string ToString() { + return String.Format("({0})*", Token.ToString()); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/SymbolToken.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/SymbolToken.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,33 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + /// + /// Выражение, соответсвующее одному символу. + /// + public class SymbolToken : Token { + int m_value; + + public int Value { + get { return m_value; } + } + + public SymbolToken(int value) { + m_value = value; + } + public override void Accept(IVisitor visitor) { + Safe.ArgumentNotNull(visitor, "visitor"); + + visitor.Visit(this); + + } + + public override string ToString() { + return Value.ToString(); + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Parsing/Token.cs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Implab/Parsing/Token.cs Sun Jun 15 19:40:06 2014 +0400 @@ -0,0 +1,67 @@ +using Implab; +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Implab.Parsing { + public abstract class Token { + public abstract void Accept(IVisitor visitor); + + public Token Extend() { + return new CatToken(this, new EndToken()); + } + + public Token Tag(T tag) where T : IConvertible { + return new CatToken(this, new EndToken(tag.ToInt32(CultureInfo.InvariantCulture))); + } + + public Token Cat(Token right) { + return new CatToken(this, right); + } + + public Token Or(Token right) { + return new AltToken(this, right); + } + + public Token Optional() { + return Or(new EmptyToken()); + } + + public Token EClosure() { + return new StarToken(this); + } + + public Token Closure() { + return new CatToken(this, new StarToken(this)); + } + + public Token Repeat(int count) { + Token token = null; + + for (int i = 0; i < count; i++) + token = token != null ? token.Cat(this) : this; + return token ?? new EmptyToken(); + } + + public Token Repeat(int min, int max) { + if (min > max || min < 1) + throw new ArgumentOutOfRangeException(); + var token = Repeat(min); + + for (int i = min; i < max; i++) + token = token.Cat( this.Optional() ); + return token; + } + + public static Token New(params T[] set) where T : struct, IConvertible { + Safe.ArgumentNotNull(set, "set"); + Token token = null; + foreach(var c in set.Distinct()) + token = token == null ? new SymbolToken(c.ToInt32(CultureInfo.InvariantCulture)) : token.Or(new SymbolToken(c.ToInt32(CultureInfo.InvariantCulture))); + return token; + } + } +} diff -r 037c2897f161 -r 4fcbe7a4b36b Implab/Safe.cs --- a/Implab/Safe.cs Tue May 20 01:18:46 2014 +0400 +++ b/Implab/Safe.cs Sun Jun 15 19:40:06 2014 +0400 @@ -25,6 +25,11 @@ throw new ArgumentNullException(name); } + public static void ArgumentInRange(int arg, int min, int max, string name) { + if (arg < min || arg > max) + throw new ArgumentOutOfRangeException(name); + } + public static void Dispose(T obj) where T : class { var disp = obj as IDisposable;