annotate Implab/Formats/JSON/JSONScanner.cs @ 170:181119ef3b39 ref20160224

DFA refactoring, rx based dfa.
author cin
date Fri, 04 Mar 2016 01:56:31 +0300
parents e227e78d72e4
children 0c3c69fe225b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
1 using System;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
2 using System.Globalization;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
3 using Implab.Automaton;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
4
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
5 namespace Implab.Formats.JSON {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
6 /// <summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
7 /// Сканер (лексер), разбивающий поток символов на токены JSON.
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
8 /// </summary>
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
9 public class JSONScanner : Scanner<object> {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
10 char[] m_stringBuffer;
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
11 DFAStateDescriptior<>[] m_stringDFA;
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
12 int[] m_stringAlphabet;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
13
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
14 /// <summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
15 /// Создает новый экземпляр сканера
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
16 /// </summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
17 public JSONScanner()
165
e227e78d72e4 DFA refactoring
cin
parents: 163
diff changeset
18 : base(JSONGrammar.Instance.JsonDFA.GetTransitionTable(), JSONGrammar.Instance.JsonDFA.Alphabet.GetTranslationMap()) {
163
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
19 m_stringBuffer = new char[1024];
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
20 var dfa = JSONGrammar.Instance.JsonStringDFA;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
21 m_stringAlphabet = dfa.Alphabet.GetTranslationMap();
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
22 m_stringDFA = dfa.States;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
23 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
24
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
25 /// <summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
26 /// Читает следующий лексический элемент из входных данных.
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
27 /// </summary>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
28 /// <param name="tokenValue">Возвращает значение прочитанного токена.</param>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
29 /// <param name="tokenType">Возвращает тип прочитанного токена.</param>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
30 /// <returns><c>true</c> - чтение произведено успешно. <c>false</c> - достигнут конец входных данных</returns>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
31 /// <remarks>В случае, если токен не распознается, возникает исключение. Значения токенов обрабатываются, т.е.
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
32 /// в строках обрабатываются экранированные символы, числа становятся типа double.</remarks>
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
33 public bool ReadToken(out object tokenValue, out JsonTokenType tokenType) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
34 if (ReadTokenInternal()) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
35 switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
36 case JSONGrammar.TokenType.StringBound:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
37 tokenValue = ReadString();
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
38 tokenType = JsonTokenType.String;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
39 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
40 case JSONGrammar.TokenType.Number:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
41 tokenValue = Double.Parse(new String(m_buffer, m_tokenOffset, m_tokenLen), CultureInfo.InvariantCulture);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
42 tokenType = JsonTokenType.Number;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
43 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
44 default:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
45 tokenType = (JsonTokenType)m_currentState.tag[0];
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
46 tokenValue = new String(m_buffer, m_tokenOffset, m_tokenLen);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
47 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
48 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
49 return true;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
50 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
51 tokenValue = null;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
52 tokenType = JsonTokenType.None;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
53 return false;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
54 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
55
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
56 string ReadString() {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
57 int pos = 0;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
58 Switch(m_stringDFA, m_stringAlphabet);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
59 while (ReadTokenInternal()) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
60 switch ((JSONGrammar.TokenType)m_currentState.tag[0]) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
61 case JSONGrammar.TokenType.StringBound:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
62 Restore();
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
63 return new String(m_stringBuffer, 0, pos);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
64 case JSONGrammar.TokenType.UnescapedChar:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
65 EnsureStringBufferSize(pos + m_tokenLen);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
66 Array.Copy(m_buffer, m_tokenOffset, m_stringBuffer, pos, m_tokenLen);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
67 pos += m_tokenLen;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
68 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
69 case JSONGrammar.TokenType.EscapedUnicode:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
70 EnsureStringBufferSize(pos + 1);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
71 m_stringBuffer[pos] = StringTranslator.TranslateHexUnicode(m_buffer, m_tokenOffset + 2);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
72 pos++;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
73 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
74 case JSONGrammar.TokenType.EscapedChar:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
75 EnsureStringBufferSize(pos + 1);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
76 m_stringBuffer[pos] = StringTranslator.TranslateEscapedChar(m_buffer[m_tokenOffset + 1]);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
77 pos++;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
78 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
79 default:
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
80 break;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
81 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
82
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
83 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
84
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
85 throw new ParserException("Unexpected end of data");
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
86 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
87
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
88 void EnsureStringBufferSize(int size) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
89 if (size > m_stringBuffer.Length) {
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
90 var newBuffer = new char[size];
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
91 m_stringBuffer.CopyTo(newBuffer, 0);
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
92 m_stringBuffer = newBuffer;
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
93 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
94 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
95 }
419aa51b04fd JSON moved to Formats namespace
cin
parents:
diff changeset
96 }