comparison Lib/IMPL/XML/SaxParser.pm @ 358:248f95c1762a

added simple XML Sax parser
author sergey
date Mon, 18 Nov 2013 01:25:35 +0400
parents
children
comparison
equal deleted inserted replaced
357:ec58c47edb52 358:248f95c1762a
1 package IMPL::XML::SaxParser;
2 use strict;
3
4 use IMPL::Const qw(:prop);
5 use IMPL::declare {
6 require => {
7 XMLReader => 'XML::LibXML::Reader',
8 Exception => 'IMPL::Exception',
9 ArgException => '-IMPL::InvalidArgumentException'
10 },
11 base => [ 'IMPL::Object' => undef ],
12 props => [ _reader => PROP_RW ]
13 };
14
# Run the reader class's import at compile time so that the XML_READER_TYPE_*
# barewords used by the methods below are already known when they are compiled.
# NOTE(review): assumes XMLReader is the alias declared in the IMPL::declare
# "require" section and that its default exports include these constants -
# confirm against XML::LibXML::Reader.
BEGIN {
    XMLReader->import();
}
18
# Entry point: creates a reader, remembers it in the _reader property and, if a
# first node can be read, passes the reader to ProcessRootNode.
#
#   $options - handed unchanged to XMLReader->new
#
# When a node was read the result of ProcessRootNode is the result of Parse.
sub Parse {
    my $this    = shift;
    my $options = shift;

    my $reader = $this->_reader( XMLReader->new($options) );

    # only a positive result of read() is treated as "a node is available"
    $this->ProcessRootNode($reader) if $reader->read() > 0;
}
28
# Called by Parse with the reader once the first node has been read.
# This implementation is a stub: it only emits a "not implemented" warning.
# NOTE(review): presumably meant to be overridden by subclasses - confirm.
sub ProcessRootNode {
    my $this   = shift;
    my $reader = shift;

    warn "not implemented";
}
33
# Walks all nodes nested below the element the reader is currently positioned
# on and passes each of them to $handler.
#
#   $handler - optional; a code reference or a method name, invoked as
#              $this->$handler($reader) for every nested node that is not an
#              end-element node. When omitted the nested content is skipped.
#
# Does nothing unless the current node is a non-empty element.
sub ReadChildren {
    my ( $this, $handler ) = @_;

    my $reader = $this->_reader;

    # only non-empty elements can have content
    if ( $reader->nodeType == XML_READER_TYPE_ELEMENT && !$reader->isEmptyElement ) {

        # everything below this node has to be read, so remember the current level
        my $currentLevel = $reader->depth;

        # Reading and checking this condition also "consumes" the closing tag
        # of the current node.
        # read() yields 1 on success, 0 at the end of input and -1 on error, so
        # only a positive result means a node is available; testing for plain
        # truth would treat the error value as a successfully read node.
        while ( $reader->read() > 0 && $reader->depth > $currentLevel ) {

            # closing nodes are skipped while traversing the children
            $this->$handler($reader)
              if $handler and $reader->nodeType != XML_READER_TYPE_END_ELEMENT;
        }
    }

    return;
}
52
# Collects the text below the current element: the values of all text nodes
# found at any nesting depth are concatenated in document order.
#
# Returns the accumulated string ("" when no text node was seen).
sub ReadTextNode {
    my ($this) = @_;

    # core module, loaded here so the block does not depend on a file-level import
    require Scalar::Util;

    my $text = "";

    # The walker has to pass itself to ReadChildren in order to descend into
    # nested elements. It does so through $handler, which is weakened below:
    # a strong self-reference inside the closure would form a cycle that is
    # never freed, leaking one closure per call.
    my $handler;
    my $walker = sub {
        my ( $me, $reader ) = @_;
        if ( $reader->nodeType == XML_READER_TYPE_TEXT ) {
            $text .= $reader->value;
        } else {
            $this->ReadChildren($handler);
        }
    };
    $handler = $walker;
    Scalar::Util::weaken($handler);

    # $walker keeps the closure alive for the duration of the walk
    $this->ReadChildren($walker);

    return $text;
}
72
# Alias for ReadComplexNode: control is transferred with the current argument
# list, so the call behaves exactly as if ReadComplexNode had been invoked.
sub ReadComplexContent {
    my $target = \&ReadComplexNode;
    goto &$target;
}
76
# Reads the current node according to $schema.
#
#   $schema - one of:
#     * a hash reference mapping selectors to nested schemas. A selector is
#       either "name" or "alias:name": child nodes whose local name is "name"
#       are read with the nested schema and stored under "alias" (or under
#       "name" when no alias is given). A nested schema wrapped in an array
#       reference collects the results of all matching children into a list.
#       Children without a matching selector are skipped.
#     * a code reference or a method name, invoked as
#       $this->$schema($this->_reader); its result is returned.
#
# Returns a hash reference for a hash schema, otherwise the handler's result.
# Dies with ArgException for any other schema, including an undefined or
# empty one.
sub ReadComplexNode {
    my ( $this, $schema ) = @_;

    if ( ref $schema eq 'HASH' ) {
        my %data;

        # lookup tables keyed by the node name
        my ( %handlers, %aliases );
        for my $selector ( keys %$schema ) {
            my ( $alias, $node ) = split /:/, $selector;
            $node ||= $alias;
            $handlers{$node} = $schema->{$selector};
            $aliases{$node}  = $alias;
        }

        $this->ReadChildren(
            sub {
                my ( $me, $node ) = @_;

                my $name  = $node->localName;
                my $alias = $aliases{$name};
                if ( my $handler = $handlers{$name} ) {
                    if ( ref $handler eq 'ARRAY' ) {
                        push @{ $data{$alias} }, $me->ReadComplexNode( $$handler[0] );
                    } else {
                        $data{$alias} = $me->ReadComplexNode($handler);
                    }
                } else {
                    # unknown node: skip its content
                    $me->ReadChildren();
                }
            }
        );

        return \%data;
    }
    elsif (ref $schema eq 'CODE'
        or ( defined $schema and not ref $schema and length $schema ) )
    {
        # a code reference or a non-empty method name
        return $this->$schema( $this->_reader );
    }
    else {
        # also reached for an undefined or empty schema, which cannot be
        # used as a method name
        die ArgException->new( schema => 'An invalid schema is supplied' );
    }
}
118
# Convenience accessor: returns the result of getAttribute($name) called on the
# current reader.
#
#   $name - attribute name, passed through to the reader
sub attribute {
    my ( $this, $name ) = @_;

    return $this->_reader->getAttribute($name);
}
122
123 1;
124
125 __END__
126
127 =pod
128
=head1 NAME

C<IMPL::XML::SaxParser> - a simple XML parser built on top of C<XML::LibXML::Reader>
130
131 =head1 DESCRIPTION
132
133 =head1 MEMBERS
134
135 =head2 ReadComplexNode($schema)
136
137 =begin code
138
139 {
140 comments => sub { shift->ReadTextNode },
141 data => [ {
142 location => sub { $_[1]->getAttribute('href')} ,
143 timestamp => 'ReadTextNode'
144 } ]
145 }
146
147 =end code
148
149 =cut