Mercurial > pub > Impl
changeset 358:248f95c1762a
added simple XML Sax parser
author | sergey |
---|---|
date | Mon, 18 Nov 2013 01:25:35 +0400 (2013-11-17) |
parents | ec58c47edb52 |
children | 833e663796c4 |
files | Lib/IMPL/XML/SaxParser.pm |
diffstat | 1 files changed, 149 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
package IMPL::XML::SaxParser;
use strict;
use warnings;

use IMPL::Const qw(:prop);
use IMPL::declare {
    require => {
        XMLReader    => 'XML::LibXML::Reader',
        Exception    => 'IMPL::Exception',
        ArgException => '-IMPL::InvalidArgumentException'
    },
    base  => [ 'IMPL::Object' => undef ],
    props => [ _reader => PROP_RW ]
};

BEGIN {
    # Pull the XML_READER_TYPE_* constants into this package at compile time
    # so they can be used as barewords below.
    XMLReader->import;
}

# Parse($options)
#
# Creates a reader from $options (passed unchanged to the XMLReader
# constructor), stores it in the _reader property and advances to the first
# element node, which is handed to ProcessRootNode.
#
# Comments, processing instructions and a DOCTYPE may precede the root
# element; they are skipped so that ProcessRootNode always sees an element.
#
# Returns whatever ProcessRootNode returns, or nothing when the input
# contains no element.
sub Parse {
    my ( $this, $options ) = @_;

    my $reader = $this->_reader( XMLReader->new($options) );

    while ( $reader->read() > 0 ) {
        next unless $reader->nodeType == XML_READER_TYPE_ELEMENT;
        return $this->ProcessRootNode($reader);
    }

    return;
}

# ProcessRootNode($reader)
#
# Hook called by Parse with the reader positioned on the root element.
# The default implementation only emits a warning; it is meant to be
# overridden.
sub ProcessRootNode {
    my ( $this, $reader ) = @_;
    warn "not implemented";
}

# ReadChildren($handler)
#
# Walks every node below the current element and calls
# $this->$handler($reader) for each of them except end-element nodes.
# $handler may be a code reference or a method name; when it is omitted the
# content of the current element is simply skipped.
#
# Nothing is read unless the reader is positioned on a non-empty element.
#
# Note: the handler is invoked for every node whose depth is greater than the
# depth of the starting element. A handler that does not consume the subtree
# of an element it was called for (e.g. through a nested ReadChildren) will
# therefore also be called for that element's descendants.
sub ReadChildren {
    my ( $this, $handler ) = @_;

    my $reader = $this->_reader;

    # Only non-empty elements can have content.
    return
      unless $reader->nodeType == XML_READER_TYPE_ELEMENT
      && !$reader->isEmptyElement;

    # Everything below the current node has to be read, so remember the
    # depth we started at.
    my $currentLevel = $reader->depth;

    # Reading and testing this condition also consumes the end tag of the
    # current node.
    while ( $reader->read && $reader->depth > $currentLevel ) {

        # End-element nodes are not reported to the handler.
        $this->$handler($reader)
          if $handler and $reader->nodeType != XML_READER_TYPE_END_ELEMENT;
    }

    return;
}

# ReadTextNode()
#
# Returns the concatenated character data (text and CDATA nodes) found
# anywhere below the current element. Returns an empty string when there is
# none.
sub ReadTextNode {
    my ($this) = @_;

    my $text = "";
    $this->_ReadTextInto( \$text );

    return $text;
}

# _ReadTextInto($buffer)
#
# Appends the character data found below the current element to the scalar
# referenced by $buffer, descending into nested elements.
#
# The recursion goes through this method instead of a closure that refers to
# itself: a self-referencing closure forms a reference cycle that is never
# freed.
sub _ReadTextInto {
    my ( $this, $buffer ) = @_;

    $this->ReadChildren(
        sub {
            my ( $me, $reader ) = @_;

            my $type = $reader->nodeType;
            if (   $type == XML_READER_TYPE_TEXT
                || $type == XML_READER_TYPE_CDATA )
            {
                $$buffer .= $reader->value;
            }
            else {
                # For anything that is not a non-empty element this is a
                # no-op, see ReadChildren.
                $me->_ReadTextInto($buffer);
            }
        }
    );

    return;
}

# ReadComplexContent($schema)
#
# Alias for ReadComplexNode; the call is forwarded with the same arguments.
sub ReadComplexContent {
    goto &ReadComplexNode;
}

# ReadComplexNode($schema)
#
# Reads the current node according to $schema:
#
#   * HASH  - every key is a selector of the form 'alias' or 'alias:node'.
#             Children whose local name equals 'node' (or 'alias' when no
#             node part is given) are read with the corresponding value as
#             their schema and stored under 'alias' in the returned hash
#             reference. A value wrapped in an array reference collects all
#             matching children into a list; otherwise the last match wins.
#             Children without a matching selector are skipped.
#   * CODE or method name - called as $this->$schema($reader); its result is
#             returned.
#
# Dies with an ArgException for any other kind of schema, including an
# undefined or empty one.
sub ReadComplexNode {
    my ( $this, $schema ) = @_;

    my $kind = ref $schema;

    if ( $kind eq 'HASH' ) {
        my ( %data, %handlers, %aliases );

        for my $selector ( keys %$schema ) {
            my ( $alias, $node ) = split /:/, $selector;
            $node ||= $alias;
            $handlers{$node} = $schema->{$selector};
            $aliases{$node}  = $alias;
        }

        $this->ReadChildren(
            sub {
                my ( $me, $node ) = @_;

                my $name  = $node->localName;
                my $alias = $aliases{$name};
                if ( my $handler = $handlers{$name} ) {
                    if ( ref $handler eq 'ARRAY' ) {
                        push @{ $data{$alias} },
                          $me->ReadComplexNode( $$handler[0] );
                    }
                    else {
                        $data{$alias} = $me->ReadComplexNode($handler);
                    }
                }
                else {
                    # Unknown node: skip its content.
                    $me->ReadChildren();
                }
            }
        );

        return \%data;
    }

    # A plain string is treated as a method name. An undefined or empty
    # schema is rejected here instead of failing inside the method call.
    if ( $kind eq 'CODE'
        or ( !$kind and defined $schema and length $schema ) )
    {
        return $this->$schema( $this->_reader );
    }

    die ArgException->new( schema => 'An invalid schema is supplied' );
}

# attribute($name)
#
# Returns the value of the attribute $name of the current node, as reported
# by the reader's getAttribute.
sub attribute {
    my ( $this, $name ) = @_;

    return $this->_reader->getAttribute($name);
}

1;

__END__

=pod

=head1 NAME

C<IMPL::XML::SaxParser> - simple schema-driven XML parser on top of
C<XML::LibXML::Reader>

=head1 DESCRIPTION

Base class for simple XML parsers. C<Parse> creates the reader and passes the
root element to C<ProcessRootNode>, which is meant to be overridden. The
overriding method reads the document with C<ReadComplexNode>, C<ReadTextNode>,
C<ReadChildren> and C<attribute>.

=head1 MEMBERS

=head2 Parse($options)

Creates the reader from C<$options>, skips to the first element of the
document and calls C<ProcessRootNode> for it.

=head2 ProcessRootNode($reader)

Called with the reader positioned on the root element. The default
implementation only warns.

=head2 ReadChildren($handler)

Calls C<$handler> for every node below the current element, except
end-element nodes. Without a handler the content of the element is skipped.

=head2 ReadTextNode()

Returns the text and CDATA content found below the current element.

=head2 ReadComplexNode($schema)

Reads the current node according to C<$schema>, which is either a hash of
C<< selector => schema >> pairs, a code reference or a method name. A selector
is either C<alias> or C<alias:node>. A schema wrapped in an array reference
collects all matching children into a list.

=begin code

{
    comments => sub { shift->ReadTextNode },
    data => [ {
        location => sub { $_[1]->getAttribute('href')} ,
        timestamp => 'ReadTextNode'
    } ]
}

=end code

=head2 ReadComplexContent($schema)

Alias for C<ReadComplexNode>.

=head2 attribute($name)

Returns the value of the attribute C<$name> of the current node.

=cut