changeset 358:248f95c1762a

added simple XML Sax parser
author sergey
date Mon, 18 Nov 2013 01:25:35 +0400 (2013-11-17)
parents ec58c47edb52
children 833e663796c4
files Lib/IMPL/XML/SaxParser.pm
diffstat 1 files changed, 149 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/IMPL/XML/SaxParser.pm	Mon Nov 18 01:25:35 2013 +0400
@@ -0,0 +1,149 @@
+package IMPL::XML::SaxParser;
+use strict;
+
+use IMPL::Const qw(:prop);
+use IMPL::declare {
+	require => {    # short aliases used below (XMLReader->new, ArgException->new)
+		XMLReader    => 'XML::LibXML::Reader',
+		Exception    => 'IMPL::Exception',
+		ArgException => '-IMPL::InvalidArgumentException'
+	  },
+	  base  => [ 'IMPL::Object' => undef ],
+	  props => [ _reader        => PROP_RW ]    # set by Parse, read by the Read* methods
+};
+
+BEGIN {
+	XMLReader->import;    # at compile time; presumably brings in the XML_READER_TYPE_* constants used below
+}
+
+sub Parse {
+	my $this    = shift;
+	my $options = shift;
+
+	# Build a reader from $options, remember it in _reader, and hand over to
+	# ProcessRootNode once the first node has been read successfully.
+	my $reader = $this->_reader( XMLReader->new($options) );
+	return $this->ProcessRootNode($reader) if $reader->read() > 0;
+}
+
+sub ProcessRootNode {
+	# Called by Parse as $this->ProcessRootNode($reader); this version only warns.
+	warn "not implemented";
+}
+
+sub ReadChildren {
+	my ( $this, $handler ) = @_;
+
+	# Visits every node below the current element. $handler (optional) is a
+	# code ref or method name invoked as $this->$handler($reader); without it
+	# the subtree is just skipped. Only non-empty elements have any content.
+	my $reader = $this->_reader;
+	if ( $reader->nodeType == XML_READER_TYPE_ELEMENT && !$reader->isEmptyElement ) {
+		# Everything deeper than this level belongs to the current element.
+		my $currentLevel = $reader->depth;
+		# read() yields 1 on success, 0 at end of input and -1 on error, so
+		# test "> 0"; the final read also consumes this element's closing tag.
+		while ( $reader->read() > 0 && $reader->depth > $currentLevel ) {
+			next if !$handler || $reader->nodeType == XML_READER_TYPE_END_ELEMENT;
+			$this->$handler($reader);    # closing tags of descendants are never reported
+		}
+	}
+}
+
+sub ReadTextNode {
+	my ($this) = @_;
+
+	# Concatenates the values of all XML_READER_TYPE_TEXT nodes found below the
+	# current element, in reading order, and returns the resulting string.
+	my $text = "";
+	my $handler;
+	$handler = sub {
+		my ( $me, $reader ) = @_;
+		if ( $reader->nodeType == XML_READER_TYPE_TEXT ) {
+			$text .= $reader->value;
+		} else {
+			$this->ReadChildren($handler);    # descend into nested nodes
+		}
+	};
+	$this->ReadChildren($handler);
+	undef $handler;    # the closure captures $handler itself: break the cycle so it is freed
+	return $text;
+}
+
+sub ReadComplexContent {    # alternative name for ReadComplexNode
+	goto &ReadComplexNode;    # tail call with the unchanged @_; resolved in this package, not via method lookup
+}
+
+sub ReadComplexNode {
+	my ( $this, $schema ) = @_;
+	# $schema is a hash { 'alias:node' | 'node' => sub-schema } read into a hash
+	# ref keyed by alias ([sub-schema] collects repeated nodes into an array),
+	# or a code ref / method name called as $this->$schema($reader).
+	if ( ref $schema eq 'HASH' ) {
+		my ( %data, %handlers, %aliases );
+		for my $selector ( keys %$schema ) {
+			my ( $alias, $node ) = split /:/, $selector;
+			$node ||= $alias;    # plain 'node' selector: stored under its own name
+			$handlers{$node} = $schema->{$selector};
+			$aliases{$node}  = $alias;
+		}
+		$this->ReadChildren(
+			sub {
+				my ( $me, $node ) = @_;
+				my $name    = $node->localName;
+				my $handler = $handlers{$name}
+				  or return $me->ReadChildren();    # unknown node: skip its subtree
+				my $depth = $node->depth;
+				if ( ref $handler eq 'ARRAY' ) {
+					push @{ $data{ $aliases{$name} } }, $me->ReadComplexNode( $$handler[0] );
+				} else {
+					$data{ $aliases{$name} } = $me->ReadComplexNode($handler);
+				}
+				# A handler that only looked at the start tag leaves the subtree
+				# unread; skip it so descendants are not matched against this schema.
+				$me->ReadChildren()
+				  if $node->depth == $depth and $node->nodeType == XML_READER_TYPE_ELEMENT;
+			}
+		);
+		return \%data;
+	}
+	# An undef or empty schema is rejected below instead of failing as a method call.
+	elsif ( ref $schema eq 'CODE' or ( !ref $schema and defined $schema and length $schema ) ) {
+		return $this->$schema( $this->_reader );
+	}
+	else {
+		die ArgException->new( schema => 'An invalid schema is supplied' );
+	}
+}
+
+sub attribute {
+	return $_[0]->_reader->getAttribute( $_[1] );    # delegate to the reader: ($this, $name)
+}
+
+1;
+
+__END__
+
+=pod
+
+=head1 NAME
+
+=head1 DESCRIPTION
+
+=head1 MEMBERS
+
+=head2 ReadComplexNode($schema)
+
+=begin code
+
+{
+	comments => sub { shift->ReadTextNode },
+	data => [ {
+		location => sub { $_[1]->getAttribute('href')} ,
+		timestamp => 'ReadTextNode' 
+	} ]
+}
+
+=end code
+
+=cut