# HG changeset patch # User Sergey # Date 1258927146 -10800 # Node ID c2e7f7c96bcdb787407f71c86504fda2cab7b1f1 # Parent 1828103371d01950f8bbf620d14c7fffa909d3d3 performance improvements, DOM reworked (a little) diff -r 1828103371d0 -r c2e7f7c96bcd Lib/IMPL/DOM/Document.pm --- a/Lib/IMPL/DOM/Document.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/Lib/IMPL/DOM/Document.pm Mon Nov 23 00:59:06 2009 +0300 @@ -6,6 +6,10 @@ __PACKAGE__->PassThroughArgs; +sub document { + return $_[0]; +} + sub Create { my ($this,$nodeName,$class,$refProps) = @_; @@ -15,6 +19,7 @@ return $class->new( nodeName => $nodeName, + document => $this, %$refProps ); } diff -r 1828103371d0 -r c2e7f7c96bcd Lib/IMPL/DOM/Node.pm --- a/Lib/IMPL/DOM/Node.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/Lib/IMPL/DOM/Node.pm Mon Nov 23 00:59:06 2009 +0300 @@ -12,20 +12,24 @@ use IMPL::Exception; BEGIN { - public _direct property nodeName => prop_get | owner_set; + public _direct property nodeName => prop_get; + public _direct property document => prop_get; public _direct property isComplex => { get => \&_getIsComplex } ; public _direct property nodeValue => prop_all; public _direct property childNodes => { get => \&_getChildNodes }; public _direct property parentNode => prop_get ; - public _direct property rootNode => { get => \&_getRootNode}; private _direct property _propertyMap => prop_all ; } sub CTOR { my ($this,%args) = @_; - $this->nodeName(delete $args{nodeName}) or die new IMPL::InvalidArgumentException("A name is required"); - $this->nodeValue(delete $args{nodeValue}); + $this->{$nodeName} = delete $args{nodeName} or die new IMPL::InvalidArgumentException("A name is required"); + $this->{$nodeValue} = delete $args{nodeValue} if exists $args{nodeValue}; + if ( exists $args{document} ) { + $this->{$document} = delete $args{document}; + weaken($this->{$document}); + } $this->{$_propertyMap} = \%args; } @@ -198,25 +202,16 @@ $_[0]->childNodes->Count ? 1 : 0; } -sub _getRootNode { - $_[0]->{$rootNode} || $_[0]; -} - -sub _updateRootRefs { +sub _updateDocRefs { my ($this) = @_; - if ( my $newRoot = $this->{$parentNode} ? $this->{$parentNode}->rootNode : undef) { - if ($this->{$rootNode} ? $this->{$rootNode} != $newRoot : 1 ) { - $this->{$rootNode} = $newRoot; - weaken($this->{$rootNode}); - } - } elsif($this->{$rootNode}) { - delete $this->{$rootNode}; - } + # this method is called by the parent node on his children, so we need no to check parent + $this->{$document} = $this->{$parentNode}->document; - if ($this->{$childNodes}) { - $_->_updateRootRefs foreach @{$this->{$childNodes}}; - } + # prevetn ciclyc + weaken($this->{$document}) if $this->{$document}; + + $_->_updateDocRefs foreach @{$this->{$childNodes}}; } sub _setParent { @@ -224,21 +219,31 @@ if (($node || 0) != ($this->{$parentNode} || 0)) { + my $newOwner; if ($node) { $this->{$parentNode} = $node; + $newOwner = $node->document || 0; + # prevent from creating cyclicreferences weaken($this->{$parentNode}); + } else { delete $this->{$parentNode}; + $newOwner = 0; } - $this->_updateRootRefs; + + if (($this->{$document}||0) != $newOwner) { + $this->{$document} = $newOwner; + weaken($this->{$document}) if $newOwner; + $_->_updateDocRefs foreach @{$this->childNodes}; + } } } sub text { my ($this) = @_; - join '', $this->nodeValue || '', map $_->nodeValue || '', @{$this->childNodes}; + join ('', $this->nodeValue || '', map ($_->text || '', @{$this->childNodes})); } sub nodeProperty { diff -r 1828103371d0 -r c2e7f7c96bcd Lib/IMPL/DOM/Schema.pm --- a/Lib/IMPL/DOM/Schema.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/Lib/IMPL/DOM/Schema.pm Mon Nov 23 00:59:06 2009 +0300 @@ -31,11 +31,13 @@ $_[0]->{$_TypesMap}->{$_[1]}; } -#sub Create { -# my ($this,$nodeName,$class,$refArgs) = @_; +sub Create { + my ($this,$nodeName,$class,$refArgs) = @_; -# goto &SUPER::Create unless $class eq 'IMPL::DOM::Schema::Validator' -#} + die new IMPL::Exception('Invalid node class') unless $class->isa('IMPL::DOM::Schema::Node'); + + goto &SUPER::Create; +} sub Process { my ($this) = @_; diff -r 1828103371d0 -r c2e7f7c96bcd Lib/IMPL/DOM/Schema/Node.pm --- a/Lib/IMPL/DOM/Schema/Node.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/Lib/IMPL/DOM/Schema/Node.pm Mon Nov 23 00:59:06 2009 +0300 @@ -30,7 +30,7 @@ sub Validate { my ($this,$node) = @_; - if (my $schemaType = $this->{$type} ? $this->rootNode->resolveType($this->{$type}) : undef ) { + if (my $schemaType = $this->{$type} ? $this->document->resolveType($this->{$type}) : undef ) { return $schemaType->Validate($node); } else { return (); diff -r 1828103371d0 -r c2e7f7c96bcd Lib/IMPL/DOM/XMLReader.pm --- a/Lib/IMPL/DOM/XMLReader.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/Lib/IMPL/DOM/XMLReader.pm Mon Nov 23 00:59:06 2009 +0300 @@ -12,6 +12,8 @@ BEGIN { public _direct property Navigator => prop_get | owner_set; private _direct property _current => prop_all; + private _direct property _text => prop_all; + private _direct property _textHistory => prop_all; } sub Parse { @@ -33,9 +35,9 @@ my $parser = new XML::Parser( Handlers => { - Start => sub {shift; goto &_OnStart($this,@_)}, - End => sub {shift; goto &_OnEnd($this,@_)}, - Char => sub {shift; goto &_OnChar($this,@_)} + Start => sub {shift; unshift @_, $this; goto &_OnBegin;}, + End => sub {shift; unshift @_, $this; goto &_OnEnd;}, + Char => sub {shift; unshift @_, $this; goto &_OnChar;} } ); @@ -46,19 +48,22 @@ sub _OnBegin { my ($this,$element,%attrs) = @_; + push @{$this->{$_textHistory}},$this->{$_text}; + $this->{$_text} = ""; $this->{$_current} = $this->Navigator->NavigateCreate($element,%attrs); } sub _OnEnd { my ($this,$element) = @_; - $this->{$_current} = $this->Back; + $this->{$_current}->nodeValue($this->{$_text}) if length $this->{$_text}; + $this->{$_text} = pop @{$this->{$_textHistory}}; + $this->{$_current} = $this->Navigator->Back; } sub _OnChar { my ($this,$val) = @_; - - $this->{$_current}->nodeValue($this->{$_current}->nodeValue . $val); + $this->{$_text} .= $val; } 1; diff -r 1828103371d0 -r c2e7f7c96bcd _test/Resources/large.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test/Resources/large.xml Mon Nov 23 00:59:06 2009 +0300 @@ -0,0 +1,2 @@ + +WEB-DEVWEB-DEV268435456WEB-DEVACPI\PNP0A03\0WEB-DEV268435456WEB-DEVACPI\PNP0A03\0WEB-DEVWEB-DEV3758096384WEB-DEVWEB-DEV3758096384WEB-DEVWEB-DEVWEB-DEV4026531840WEB-DEVWEB-DEV4026531840WEB-DEVWEB-DEVWEB-DEV4027056128WEB-DEVWEB-DEV4027056128WEB-DEVWEB-DEVWEB-DEV4030726144WEB-DEVWEB-DEV4030726144WEB-DEVWEB-DEVWEB-DEV4034920448WEB-DEVWEB-DEV4034920448WEB-DEVWEB-DEVWEB-DEV4034936832WEB-DEVWEB-DEV4034936832WEB-DEVWEB-DEVWEB-DEV655360WEB-DEVACPI\PNP0A03\0WEB-DEV655360WEB-DEVACPI\PNP0A03\0WEB-DEVWEB-DEV655360WEB-DEVWEB-DEV655360WEB-DEVWEB-DEVWEB-DEV2WEB-DEVWEB-DEV2WEB-DEVWEB-DEVWEB-DEV4WEB-DEVWEB-DEV4WEB-DEVWEB-DEVWEB-DEV1WEB-DEVWEB-DEV1WEB-DEVWEB-DEVWEB-DEV10WEB-DEVWEB-DEV10WEB-DEVWEB-DEVWEB-DEV11WEB-DEVWEB-DEV11WEB-DEVWEB-DEVWEB-DEV12WEB-DEVWEB-DEV12WEB-DEVWEB-DEVWEB-DEV14WEB-DEVWEB-DEV14WEB-DEVWEB-DEVWEB-DEV15WEB-DEVWEB-DEV15WEB-DEVWEB-DEVWEB-DEV5WEB-DEVWEB-DEV5WEB-DEVWEB-DEVWEB-DEV6WEB-DEVWEB-DEV6WEB-DEVWEB-DEVWEB-DEV9WEB-DEVACPI_HAL\PNP0C08\0WEB-DEV9WEB-DEVACPI_HAL\PNP0C08\0WEB-DEVWEB-DEV9WEB-DEVWEB-DEV9WEB-DEVWEB-DEVWEB-DEV0WEB-DEVWEB-DEV0WEB-DEVWEB-DEVWEB-DEV0WEB-DEVACPI\PNP0A03\0WEB-DEV0WEB-DEVACPI\PNP0A03\0WEB-DEVWEB-DEV100WEB-DEVWEB-DEV100WEB-DEVWEB-DEVWEB-DEV1008WEB-DEVWEB-DEV1008WEB-DEVWEB-DEVWEB-DEV1014WEB-DEVWEB-DEV1014WEB-DEVWEB-DEVWEB-DEV1015WEB-DEVWEB-DEV1015WEB-DEVWEB-DEVWEB-DEV128WEB-DEVWEB-DEV128WEB-DEVWEB-DEVWEB-DEV1912WEB-DEVWEB-DEV1912WEB-DEVWEB-DEVWEB-DEV192WEB-DEVWEB-DEV192WEB-DEVWEB-DEVWEB-DEV2681WEB-DEVISAPNP\READDATAPORT\0WEB-DEV2681WEB-DEVISAPNP\READDATAPORT\0WEB-DEVWEB-DEV3328WEB-DEVACPI\PNP0A03\0WEB-DEV3328WEB-DEVACPI\PNP0A03\0WEB-DEVWEB-DEV368WEB-DEVWEB-DEV368WEB-DEVWEB-DEVWEB-DEV496WEB-DEVWEB-DEV496WEB-DEVWEB-DEVWEB-DEV53248WEB-DEVWEB-DEV53248WEB-DEVWEB-DEVWEB-DEV53280WEB-DEVWEB-DEV53280WEB-DEVWEB-DEVWEB-DEV53312WEB-DEVWEB-DEV53312WEB-DEVWEB-DEVWEB-DEV53504WEB-DEVWEB-DEV53504WEB-DEVWEB-DEVWEB-DEV53760WEB-DEVWEB-DEV53760WEB-DEVWEB-DEVWEB-DEV628WEB-DEVISAPNP\READDATAPORT\0WEB-DEV628WEB-DEVISAPNP\READDATAPORT\0WEB-DEVWEB-DEV633WEB-DEVISAPNP\READDATAPORT\0WEB-DEV633WEB-DEVISAPNP\READDATAPORT\0WEB-DEVWEB-DEV886WEB-DEVWEB-DEV886WEB-DEVWEB-DEVWEB-DEV888WEB-DEVWEB-DEV888WEB-DEVWEB-DEVWEB-DEV944WEB-DEVWEB-DEV944WEB-DEVWEB-DEVWEB-DEV96WEB-DEVWEB-DEV96WEB-DEVWEB-DEVWEB-DEV960WEB-DEVWEB-DEV960WEB-DEV diff -r 1828103371d0 -r c2e7f7c96bcd _test/Resources/person_info.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/_test/Resources/person_info.xml Mon Nov 23 00:59:06 2009 +0300 @@ -0,0 +1,10 @@ + + + Norman + Freeman +
+ hellroad + 1 +
+
+ diff -r 1828103371d0 -r c2e7f7c96bcd _test/Test/DOM/Builder.pm --- a/_test/Test/DOM/Builder.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/_test/Test/DOM/Builder.pm Mon Nov 23 00:59:06 2009 +0300 @@ -12,6 +12,7 @@ require IMPL::DOM::Navigator::Builder; require IMPL::DOM::Navigator::SimpleBuilder; require IMPL::DOM::Document; +require IMPL::DOM::XMLReader; BEGIN { public property schemaDoc => prop_all; @@ -56,18 +57,12 @@ $this->schemaDoc ); - use Time::HiRes qw(gettimeofday tv_interval); - - my $t = [gettimeofday]; - $builder->NavigateCreate('personInfo', version => '1'); $builder->NavigateCreate('firstName')->nodeValue('Nemo'); $builder->Back(); $builder->NavigateCreate('lastName')->nodeValue('Nobel'); $builder->Back(); - $builder->NavigateCreate('lastName')->nodeValue('Gardum'); - $builder->Back(); - for(1..100) { + for(1..10) { $builder->NavigateCreate('address', local => 1); $builder->NavigateCreate('street')->nodeValue('Hellroad'); $builder->Back(); @@ -77,10 +72,8 @@ } $builder->Back(); - print "Build: ",tv_interval($t,[gettimeofday]),"\n"; - $t = [gettimeofday]; - print $_->Message,"\n" foreach $this->schemaDoc->Validate($builder->Document); - print "Validate: ",tv_interval($t,[gettimeofday]),"\n"; + my @errors = $this->schemaDoc->Validate($builder->Document); + failed ("The built document doesn't pass a validation",@errors) if @errors; return 1; }; @@ -90,18 +83,12 @@ my $builder = IMPL::DOM::Navigator::SimpleBuilder->new(); - use Time::HiRes qw(gettimeofday tv_interval); - - my $t = [gettimeofday]; - $builder->NavigateCreate('personInfo', version => '1'); $builder->NavigateCreate('firstName')->nodeValue('Nemo'); $builder->Back(); $builder->NavigateCreate('lastName')->nodeValue('Nobel'); $builder->Back(); - $builder->NavigateCreate('lastName')->nodeValue('Gardum'); - $builder->Back(); - for(1..100) { + for(1..10) { $builder->NavigateCreate('address', local => 1); $builder->NavigateCreate('street')->nodeValue('Hellroad'); $builder->Back(); @@ -111,20 +98,28 @@ } $builder->Back(); - print "Build: ",tv_interval($t,[gettimeofday]),"\n"; - $t = [gettimeofday]; - print $_->Message,"\n" foreach $this->schemaDoc->Validate($builder->Document); - print "Validate: ",tv_interval($t,[gettimeofday]),"\n"; - - $t = [gettimeofday]; - for (1...100) { - my $node = new IMPL::DOM::Node(nodeName => 'dummy', foo => 'bar'); - $node->nodeValue('dummy content'); - } - print "Create a set of nodes: ",tv_interval($t,[gettimeofday]),"\n"; + my @errors = $this->schemaDoc->Validate($builder->Document); + failed ("The built document doesn't pass a validation",@errors) if @errors; return 1; }; +test BuildDocumentFromXml => sub { + my ($this) = @_; + + my $builder = IMPL::DOM::Navigator::SimpleBuilder->new(); + my $reader = IMPL::DOM::XMLReader->new( Navigator => $builder ); + + $reader->ParseFile("Resources/person_info.xml"); + + my $doc = $builder->Document() or failed("No document was constrcuted"); + + my @errors = $this->schemaDoc->Validate($doc); + failed("The document isn't correct", @errors) if @errors; + my $name = ($doc->selectNodes("firstName"))[0]->nodeValue; + failed("The firstName has a wrong value", "Expected: Norman", "Got: $name") unless $name eq "Norman"; + +}; + 1; diff -r 1828103371d0 -r c2e7f7c96bcd _test/Test/DOM/Node.pm --- a/_test/Test/DOM/Node.pm Fri Nov 20 16:48:08 2009 +0300 +++ b/_test/Test/DOM/Node.pm Mon Nov 23 00:59:06 2009 +0300 @@ -17,7 +17,7 @@ test Create => sub { my ($this) = @_; - $this->Root(new IMPL::DOM::Node(nodeName => 'Root')) or failed "Failed to create a node"; + $this->Root(new IMPL::DOM::Document(nodeName => 'Root')) or failed "Failed to create a document"; }; test InsertNode => sub { @@ -36,13 +36,13 @@ failed "removeLast returned incorrect results" unless $lastChild == $child; }; -test GetRootNode => sub { +test GetDocumentNode => sub { my ($this) = @_; my $child = $this->Root->firstChild->appendNode(new IMPL::DOM::Node(nodeName => 'GrandChild')) or failed "Failed to append a child node"; - failed "rootNode is undef" unless $child->rootNode; - failed "rootNode returned incorrect value" unless $child->rootNode == $this->Root; + failed "document property is undef" unless $child->document; + failed "document property returned incorrect value" unless $child->document == $this->Root; }; test MoveNode => sub { @@ -52,7 +52,7 @@ $this->Root->appendNode($grandChild); failed "incorrect new parentNode value" unless ($grandChild->parentNode || 0) == $this->Root; - failed "incorrect new rootNode value" unless ($grandChild->rootNode || 0) == $this->Root; + failed "incorrect new document value" unless ($grandChild->document || 0) == $this->Root; }; test AppendRange => sub { diff -r 1828103371d0 -r c2e7f7c96bcd _test/any.pl --- a/_test/any.pl Fri Nov 20 16:48:08 2009 +0300 +++ b/_test/any.pl Mon Nov 23 00:59:06 2009 +0300 @@ -3,6 +3,7 @@ use lib '..\Lib'; require IMPL::DOM::Navigator::SimpleBuilder; +require IMPL::DOM::XMLReader; my $builder = IMPL::DOM::Navigator::SimpleBuilder->new(); @@ -17,7 +18,7 @@ $builder->Back(); $builder->NavigateCreate('lastName')->nodeValue('Gardum'); $builder->Back(); - for(1..10000) { + for(my $i = 0 ; $i < 10000; $i++) { $builder->NavigateCreate('address', local => 1); $builder->NavigateCreate('street')->nodeValue('Hellroad'); $builder->Back(); @@ -27,4 +28,38 @@ } $builder->Back(); - print "Build: ",tv_interval($t,[gettimeofday]),"\n"; \ No newline at end of file + print "Build: ",tv_interval($t,[gettimeofday]),"\n"; + + $t = [gettimeofday]; + + my $doc = new IMPL::DOM::Document(nodeName => 'doc'); + for(my $i = 0 ; $i < 30000; $i++) { + my $node = new IMPL::DOM::Node(nodeName => 'test'); + $node->nodeValue(100); + $doc->appendChild($node); + } + + print "Create 30000 nodes: ",tv_interval($t,[gettimeofday]),"\n"; + + $t = [gettimeofday]; + $builder = IMPL::DOM::Navigator::SimpleBuilder->new(); + my $reader = IMPL::DOM::XMLReader->new( Navigator => $builder ); + + $reader->ParseFile("Resources/large.xml"); + print "Parsing large Xml file: ",tv_interval($t,[gettimeofday]),"\n"; + + my $count = selectAll($builder->Document); + my $len = length $builder->Document->text; + print "Total nodes loaded: $count, data length: $len\n"; + + $t = [gettimeofday]; + $builder = IMPL::DOM::Navigator::SimpleBuilder->new(); + my $reader2 = IMPL::DOM::XMLReader->new( Navigator => $builder ); + + $reader2->ParseFile("Resources/person_info.xml"); + print "Parsing small Xml file: ",tv_interval($t,[gettimeofday]),"\n"; + +sub selectAll { + my $node = shift; + $node,map selectAll($_),@{$node->childNodes}; +} \ No newline at end of file