annotate _test/read_repo.pl @ 4:8001dc056331

fixed problems with sax parser fixed handling of empty repos added log autoflush
author sergey
date Wed, 23 Oct 2013 11:49:16 +0400
parents f2a86753b494
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
1 #!/usr/bin/perl
93b83e3c38d6 initial commit
cin
parents:
diff changeset
2 use strict;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
3 use warnings;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
4
93b83e3c38d6 initial commit
cin
parents:
diff changeset
5 use Digest;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
6 use File::Path qw(make_path);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
7 use IMPL::require {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
8 XMLReader => 'XML::LibXML::Reader',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
9 UserAgent => 'LWP::UserAgent',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
10 Request => 'HTTP::Request',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
11 MDParser => 'Yours::Parsers::MDParser',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
12 PMDParser => 'Yours::Parsers::PMDParser',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
13 Dumper => 'Data::Dumper',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
14 Uncompress => 'IO::Uncompress::AnyUncompress'
93b83e3c38d6 initial commit
cin
parents:
diff changeset
15 };
93b83e3c38d6 initial commit
cin
parents:
diff changeset
16
93b83e3c38d6 initial commit
cin
parents:
diff changeset
17 BEGIN {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
18 XMLReader->import;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
19
93b83e3c38d6 initial commit
cin
parents:
diff changeset
20 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
21
93b83e3c38d6 initial commit
cin
parents:
diff changeset
22 my %digestTypes = (
93b83e3c38d6 initial commit
cin
parents:
diff changeset
23 sha512 => 'SHA-512',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
24 sha384 => 'SHA-384',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
25 sha256 => 'SHA-256',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
26 sha1 => 'SHA-1',
93b83e3c38d6 initial commit
cin
parents:
diff changeset
27 md5 => 'MD5'
93b83e3c38d6 initial commit
cin
parents:
diff changeset
28 );
93b83e3c38d6 initial commit
cin
parents:
diff changeset
29
93b83e3c38d6 initial commit
cin
parents:
diff changeset
30 my $repoUrl =
93b83e3c38d6 initial commit
cin
parents:
diff changeset
31 'http://download.opensuse.org/repositories/Mono/openSUSE_12.3/';
93b83e3c38d6 initial commit
cin
parents:
diff changeset
32 my $repoDir = 'repo';
1
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
33 my $validate = 0;
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
34
93b83e3c38d6 initial commit
cin
parents:
diff changeset
35 my $agent = UserAgent->new();
93b83e3c38d6 initial commit
cin
parents:
diff changeset
36 $agent->env_proxy();
93b83e3c38d6 initial commit
cin
parents:
diff changeset
37
93b83e3c38d6 initial commit
cin
parents:
diff changeset
38 # список файлов, которые должны быть в репозитарии, заполняется по мере загрузки/проверки целостности
93b83e3c38d6 initial commit
cin
parents:
diff changeset
39 my %files;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
40 # список каталогов, которые должны быть в репозитарии, заполняется по мере загрузки/проверки целостности
93b83e3c38d6 initial commit
cin
parents:
diff changeset
41 my %dirs;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
42
93b83e3c38d6 initial commit
cin
parents:
diff changeset
43 print "loading metadata\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
44
93b83e3c38d6 initial commit
cin
parents:
diff changeset
45 make_path(File::Spec->catdir($repoDir,'repodata')) unless -d File::Spec->catdir($repoDir,'repodata');
93b83e3c38d6 initial commit
cin
parents:
diff changeset
46
93b83e3c38d6 initial commit
cin
parents:
diff changeset
47 # загружаем основные метаданные
93b83e3c38d6 initial commit
cin
parents:
diff changeset
48
93b83e3c38d6 initial commit
cin
parents:
diff changeset
49 my $mdLocation = "repodata/repomd.xml";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
50 my $mdFile = MakeLocalName($mdLocation);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
51
93b83e3c38d6 initial commit
cin
parents:
diff changeset
52 my @initial = (
93b83e3c38d6 initial commit
cin
parents:
diff changeset
53 $mdLocation,
93b83e3c38d6 initial commit
cin
parents:
diff changeset
54 "repodata/repomd.xml.asc",
93b83e3c38d6 initial commit
cin
parents:
diff changeset
55 "repodata/repomd.xml.key"
93b83e3c38d6 initial commit
cin
parents:
diff changeset
56 );
93b83e3c38d6 initial commit
cin
parents:
diff changeset
57
1
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
58 $files{MakeLocalName($_)} = { location => "${repoUrl}$_" } foreach @initial;
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
59
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
60 unless ($validate) {
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
61 foreach my $initLocation (@initial) {
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
62 my $file = MakeLocalName($initLocation);
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
63 my $resp = $agent->get(
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
64 "${repoUrl}$initLocation",
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
65 ':content_file' => $file
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
66 );
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
67
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
68 die "failed to load metadata $initLocation: ", $resp->code, " ", $resp->message
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
69 unless $resp->is_success;
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
70 }
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
71 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
72
93b83e3c38d6 initial commit
cin
parents:
diff changeset
73 my $parser = MDParser->new();
93b83e3c38d6 initial commit
cin
parents:
diff changeset
74 $parser->Parse( { location => $mdFile, no_blanks => 1 } );
93b83e3c38d6 initial commit
cin
parents:
diff changeset
75
93b83e3c38d6 initial commit
cin
parents:
diff changeset
76 # загружаем метаданные о пакетах
93b83e3c38d6 initial commit
cin
parents:
diff changeset
77 my %indexMd;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
78 foreach my $md (@{$parser->data->{data} || []}) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
79 $indexMd{$md->{type}} = $md;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
80
93b83e3c38d6 initial commit
cin
parents:
diff changeset
81 print "\t$md->{type}: $md->{location}\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
82
93b83e3c38d6 initial commit
cin
parents:
diff changeset
83 my $file = MakeLocalName($md->{location});
93b83e3c38d6 initial commit
cin
parents:
diff changeset
84 $md->{file} = $file;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
85 $files{$file} = $md;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
86
93b83e3c38d6 initial commit
cin
parents:
diff changeset
87 unless (-f $file) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
88 my $resp = $agent->get(
93b83e3c38d6 initial commit
cin
parents:
diff changeset
89 "${repoUrl}$md->{location}",
93b83e3c38d6 initial commit
cin
parents:
diff changeset
90 ':content_file' => $file
93b83e3c38d6 initial commit
cin
parents:
diff changeset
91 );
93b83e3c38d6 initial commit
cin
parents:
diff changeset
92 die "failed to load $md->{location}: ", $resp->code, " ", $resp->message
93b83e3c38d6 initial commit
cin
parents:
diff changeset
93 unless $resp->is_success;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
94 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
95 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
96
93b83e3c38d6 initial commit
cin
parents:
diff changeset
97 my $primaryMd = $indexMd{primary}{file};
93b83e3c38d6 initial commit
cin
parents:
diff changeset
98
93b83e3c38d6 initial commit
cin
parents:
diff changeset
99 my $hdata = Uncompress->new($primaryMd)
93b83e3c38d6 initial commit
cin
parents:
diff changeset
100 or die "failed to uncompress $primaryMd";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
101
93b83e3c38d6 initial commit
cin
parents:
diff changeset
102 print "processing contents\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
103
93b83e3c38d6 initial commit
cin
parents:
diff changeset
104 PMDParser->new(sub {
2
f2a86753b494 implemented object model
cin
parents: 1
diff changeset
105 my ($package) = @_;
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
106 my $location = $package->{location};
93b83e3c38d6 initial commit
cin
parents:
diff changeset
107
93b83e3c38d6 initial commit
cin
parents:
diff changeset
108 my $file = MakeLocalName($location);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
109 $files{$file} = $package;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
110
93b83e3c38d6 initial commit
cin
parents:
diff changeset
111
93b83e3c38d6 initial commit
cin
parents:
diff changeset
112
93b83e3c38d6 initial commit
cin
parents:
diff changeset
113 unless (-f $file) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
114 my $size = sprintf("%0.2fM",$package->{size}/(1024*1024));
93b83e3c38d6 initial commit
cin
parents:
diff changeset
115 print "\tfetch $location [${size}]\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
116
93b83e3c38d6 initial commit
cin
parents:
diff changeset
117 $agent->get("${repoUrl}$location", ":content_file" => $file);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
118 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
119 })->Parse({ IO => $hdata, no_blanks => 1 });
93b83e3c38d6 initial commit
cin
parents:
diff changeset
120
93b83e3c38d6 initial commit
cin
parents:
diff changeset
121 print "cleanup\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
122
93b83e3c38d6 initial commit
cin
parents:
diff changeset
123 foreach my $dir (keys %dirs) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
124 print "\t$dir\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
125 if (opendir(my $hdir, $dir)) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
126 while(my $file = readdir $hdir) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
127 next if $file eq '.' || $file eq '..';
93b83e3c38d6 initial commit
cin
parents:
diff changeset
128 my $fullPath = File::Spec->catfile($dir,$file);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
129 next unless -f $fullPath;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
130
93b83e3c38d6 initial commit
cin
parents:
diff changeset
131 unless( $files{$fullPath} ){
93b83e3c38d6 initial commit
cin
parents:
diff changeset
132 print "\t\t- $file\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
133 unlink $fullPath;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
134 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
135 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
136 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
137 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
138
93b83e3c38d6 initial commit
cin
parents:
diff changeset
139 print "validating\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
140
1
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
141 my @bad;
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
142
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
143 while(my ($file,$md) = each %files) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
144 if (my $checksum = $md->{checksum}) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
145 if( my $type = $digestTypes{lc($checksum->{type})} ) {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
146 if(open my $hfile, "<$file") {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
147 binmode $hfile;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
148 my $digest = Digest->new($type)->addfile($hfile)->hexdigest;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
149 next if $digest eq $checksum->{value};
93b83e3c38d6 initial commit
cin
parents:
diff changeset
150
93b83e3c38d6 initial commit
cin
parents:
diff changeset
151 print "\t$file: $digest ne $checksum->{value}\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
152 } else {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
153 print "\t$file: ", -f $file ? "unable to open" : "missing","\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
154 }
1
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
155 push @bad,$md;
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
156 } else {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
157 print "\t$file: unknown hash algorithm: $checksum->{type}\n";
93b83e3c38d6 initial commit
cin
parents:
diff changeset
158 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
159 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
160 }
93b83e3c38d6 initial commit
cin
parents:
diff changeset
161
1
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
162 print "fixing\n";
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
163
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
164 foreach my $md (@bad) {
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
165 my $location = $md->{location};
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
166
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
167 my $file = MakeLocalName($location);
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
168
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
169 my $size = sprintf("%0.2fM",$md->{size}/(1024*1024));
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
170 print "\tfetch $location [${size}]\n";
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
171
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
172 $agent->get("${repoUrl}$location", ":content_file" => $file);
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
173 }
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
174
30a88ad2b2fd added validation mode and automatic errors fixing
cin
parents: 0
diff changeset
175 print "total files: ", scalar keys %files, "\n";
0
93b83e3c38d6 initial commit
cin
parents:
diff changeset
176
93b83e3c38d6 initial commit
cin
parents:
diff changeset
177 sub MakeLocalName {
93b83e3c38d6 initial commit
cin
parents:
diff changeset
178 my ($url) = @_;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
179
93b83e3c38d6 initial commit
cin
parents:
diff changeset
180 my @parts = split /\//, $url;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
181 my $file = pop @parts;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
182
93b83e3c38d6 initial commit
cin
parents:
diff changeset
183 my $dir = File::Spec->catdir($repoDir,@parts);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
184
93b83e3c38d6 initial commit
cin
parents:
diff changeset
185 make_path($dir)
93b83e3c38d6 initial commit
cin
parents:
diff changeset
186 unless $dirs{$dir};
93b83e3c38d6 initial commit
cin
parents:
diff changeset
187 $dirs{$dir} = 1;
93b83e3c38d6 initial commit
cin
parents:
diff changeset
188
93b83e3c38d6 initial commit
cin
parents:
diff changeset
189 return File::Spec->catfile($dir,$file);
93b83e3c38d6 initial commit
cin
parents:
diff changeset
190 }