view _test/read_repo.pl @ 4:8001dc056331

fixed problems with sax parser, fixed handling of empty repos, added log autoflush
author sergey
date Wed, 23 Oct 2013 11:49:16 +0400
parents f2a86753b494
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use warnings;

use Digest;
use File::Path qw(make_path);
use IMPL::require {
	XMLReader => 'XML::LibXML::Reader',
	UserAgent => 'LWP::UserAgent',
	Request   => 'HTTP::Request',
	MDParser  => 'Yours::Parsers::MDParser',
	PMDParser => 'Yours::Parsers::PMDParser',
	Dumper    => 'Data::Dumper',
	Uncompress => 'IO::Uncompress::AnyUncompress'
};

# Run the import of the XMLReader alias (mapped to XML::LibXML::Reader in the
# IMPL::require list) at compile time.
BEGIN { XMLReader->import(); }

# Checksum type names (compared lower-cased during validation) mapped to the
# algorithm names handed to Digest->new.
my %digestTypes = (
	md5    => 'MD5',
	sha1   => 'SHA-1',
	sha256 => 'SHA-256',
	sha384 => 'SHA-384',
	sha512 => 'SHA-512',
);

# Remote repository base URL (with trailing slash) and the local mirror root.
my $repoUrl  = 'http://download.opensuse.org/repositories/Mono/openSUSE_12.3/';
my $repoDir  = 'repo';

# When true, the initial repomd files are not downloaded again.
my $validate = 0;

# HTTP client; proxy settings are taken from the environment.
my $agent = UserAgent->new;
$agent->env_proxy;

# Files that must exist in the repository (local path => metadata record);
# filled in while downloading / checking integrity.
my %files;

# Directories that must exist in the repository (local path => 1);
# filled in while downloading / checking integrity.
my %dirs;

print "loading metadata\n";

# Make sure the local repodata directory is present.
{
	my $repodata = File::Spec->catdir($repoDir, 'repodata');
	make_path($repodata) unless -d $repodata;
}

# Load the main metadata.

my $mdLocation = "repodata/repomd.xml";
my $mdFile = MakeLocalName($mdLocation);

# The repomd index together with its signature and key.
my @initial = ( $mdLocation, "repodata/repomd.xml.asc", "repodata/repomd.xml.key" );

# Register the initial files so that the cleanup pass keeps them.
for my $location (@initial) {
	$files{ MakeLocalName($location) } = { location => "${repoUrl}$location" };
}

# Fetch the initial files unless we are only validating an existing mirror.
if (!$validate) {
	for my $initLocation (@initial) {
		my $target = MakeLocalName($initLocation);
		my $resp = $agent->get( "${repoUrl}$initLocation", ':content_file' => $target );

		next if $resp->is_success;

		die "failed to load metadata $initLocation: ", $resp->code, " ", $resp->message;
	}
}

# Parse the repomd index that was stored locally.
my $parser = MDParser->new();
$parser->Parse( { no_blanks => 1, location => $mdFile } );

# Load the package metadata: every entry listed in the index is registered
# by type, recorded in %files and downloaded when no local copy exists.
my %indexMd;
my $mdList = $parser->data->{data} || [];
for my $md (@$mdList) {
	print "\t$md->{type}: $md->{location}\n";

	my $local = MakeLocalName($md->{location});
	$md->{file} = $local;
	$files{$local} = $md;
	$indexMd{$md->{type}} = $md;

	# An existing local copy is kept as is; it is checked in the validation pass.
	next if -f $local;

	my $resp = $agent->get(
		"${repoUrl}$md->{location}",
		':content_file' => $local
	);
	next if $resp->is_success;

	die "failed to load $md->{location}: ", $resp->code, " ", $resp->message;
}

# Local path of the 'primary' metadata file, i.e. the package list.
# Looked up without autovivifying $indexMd{primary}, and reported clearly
# when the index did not contain such an entry.
my $primaryMd = $indexMd{primary} ? $indexMd{primary}{file} : undef;
die "repository metadata has no 'primary' entry"
	unless defined $primaryMd;

my $hdata = Uncompress->new($primaryMd)
	or die "failed to uncompress $primaryMd";

print "processing contents\n";

# The parser invokes the callback once per package record. Each package is
# registered in %files and downloaded when there is no local copy yet.
PMDParser->new(sub {
	my ($package) = @_;
	my $location = $package->{location};

	my $file = MakeLocalName($location);
	$files{$file} = $package;

	unless (-f $file) {
		# A missing size is shown as 0.00M instead of raising a warning.
		my $size = sprintf("%0.2fM", ($package->{size} || 0)/(1024*1024));
		print "\tfetch $location [${size}]\n";

		my $resp = $agent->get("${repoUrl}$location", ":content_file" => $file);

		# Report the failure but keep going: files with a checksum are
		# retried after the validation pass.
		warn "failed to load $location: ", $resp->code, " ", $resp->message, "\n"
			unless $resp->is_success;
	}

	# The callback previously always yielded a true value; keep that in case
	# the parser inspects the result (not visible from here).
	return 1;
})->Parse({ IO => $hdata, no_blanks => 1 });

print "cleanup\n";

# Go through every directory registered in %dirs and delete the plain files
# that were not registered in %files. Sub-directories are left alone.
for my $dir (keys %dirs) {
	print "\t$dir\n";

	# Directories that cannot be opened are skipped without a message.
	opendir(my $dh, $dir) or next;

	while (my $file = readdir $dh) {
		next if $file eq '.' || $file eq '..';

		my $path = File::Spec->catfile($dir, $file);
		next unless -f $path;

		# Known files stay.
		next if $files{$path};

		print "\t\t- $file\n";
		unlink $path;
	}
}

print "validating\n";

# Metadata records of the files that are missing, unreadable or whose
# checksum does not match; they are downloaded again afterwards.
my @bad;

# Check every registered file that carries a checksum. Records without a
# checksum are skipped; an unknown algorithm is reported but the file is
# not treated as bad.
while (my ($file, $md) = each %files) {
	my $checksum = $md->{checksum} or next;

	my $type = $digestTypes{ lc($checksum->{type}) };
	unless ($type) {
		print "\t$file: unknown hash algorithm: $checksum->{type}\n";
		next;
	}

	# Three-argument open: the file name is derived from downloaded metadata
	# and must never be interpreted as an open mode.
	if (open(my $hfile, '<', $file)) {
		binmode $hfile;
		my $digest = Digest->new($type)->addfile($hfile)->hexdigest;
		close $hfile;

		# hexdigest is lower-case; accept upper-case hex in the metadata too.
		next if $digest eq lc($checksum->{value});

		print "\t$file: $digest ne $checksum->{value}\n";
	} else {
		print "\t$file: ", -f $file ? "unable to open" : "missing","\n";
	}

	push @bad, $md;
}

print "fixing\n";

# Download again every file that failed validation. The new copy is not
# validated a second time, so at least a failed transfer is reported.
foreach my $md (@bad) {
	my $location = $md->{location};

	my $file = MakeLocalName($location);

	# A missing size is shown as 0.00M instead of raising a warning.
	my $size = sprintf("%0.2fM", ($md->{size} || 0)/(1024*1024));
	print "\tfetch $location [${size}]\n";

	my $resp = $agent->get("${repoUrl}$location", ":content_file" => $file);

	warn "failed to load $location: ", $resp->code, " ", $resp->message, "\n"
		unless $resp->is_success;
}

print "total files: ", scalar(keys %files), "\n";

# Translate a repository-relative location (components separated by '/')
# into a local path below $repoDir.
#
# The directory part is created on first use and recorded in %dirs, which
# later drives the cleanup pass.
#
# Dies when the location contains a '..' component: locations are read from
# downloaded metadata, and catdir does not collapse '..', so such a value
# would let downloads (and the cleanup unlink) reach outside $repoDir.
#
# Returns the local file name.
sub MakeLocalName {
	my ($url) = @_;

	my @parts = split /\//, $url;

	die "unsafe location '$url': '..' path components are not allowed"
		if grep { $_ eq '..' } @parts;

	# The last component is the file name, the rest is the directory.
	my $file = pop @parts;

	my $dir = File::Spec->catdir($repoDir,@parts);

	# Create each directory only once per run.
	unless ($dirs{$dir}) {
		make_path($dir);
		$dirs{$dir} = 1;
	}

	return File::Spec->catfile($dir,$file);
}