mirror of
https://github.com/torproject/webwml.git
synced 2024-11-27 11:40:21 +00:00
227 lines
6.2 KiB
Perl
Executable File
227 lines
6.2 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
use warnings;
|
|
use strict;
|
|
use Data::Dumper;
|
|
use LWP::Simple;
|
|
use HTML::LinkExtor;
|
|
use LWP;
|
|
use Date::Parse;
|
|
use Date::Format;
|
|
use Digest::SHA qw(sha256_hex);
|
|
|
|
# This is Free Software (GPLv3)
|
|
# http://www.gnu.org/licenses/gpl-3.0.txt
|
|
|
|
print "Creating LWP agent ($LWP::VERSION)...\n";
|
|
my $lua = LWP::UserAgent->new(
|
|
keep_alive => 1,
|
|
timeout => 30,
|
|
agent => "Tor MirrorCheck Agent"
|
|
);
|
|
|
|
sub sanitize {
|
|
my $taintedData = shift;
|
|
my $cleanedData;
|
|
my $whitelist = '-a-zA-Z0-9: +';
|
|
|
|
# clean the data, return cleaned data
|
|
$taintedData =~ s/[^$whitelist]//go;
|
|
$cleanedData = $taintedData;
|
|
|
|
return $cleanedData;
|
|
}
|
|
sub ExtractLinks {
|
|
my $content = shift;
|
|
my $url = shift;
|
|
my @links;
|
|
|
|
my $parser = HTML::LinkExtor->new(undef, $url);
|
|
$parser->parse($content);
|
|
foreach my $linkarray($parser->links)
|
|
{
|
|
my ($elt_type, $attr_name, $attr_value) = @$linkarray;
|
|
if ($elt_type eq 'a' && $attr_name eq 'href' && $attr_value =~ /\/$/ && $attr_value =~ /^$url/)
|
|
{
|
|
push @links, Fetch($attr_value, \&ExtractLinks);
|
|
}
|
|
elsif ($attr_value =~ /\.(xpi|dmg|exe|tar\.gz)$/)
|
|
#elsif ($attr_value =~ /\.(asc)$/) # small pgp files easier to test with
|
|
{
|
|
push @links, $attr_value;
|
|
}
|
|
}
|
|
return @links;
|
|
}
|
|
|
|
sub ExtractDate {
|
|
my $content = shift;
|
|
$content = sanitize($content);
|
|
my $date = str2time($content);
|
|
|
|
if ($date) {
|
|
print "\tExtractDate($content) = $date\n";
|
|
return $date;
|
|
} else {
|
|
print "\tExtractDate($content) = ?\n";
|
|
return undef;
|
|
}
|
|
}
|
|
|
|
sub ExtractSig {
|
|
my $content = shift;
|
|
my $url = shift;
|
|
my $sig = sha256_hex($content);
|
|
print "\tExtractSig($url) = $sig\n";
|
|
return $sig;
|
|
}
|
|
|
|
sub Fetch {
|
|
my ($url, $sub) = @_; # Base url for mirror
|
|
$|++; # unbuffer stdout to show progress
|
|
|
|
print "\nGET $url: ";
|
|
my $request = new HTTP::Request GET => "$url";
|
|
my $result = $lua->request($request);
|
|
my $code = $result->code();
|
|
print "$code\n";
|
|
|
|
if ($result->is_success && $code eq "200"){
|
|
my $content = $result->content;
|
|
if ($content) {
|
|
return $sub->($content, $url);
|
|
} else {
|
|
print "Unable to fetch $url, empty content returned.\n";
|
|
}
|
|
}
|
|
|
|
return undef;
|
|
}
|
|
my @columns;
|
|
sub LoadMirrors {
|
|
open(CSV, "<", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!";
|
|
my $line = <CSV>;
|
|
chomp($line);
|
|
@columns = split(/\s*,\s*/, $line);
|
|
my @mirrors;
|
|
while ($line = <CSV>)
|
|
{
|
|
chomp($line);
|
|
my @values = split(/\s*,\s*/, $line);
|
|
my %server;
|
|
for (my $i = 0; $i < scalar(@columns); $i++)
|
|
{
|
|
$server{$columns[$i]} = $values[$i] || '';
|
|
}
|
|
$server{updateDate} = str2time($server{updateDate}) if ($server{updateDate});
|
|
push @mirrors, {%server};
|
|
}
|
|
close(CSV);
|
|
return @mirrors;
|
|
}
|
|
|
|
sub DumpMirrors {
|
|
my @m = @_;
|
|
open(CSV, ">", "include/tor-mirrors.csv") or die "Cannot open tor-mirrors.csv: $!";
|
|
print CSV join(", ", @columns) . "\n";
|
|
foreach my $server(@m) {
|
|
$server->{updateDate} = gmtime($server->{updateDate}) if ($server->{updateDate});
|
|
print CSV join(", ", map($server->{$_}, @columns));
|
|
print CSV "\n";
|
|
}
|
|
|
|
close(CSV);
|
|
}
|
|
|
|
my @m = LoadMirrors();
|
|
my $count = scalar(@m);
|
|
print "We have a total of $count mirrors\n";
|
|
print "Fetching the last updated date for each mirror.\n";
|
|
|
|
my $tortime = Fetch("https://www.torproject.org/project/trace/www-master.torproject.org", \&ExtractDate);
|
|
my @torfiles = Fetch("https://www.torproject.org/dist/", \&ExtractLinks);
|
|
my %randomtorfiles;
|
|
|
|
for (1 .. 1)
|
|
{
|
|
my $r = int(rand(scalar(@torfiles)));
|
|
my $suffix = $torfiles[$r];
|
|
$suffix =~ s/^https:\/\/www.torproject.org//;
|
|
$randomtorfiles{$suffix} = Fetch($torfiles[$r], \&ExtractSig);
|
|
}
|
|
|
|
print "Using these files for sig matching:\n";
|
|
print join("\n", keys %randomtorfiles);
|
|
print "\n";
|
|
|
|
# Adjust official Tor time by out-of-date offset: number of days * seconds per day
|
|
$tortime -= 1 * 172800;
|
|
print "The official time for Tor is $tortime. \n";
|
|
|
|
for(my $server = 0; $server < scalar(@m); $server++) {
|
|
foreach my $serverType('httpWebsiteMirror', 'httpsWebsiteMirror', 'ftpWebsiteMirror', 'httpDistMirror', 'httpsDistMirror')
|
|
{
|
|
if ($m[$server]->{$serverType}) {
|
|
my $updateDate = Fetch("$m[$server]->{$serverType}/project/trace/www-master.torproject.org", \&ExtractDate);
|
|
|
|
if ($updateDate) {
|
|
$m[$server]->{updateDate} = $updateDate;
|
|
$m[$server]->{sigMatched} = 1;
|
|
foreach my $randomtorfile(keys %randomtorfiles) {
|
|
my $sig = Fetch("$m[$server]->{$serverType}/$randomtorfile", \&ExtractSig);
|
|
if (!$sig) {
|
|
$m[$server]->{sigMatched} = 0;
|
|
last;
|
|
} elsif ($sig ne $randomtorfiles{$randomtorfile}) {
|
|
$m[$server]->{sigMatched} = 0;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub PrintServer {
|
|
my $server = shift;
|
|
print OUT <<"END";
|
|
\n<tr>\n
|
|
<td>$server->{isoCC}</td>\n
|
|
<td>$server->{orgName}</td>\n
|
|
<td>Up to date</td>\n
|
|
END
|
|
|
|
my %prettyNames = (
|
|
httpWebsiteMirror => "http",
|
|
httpsWebsiteMirror => "https",
|
|
ftpWebsiteMirror => "ftp",
|
|
rsyncWebsiteMirror => "rsync",
|
|
httpDistMirror => "http",
|
|
httpsDistMirror => "https",
|
|
rsyncDistMirror => "rsync", );
|
|
|
|
foreach my $precious ( sort keys %prettyNames )
|
|
{
|
|
if ($server->{$precious}) {
|
|
print OUT " <td><a href=\"" . $server->{$precious} . "\">" .
|
|
"$prettyNames{$precious}</a></td>\n";
|
|
} else { print OUT " <td> - </td>\n"; }
|
|
}
|
|
|
|
print OUT "</tr>\n";
|
|
}
|
|
|
|
|
|
my $outFile = "include/mirrors-table.wmi";
|
|
open(OUT, "> $outFile") or die "Can't open $outFile: $!";
|
|
|
|
# Here's where we open a file and print some wml include goodness
|
|
# This is sorted from last known recent update to unknown update times
|
|
foreach my $server ( sort { $b->{updateDate} <=> $a->{updateDate} } grep {$_->{updateDate} && $_->{updateDate} > $tortime && $_->{sigMatched}} @m ) {
|
|
PrintServer($server);
|
|
}
|
|
|
|
DumpMirrors(@m);
|
|
|
|
close(OUT);
|