diff options
-rw-r--r-- | lib/PublicInbox/LeiMirror.pm | 13 | ||||
-rw-r--r-- | lib/PublicInbox/TestCommon.pm | 1 | ||||
-rw-r--r-- | t/v2mirror.t | 78 |
3 files changed, 82 insertions, 10 deletions
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index 53f7dd31..fe81b967 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -42,7 +42,8 @@ sub try_scrape { # we grep with URL below, we don't want Subject/From headers # making us clone random URLs - my @urls = ($html =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); + my @html = split(/<hr>/, $html); + my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g); my $url = $uri->as_string; chop($url) eq '/' or die "BUG: $uri not canonicalized"; @@ -184,7 +185,9 @@ sub run_reap { my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid); waitpid($pid, 0) == $pid or die "waitpid @$cmd: $!"; @$reap = (); # cancel reap - $? + my $ret = $?; + $? = 0; # don't let it influence normal exit + $ret; } sub clone_v1 { @@ -358,7 +361,11 @@ sub try_manifest { return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing return $lei->child_error($cerr, "@$cmd failed"); } - my $m = decode_manifest($ft, $fn, $uri); + my $m = eval { decode_manifest($ft, $fn, $uri) }; + if ($@) { + warn $@; + return try_scrape($self); + } my ($path_pfx, $v1_path, @v2_epochs) = deduce_epochs($m, $path); if (@v2_epochs) { # It may be possible to have v1 + v2 in parallel someday: diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index aff34853..cd706e0e 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -469,6 +469,7 @@ sub start_script { $ENV{LISTEN_PID} = $$; $ENV{LISTEN_FDS} = $fds; } + if ($opt->{-C}) { chdir($opt->{-C}) or die "chdir: $!" } $0 = join(' ', @$cmd); if ($sub) { eval { PublicInbox::DS->Reset }; diff --git a/t/v2mirror.t b/t/v2mirror.t index 20a8daaa..1231b72d 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -5,6 +5,7 @@ use v5.10.1; use PublicInbox::TestCommon; use File::Path qw(remove_tree make_path); use Cwd qw(abs_path); +use PublicInbox::Spawn qw(which); require_git(2.6); require_cmd('curl'); local $ENV{HOME} = abs_path('t'); @@ -23,7 +24,8 @@ my $pi_config = "$tmpdir/config"; open my $fh, '>', $pi_config or die "open($pi_config): $!"; print $fh <<"" or die "print $pi_config: $!"; [publicinbox "v2"] - inboxdir = $tmpdir/in +; using "mainrepo" rather than "inboxdir" for v1.1.0-pre1 WWW compat below + mainrepo = $tmpdir/in address = test\@example.com close $fh or die "close($pi_config): $!"; @@ -62,11 +64,11 @@ $v2w->done; } $ibx->cleanup; -my $sock = tcp_server(); +local $ENV{TEST_IPV4_ONLY} = 1; # plackup (below) doesn't do IPv6 +my $rdr = { 3 => tcp_server() }; my @cmd = ('-httpd', '-W0', "--stdout=$tmpdir/out", "--stderr=$tmpdir/err"); -my $td = start_script(\@cmd, undef, { 3 => $sock }); -my ($host, $port) = tcp_host_port($sock); -$sock = undef; +my $td = start_script(\@cmd, undef, $rdr); +my ($host, $port) = tcp_host_port(delete $rdr->{3}); @cmd = (qw(-clone -q), "http://$host:$port/v2/", "$tmpdir/m"); run_script(\@cmd) or xbail '-clone'; @@ -288,7 +290,69 @@ if ('test read-only epoch dirs') { 'got one more cloned epoch'); } -ok($td->kill, 'killed httpd'); -$td->join; +my $err = ''; +my $v110 = xqx([qw(git rev-parse v1.1.0-pre1)], undef, { 2 => \$err }); +SKIP: { + skip("no detected public-inbox GIT_DIR ($err)", 1) if $?; + # using plackup to test old PublicInbox::WWW since -httpd from + # back then relied on some packages we no longer depend on + my $plackup = which('plackup') or skip('no plackup in path', 1); + require PublicInbox::Lock; + chomp $v110; + my ($base) = ($0 =~ m!\b([^/]+)\.[^\.]+\z!); + my $wt = "t/data-gen/$base.pre-manifest"; + my $lk = bless { lock_path => __FILE__ }, 'PublicInbox::Lock'; + $lk->lock_acquire; + my $psgi = "$wt/app.psgi"; + if (!-f $psgi) { # checkout a pre-manifest.js.gz version + my $t = File::Temp->new(TEMPLATE => 'g-XXXX', TMPDIR => 1); + my $env = { GIT_INDEX_FILE => $t->filename }; + xsys([qw(git read-tree), $v110], $env) and xbail 'read-tree'; + xsys([qw(git checkout-index -a), "--prefix=$wt/"], $env) + and xbail 'checkout-index'; + my $f = "$wt/app.psgi.tmp.$$"; + open my $fh, '>', $f or xbail $!; + print $fh <<'EOM' or xbail $!; +use Plack::Builder; +use PublicInbox::WWW; +my $www = PublicInbox::WWW->new; +builder { enable 'Head'; sub { $www->call(@_) } } +EOM + close $fh or xbail $!; + rename($f, $psgi) or xbail $!; + } + $lk->lock_release; + + $rdr->{run_mode} = 0; + $rdr->{-C} = $wt; + my $cmd = [$plackup, qw(-Enone -Ilib), "--host=$host", "--port=$port"]; + $td->join('TERM'); + open $rdr->{2}, '>>', "$tmpdir/plackup.err.log" or xbail "open: $!"; + open $rdr->{1}, '>>&', $rdr->{2} or xbail "open: $!"; + $td = start_script($cmd, { PERL5LIB => 'lib' }, $rdr); + # wait for plackup socket()+bind()+listen() + my %opt = ( Proto => 'tcp', Type => Socket::SOCK_STREAM(), + PeerAddr => "$host:$port" ); + for (0..50) { + tick(); + last if IO::Socket::INET->new(%opt); + } + my $dst = "$tmpdir/scrape"; + @cmd = (qw(-clone -q), "http://$host:$port/v2", $dst); + run_script(\@cmd, undef, { 2 => \(my $err = '') }); + is($?, 0, 'scraping clone on old PublicInbox::WWW') + or diag $err; + my @g_all = glob("$dst/git/*.git"); + ok(scalar(@g_all) > 1, 'cloned multiple epochs'); + + remove_tree($dst); + @cmd = (qw(-clone -q --epoch=~0), "http://$host:$port/v2", $dst); + run_script(\@cmd, undef, { 2 => \($err = '') }); + is($?, 0, 'partial scraping clone on old PublicInbox::WWW'); + my @g_last = grep { -w $_ } glob("$dst/git/*.git"); + is_deeply(\@g_last, [ $g_all[-1] ], 'partial clone of ~0 worked'); + + $td->join('TERM'); +} done_testing; |