From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E9AA91FB07 for ; Fri, 24 Sep 2021 10:56:45 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 5/5] fetch: support v2 w/o manifest on old WWW Date: Fri, 24 Sep 2021 10:56:45 +0000 Message-Id: <20210924105645.8627-6-e@80x24.org> In-Reply-To: <20210924105645.8627-1-e@80x24.org> References: <20210924105645.8627-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: There may still be pre-manifest.js.gz versions of PublicInbox::WWW running and serving v2 inboxes. While -clone and "add-external --mirror" were working, -fetch was failing due to a 301 redirect to $INBOX_URL/manifest.js.gz/ instead of the expected 404. Update the code to deal with a JSON decode error (from the 301) and ensure v2 epochs detection is correct (and not using a shadowed variable). 
--- lib/PublicInbox/Fetch.pm | 12 +++++++----- t/v2mirror.t | 8 ++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 7f60b619..7881b402 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -60,11 +60,13 @@ sub do_manifest ($$$) { $opt->{$_} = $lei->{$_} for (0..2); my $cerr = PublicInbox::LeiMirror::run_reap($lei, $curl_cmd, $opt); if ($cerr) { - return [ 404 ] if ($cerr >> 8) == 22; # 404 Missing + return [ 404, $muri ] if ($cerr >> 8) == 22; # 404 Missing $lei->child_error($cerr, "@$curl_cmd failed"); return; } - my $m1 = PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + my $m1 = eval { + PublicInbox::LeiMirror::decode_manifest($ft, $fn, $muri); + } or return [ 404, $muri ]; my $mdiff = { %$m1 }; # filter out unchanged entries. We check modified, too, since @@ -83,7 +85,7 @@ sub do_manifest ($$$) { } my (undef, $v1_path, @v2_epochs) = PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path); - [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf, $m1 ]; + [ 200, $muri, $v1_path, \@v2_epochs, $ft, $mf, $m1 ]; } sub get_fingerprint2 { @@ -106,7 +108,7 @@ sub do_fetch { # main entry point } else { # v2: require PublicInbox::MultiGit; $mg = PublicInbox::MultiGit->new($dir, 'all.git', 'git'); - my @epochs = $mg->git_epochs; + @epochs = $mg->git_epochs; my ($git_url, $epoch); for my $nr (@epochs) { # try newest epoch, first my $edir = "$dir/git/$nr.git"; @@ -135,7 +137,7 @@ EOM PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver); $lei->qerr("# inbox URL: $ibx_uri/"); my $res = do_manifest($lei, $dir, $ibx_uri) or return; - my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf, $m1) = @$res; + my ($code, $muri, $v1_path, $v2_epochs, $ft, $mf, $m1) = @$res; if ($code == 404) { # any pre-manifest.js.gz instances running? 
Just fetch all # existing ones and unconditionally try cloning the next diff --git a/t/v2mirror.t b/t/v2mirror.t index fa4a717d..a625646d 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -376,6 +376,14 @@ EOM my @g_last = grep { -w $_ } glob("$dst/git/*.git"); is_deeply(\@g_last, [ $g_all[-1] ], 'partial clone of ~0 worked'); + chmod(0755, $g_all[0]) or xbail "chmod $!"; + my @before = glob("$g_all[0]/objects/*/*"); + run_script([qw(-fetch -v)], undef, { -C => $dst, 2 => \($err = '') }); + is($?, 0, 'scraping fetch on old PublicInbox::WWW') or diag $err; + my @after = glob("$g_all[0]/objects/*/*"); + ok(scalar(@before) < scalar(@after), + 'fetched 0.git after enabling write-bit'); + $td->join('TERM'); }