From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id CDA561FB06 for ; Fri, 24 Sep 2021 10:56:45 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/5] clone|fetch|--mirror: cull manifest in partial mirrors Date: Fri, 24 Sep 2021 10:56:44 +0000 Message-Id: <20210924105645.8627-5-e@80x24.org> In-Reply-To: <20210924105645.8627-1-e@80x24.org> References: <20210924105645.8627-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This makes it easier for users to enable fetching on a previously read-only epoch. Prior to this change, users were required to delete manifest.js.gz in addition to adding the writable bit. Now, they just have to "chmod +w $EPOCH_DIR". --- lib/PublicInbox/Fetch.pm | 17 +++++++++++++++-- lib/PublicInbox/LeiMirror.pm | 24 ++++++++++++++++++++---- t/v2mirror.t | 24 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 464ffe12..7f60b619 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -12,6 +12,8 @@ use PublicInbox::LEI; use PublicInbox::LeiCurl; use PublicInbox::LeiMirror; use File::Temp (); +use PublicInbox::Config; +use IO::Compress::Gzip qw(gzip $GzipError); sub new { bless {}, __PACKAGE__ } @@ -81,7 +83,7 @@ sub do_manifest ($$$) { } my (undef, $v1_path, @v2_epochs) = PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path); - [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf ]; + [ 200, $v1_path, \@v2_epochs, $muri, $ft, $mf, $m1 ]; } sub get_fingerprint2 { @@ -133,7 +135,7 @@ EOM PublicInbox::LeiMirror::write_makefile($dir, $ibx_ver); $lei->qerr("# inbox URL: $ibx_uri/"); my $res = do_manifest($lei, $dir, $ibx_uri) or return; - my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf) = @$res; + my ($code, $v1_path, $v2_epochs, $muri, $ft, $mf, $m1) = @$res; if ($code == 404) { # any pre-manifest.js.gz instances running? Just fetch all # existing ones and unconditionally try cloning the next @@ -145,6 +147,7 @@ EOM } else { $code == 200 or die "BUG unexpected code $code\n"; } + my $mculled; if ($ibx_ver == 2) { defined($v1_path) and warn <, WTF? @@ -153,6 +156,12 @@ EOM my ($nr) = (m!/([0-9]+)\.git\z!g); $skip->{$nr} ? () : $nr; } @$v2_epochs; + if ($m1 && scalar keys %$skip) { + my $re = join('|', keys %$skip); + my @del = grep(m!/git/$re\.git\z!, keys %$m1); + delete @$m1{@del}; + $mculled = 1; + } } else { $git_dir[0] = $dir; } @@ -193,6 +202,10 @@ EOM for my $i (@new_epoch) { $mg->epoch_cfg_set($i) } if ($ft) { my $fn = $ft->filename; + if ($mculled) { + my $json = PublicInbox::Config->json->encode($m1); + gzip(\$json => $fn) or die "gzip: $GzipError"; + } rename($fn, $mf) or die "E: rename($fn, $mf): $!\n"; $ft->unlink_on_destroy(0); } diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index fe81b967..1ab5e0d8 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -6,7 +6,9 @@ package PublicInbox::LeiMirror; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); +use PublicInbox::Config; use IO::Uncompress::Gunzip qw(gunzip $GunzipError); +use IO::Compress::Gzip qw(gzip $GzipError); use PublicInbox::Spawn qw(popen_rd spawn run_die); use File::Temp (); use Fcntl qw(SEEK_SET O_CREAT O_EXCL O_WRONLY); @@ -267,14 +269,14 @@ EOM close $fh or die "close:($f): $!"; } -sub clone_v2 ($$) { - my ($self, $v2_epochs) = @_; +sub clone_v2 ($$;$) { + my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref my $lei = $self->{lei}; my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return; my $pfx = $curl->torsocks($lei, (values %$v2_epochs)[0]) or return; my $dst = $self->{dst}; my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs); - my (@src_edst, @read_only); + my (@src_edst, @read_only, @skip_nr); for my $nr (sort { $a <=> $b } keys %$v2_epochs) { my $uri = $v2_epochs->{$nr}; my $src = $uri->as_string; @@ -289,8 +291,15 @@ failed to extract epoch number from $src } else { # create a placeholder so users only need to chmod +w init_placeholder($src, $edst); push @read_only, $edst; + push @skip_nr, $nr; } } + if (@skip_nr) { # filter out the epochs we skipped + my $re = join('|', @skip_nr); + my @del = grep(m!/git/$re\.git\z!, keys %$m); + delete @$m{@del}; + $self->{-culled_manifest} = 1; + } my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock'; _try_config($self); my $on_destroy = $lk->lock_for_scope($$); @@ -379,13 +388,20 @@ EOM my ($n) = ("$uri" =~ m!/([0-9]+)\.git\z!); $n => $uri->clone } @v2_epochs; - clone_v2($self, \%v2_epochs); + clone_v2($self, \%v2_epochs, $m); } elsif (defined $v1_path) { clone_v1($self); } else { die "E: confused by <$uri>, possible matches:\n\t", join(', ', sort keys %$m), "\n"; } + if (delete $self->{-culled_manifest}) { # set by clone_v2 + # write the smaller manifest if epochs were skipped so + # users won't have to delete manifest if they +w an + # epoch they no longer want to skip + my $json = PublicInbox::Config->json->encode($m); + gzip(\$json => $fn) or die "gzip: $GzipError"; + } my $fin = "$self->{dst}/manifest.js.gz"; rename($fn, $fin) or die "E: rename($fn, $fin): $!"; $ft->unlink_on_destroy(0); diff --git a/t/v2mirror.t b/t/v2mirror.t index 1231b72d..fa4a717d 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -9,6 +9,7 @@ use PublicInbox::Spawn qw(which); require_git(2.6); require_cmd('curl'); local $ENV{HOME} = abs_path('t'); +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); # Integration tests for HTTP cloning + mirroring require_mods(qw(Plack::Util Plack::Builder @@ -288,6 +289,29 @@ if ('test read-only epoch dirs') { is_deeply(\@g2, \@g, 'cloned again'); is(scalar(grep { -w $_ } @g2), scalar(@w) + 1, 'got one more cloned epoch'); + + # make 0.git writable and fetch into it, relies on culled manifest + chmod(0755, $g2[0]) or xbail "chmod: $!"; + my @before = glob("$g2[0]/objects/*/*"); + run_script([qw(-fetch -q)], undef, { -C => $dst }); + is($?, 0, 'no error from partial fetch'); + my @after = glob("$g2[0]/objects/*/*"); + ok(scalar(@before) < scalar(@after), 'fetched after chmod 0755 0.git'); + + # ensure culled manifest is maintained after fetch + gunzip("$dst/manifest.js.gz" => \(my $m), MultiStream => 1) or + xbail "gunzip: $GunzipError"; + $m = PublicInbox::Config->json->decode($m); + for my $k (keys %$m) { # /$name/git/$N.git + my ($nr) = ($k =~ m!/git/([0-9]+)\.git\z!); + ok(-w "$dst/git/$nr.git", "writable $nr.git in manifest"); + } + for my $ro (grep { !-w $_ } @g2) { + my ($nr) = ($ro =~ m!/git/([0-9]+)\.git\z!); + is(grep(m!/git/$nr\.git\z!, keys %$m), 0, + "read-only $nr.git not in manifest") + or xbail([sort keys %$m]); + } } my $err = '';