From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A9EF81F8C8 for ; Fri, 17 Sep 2021 11:00:23 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] fetch: ignore non-writable epoch dirs Date: Fri, 17 Sep 2021 11:00:23 +0000 Message-Id: <20210917110023.6929-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This will eventually be useful for maintaining partial mirrors. Keeping in line with the original public-inbox-fetch philosophy, there are no additional config files to manage: the user merely needs to remove write permissions to an $N.git directory to prevent it from being updated. Re-enabling updates just requires restoring write permission. --- Documentation/public-inbox-fetch.pod | 14 ++++++++++++- lib/PublicInbox/Fetch.pm | 19 ++++++++++++++---- t/v2mirror.t | 30 +++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/Documentation/public-inbox-fetch.pod b/Documentation/public-inbox-fetch.pod index 28d5638d..833df862 100644 --- a/Documentation/public-inbox-fetch.pod +++ b/Documentation/public-inbox-fetch.pod @@ -19,7 +19,19 @@ It does not run L<public-inbox-index(1)>, making it suitable for maintaining git-only backups. For v2 inboxes, it will maintain C<$INBOX_DIR/manifest.js.gz> -file to speed up future invocations. +file to speed up future invocations. It is always safe to remove +manifest.js.gz; it is merely an optimization and will be +restored on the next invocation. + +To prevent fetches on any v2 epoch, use L<chmod(1)> to remove +write permissions to the top-level of the epoch. 
For example, +to disable fetches on epoch 4: + + chmod a-w $INBOX_DIR/git/4.git + +If you wish to re-enable fetches to the epoch: + + chmod u+w $INBOX_DIR/git/4.git =head1 OPTIONS diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm index 993e5b19..0bd6502c 100644 --- a/lib/PublicInbox/Fetch.pm +++ b/lib/PublicInbox/Fetch.pm @@ -96,7 +96,7 @@ sub do_fetch { # main entry point my $ibx_ver; $lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return; my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver); - my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch); + my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch, $skip); if ($ibx_ver == 1) { my $url = remote_url($lei, $dir) // die "E: $dir missing remote.origin.url\n"; @@ -108,6 +108,10 @@ sub do_fetch { # main entry point my ($git_url, $epoch); for my $nr (@epochs) { # try newest epoch, first my $edir = "$dir/git/$nr.git"; + unless (-d $edir && -w _) { # must be writable dir + $skip->{$nr} = 1; + next; + } if (defined(my $url = remote_url($lei, $edir))) { $git_url = $url; $epoch = $nr; @@ -116,6 +120,8 @@ sub do_fetch { # main entry point warn "W: $edir missing remote.origin.url\n"; } } + @epochs = grep { !$skip->{$_} } @epochs if $skip; + $skip //= {}; # makes code below easier $git_url or die "Unable to determine git URL\n"; my $inbox_url = $git_url; $inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or @@ -132,7 +138,10 @@ EOM # any pre-manifest.js.gz instances running? Just fetch all # existing ones and unconditionally try cloning the next $v2_epochs = [ map { "$dir/git/$_.git" } @epochs ]; - push @$v2_epochs, "$dir/git/".($epochs[-1] + 1) if @epochs; + if (@epochs) { + my $n = $epochs[-1] + 1; + push @$v2_epochs, "$dir/git/$n.git" if !$skip->{$n}; + } } else { $code == 200 or die "BUG unexpected code $code\n"; } @@ -140,8 +149,10 @@ EOM defined($v1_path) and warn <, WTF? 
EOM - @git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } - map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @$v2_epochs; + @git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } map { + my ($nr) = (m!/([0-9]+)\.git\z!g); + $skip->{$nr} ? () : $nr; + } @$v2_epochs; } else { $git_dir[0] = $dir; } diff --git a/t/v2mirror.t b/t/v2mirror.t index 3df5d053..665a4d59 100644 --- a/t/v2mirror.t +++ b/t/v2mirror.t @@ -3,7 +3,7 @@ use strict; use v5.10.1; use PublicInbox::TestCommon; -use File::Path qw(remove_tree); +use File::Path qw(remove_tree make_path); use Cwd qw(abs_path); require_git(2.6); require_cmd('curl'); @@ -235,6 +235,34 @@ for my $d (@new_epochs) { 'include.path set'); } +if ('test read-only epoch dirs') { + my @git = ('git', "--git-dir=$new_epochs[0]"); + my $get_objs = [@git, + qw(cat-file --buffer --batch-check --batch-all-objects)]; + my $before = [sort xqx($get_objs)]; + + remove_tree(map { "$new_epochs[0]/$_" } qw(objects refs/heads)); + chmod(0555, $new_epochs[0]) or xbail "chmod: $!"; + + # force a refetch + unlink("$tmpdir/m/manifest.js.gz") or xbail "unlink: $!"; + + run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m"}) or + xbail '-fetch failed'; + + ok(!-d "$new_epochs[0]/objects", 'no objects after fetch to R/O dir'); + + chmod(0755, $new_epochs[0]) or xbail "chmod: $!"; + mkdir("$new_epochs[0]/objects") or xbail "mkdir: $!"; + mkdir("$new_epochs[0]/refs/heads") or xbail "mkdir: $!"; + + my $err = ''; + run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m", 2 => \$err}) or + xbail '-fetch failed '.$err; + is_deeply([ sort xqx($get_objs) ], $before, + 'fetch restored objects once GIT_DIR became writable'); +} + ok($td->kill, 'killed httpd'); $td->join;