user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] fetch: ignore non-writable epoch dirs
@ 2021-09-17 11:00 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-09-17 11:00 UTC (permalink / raw)
  To: meta

This will eventually be useful for maintaining partial mirrors.

Keeping in line with the original public-inbox-fetch philosophy,
there are no additional config files to manage:
the user merely needs to remove write permissions to an $N.git
directory to prevent it from being updated.

Re-enabling updates just requires restoring write permission.
---
 Documentation/public-inbox-fetch.pod | 14 ++++++++++++-
 lib/PublicInbox/Fetch.pm             | 19 ++++++++++++++----
 t/v2mirror.t                         | 30 +++++++++++++++++++++++++++-
 3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/Documentation/public-inbox-fetch.pod b/Documentation/public-inbox-fetch.pod
index 28d5638d..833df862 100644
--- a/Documentation/public-inbox-fetch.pod
+++ b/Documentation/public-inbox-fetch.pod
@@ -19,7 +19,19 @@ It does not run L<public-inbox-index(1)>, making it suitable
 for maintaining git-only backups.
 
 For v2 inboxes, it will maintain C<$INBOX_DIR/manifest.js.gz>
-file to speed up future invocations.
+file to speed up future invocations.  It is always safe to remove
+manifest.js.gz; it is merely an optimization and will be
+restored on the next invocation.
+
+To prevent fetches on any v2 epoch, use L<chmod(1)> to remove
+write permissions to the top-level of the epoch.  For example,
+to disable fetches on epoch 4:
+
+	chmod a-w $INBOX_DIR/git/4.git
+
+If you wish to re-enable fetches to the epoch:
+
+	chmod u+w $INBOX_DIR/git/4.git
 
 =head1 OPTIONS
 
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 993e5b19..0bd6502c 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -96,7 +96,7 @@ sub do_fetch { # main entry point
 	my $ibx_ver;
 	$lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
 	my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver);
-	my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch);
+	my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch, $skip);
 	if ($ibx_ver == 1) {
 		my $url = remote_url($lei, $dir) //
 			die "E: $dir missing remote.origin.url\n";
@@ -108,6 +108,10 @@ sub do_fetch { # main entry point
 		my ($git_url, $epoch);
 		for my $nr (@epochs) { # try newest epoch, first
 			my $edir = "$dir/git/$nr.git";
+			unless (-d $edir && -w _) { # must be writable dir
+				$skip->{$nr} = 1;
+				next;
+			}
 			if (defined(my $url = remote_url($lei, $edir))) {
 				$git_url = $url;
 				$epoch = $nr;
@@ -116,6 +120,8 @@ sub do_fetch { # main entry point
 				warn "W: $edir missing remote.origin.url\n";
 			}
 		}
+		@epochs = grep { !$skip->{$_} } @epochs if $skip;
+		$skip //= {}; # makes code below easier
 		$git_url or die "Unable to determine git URL\n";
 		my $inbox_url = $git_url;
 		$inbox_url =~ s!/git/$epoch(?:\.git)?/?\z!! or
@@ -132,7 +138,10 @@ EOM
 		# any pre-manifest.js.gz instances running? Just fetch all
 		# existing ones and unconditionally try cloning the next
 		$v2_epochs = [ map { "$dir/git/$_.git" } @epochs ];
-		push @$v2_epochs, "$dir/git/".($epochs[-1] + 1) if @epochs;
+		if (@epochs) {
+			my $n = $epochs[-1] + 1;
+			push @$v2_epochs, "$dir/git/$n.git" if !$skip->{$n};
+		}
 	} else {
 		$code == 200 or die "BUG unexpected code $code\n";
 	}
@@ -140,8 +149,10 @@ EOM
 		defined($v1_path) and warn <<EOM;
 E: got v1 `$v1_path' when expecting v2 epoch(s) in <$muri>, WTF?
 EOM
-		@git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b }
-			map { my ($nr) = (m!/([0-9]+)\.git\z!g) } @$v2_epochs;
+		@git_dir = map { "$dir/git/$_.git" } sort { $a <=> $b } map {
+				my ($nr) = (m!/([0-9]+)\.git\z!g);
+				$skip->{$nr} ? () : $nr;
+			} @$v2_epochs;
 	} else {
 		$git_dir[0] = $dir;
 	}
diff --git a/t/v2mirror.t b/t/v2mirror.t
index 3df5d053..665a4d59 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -3,7 +3,7 @@
 use strict;
 use v5.10.1;
 use PublicInbox::TestCommon;
-use File::Path qw(remove_tree);
+use File::Path qw(remove_tree make_path);
 use Cwd qw(abs_path);
 require_git(2.6);
 require_cmd('curl');
@@ -235,6 +235,34 @@ for my $d (@new_epochs) {
 		'include.path set');
 }
 
+if ('test read-only epoch dirs') {
+	my @git = ('git', "--git-dir=$new_epochs[0]");
+	my $get_objs = [@git,
+		qw(cat-file --buffer --batch-check --batch-all-objects)];
+	my $before = [sort xqx($get_objs)];
+
+	remove_tree(map { "$new_epochs[0]/$_" } qw(objects refs/heads));
+	chmod(0555, $new_epochs[0]) or xbail "chmod: $!";
+
+	# force a refetch
+	unlink("$tmpdir/m/manifest.js.gz") or xbail "unlink: $!";
+
+	run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m"}) or
+		xbail '-fetch failed';
+
+	ok(!-d "$new_epochs[0]/objects", 'no objects after fetch to R/O dir');
+
+	chmod(0755, $new_epochs[0]) or xbail "chmod: $!";
+	mkdir("$new_epochs[0]/objects") or xbail "mkdir: $!";
+	mkdir("$new_epochs[0]/refs/heads") or xbail "mkdir: $!";
+
+	my $err = '';
+	run_script([qw(-fetch -q)], undef, {-C => "$tmpdir/m", 2 => \$err}) or
+		xbail '-fetch failed '.$err;
+	is_deeply([ sort xqx($get_objs) ], $before,
+		'fetch restored objects once GIT_DIR became writable');
+}
+
 ok($td->kill, 'killed httpd');
 $td->join;
 

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-09-17 11:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-17 11:00 [PATCH] fetch: ignore non-writable epoch dirs Eric Wong

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).