user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 37/95] lei_mirror: require Perl v5.12+
    2022-11-28  5:30  6% ` [PATCH 01/95] clone: support multi-inbox clone Eric Wong
@ 2022-11-28  5:31  9% ` Eric Wong
  1 sibling, 0 replies; 5+ results
From: Eric Wong @ 2022-11-28  5:31 UTC (permalink / raw)
  To: meta

Another tiny step towards improving startup performance by
relying on Perl 5.12 strictness and avoiding strict.pm.
---
 lib/PublicInbox/LeiMirror.pm | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index c3512d43..279ce30e 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -3,8 +3,7 @@
 
 # "lei add-external --mirror" support (also "public-inbox-clone");
 package PublicInbox::LeiMirror;
-use strict;
-use v5.10.1;
+use v5.12;
 use parent qw(PublicInbox::IPC);
 use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 use IO::Compress::Gzip qw(gzip $GzipError);

^ permalink raw reply related	[relevance 9%]

* [PATCH 01/95] clone: support multi-inbox clone
  @ 2022-11-28  5:30  6% ` Eric Wong
  2022-11-28  5:31  9% ` [PATCH 37/95] lei_mirror: require Perl v5.12+ Eric Wong
  1 sibling, 0 replies; 5+ results
From: Eric Wong @ 2022-11-28  5:30 UTC (permalink / raw)
  To: meta

This is to ensure we can do `public-inbox-clone https://yhbt.net/lore'
or `public-inbox-clone https://lore.kernel.org/' and clone all
inboxes (and whatever else git stores).
---
 lib/PublicInbox/Fetch.pm     |  17 +++-
 lib/PublicInbox/LeiMirror.pm | 162 ++++++++++++++++++++++-------------
 t/www_listing.t              |  34 +++++++-
 3 files changed, 152 insertions(+), 61 deletions(-)

diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 364271e8..3b6aa389 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -44,6 +44,21 @@ sub remote_url ($$) {
 	undef
 }
 
+# PSGI mount prefixes and manifest.js.gz prefixes don't always align...
+# TODO: remove, handle multi-inbox fetch
+sub deduce_epochs ($$) {
+	my ($m, $path) = @_;
+	my ($v1_ent, @v2_epochs);
+	my $path_pfx = '';
+	$path =~ s!/+\z!!;
+	do {
+		$v1_ent = $m->{$path};
+		@v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m);
+	} while (!defined($v1_ent) && !@v2_epochs &&
+		$path =~ s!\A(/[^/]+)/!/! and $path_pfx .= $1);
+	($path_pfx, $v1_ent ? $path : undef, @v2_epochs);
+}
+
 sub do_manifest ($$$) {
 	my ($lei, $dir, $ibx_uri) = @_;
 	my $muri = URI->new("$ibx_uri/manifest.js.gz");
@@ -88,7 +103,7 @@ sub do_manifest ($$$) {
 		return;
 	}
 	my (undef, $v1_path, @v2_epochs) =
-		PublicInbox::LeiMirror::deduce_epochs($mdiff, $ibx_uri->path);
+		deduce_epochs($mdiff, $ibx_uri->path);
 	[ 200, $muri, $v1_path, \@v2_epochs, $ft, $mf, $m1 ];
 }
 
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index ed8e4842..e356b5c5 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # "lei add-external --mirror" support (also "public-inbox-clone");
@@ -58,7 +58,7 @@ sub try_scrape {
 	if (my @v2_urls = grep(m!\A\Q$url\E/[0-9]+\z!, @urls)) {
 		my %v2_epochs = map {
 			my ($n) = (m!/([0-9]+)\z!);
-			$n => URI->new($_)
+			$n => [ URI->new($_), '' ]
 		} @v2_urls; # uniq
 		return clone_v2($self, \%v2_epochs);
 	}
@@ -104,26 +104,27 @@ sub ft_rename ($$$) {
 
 sub _get_txt { # non-fatal
 	my ($self, $endpoint, $file, $mode) = @_;
-	my $uri = URI->new($self->{src});
+	my $uri = URI->new($self->{cur_src} // $self->{src});
 	my $lei = $self->{lei};
 	my $path = $uri->path;
 	chop($path) eq '/' or die "BUG: $uri not canonicalized";
 	$uri->path("$path/$endpoint");
-	my $ft = File::Temp->new(TEMPLATE => "$file-XXXX", DIR => $self->{dst});
+	my $dst = $self->{cur_dst} // $self->{dst};
+	my $ft = File::Temp->new(TEMPLATE => "$file-XXXX", DIR => $dst);
 	my $opt = { 0 => $lei->{0}, 1 => $lei->{1}, 2 => $lei->{2} };
 	my $cmd = $self->{curl}->for_uri($lei, $uri,
 					qw(--compressed -R -o), $ft->filename);
 	my $cerr = run_reap($lei, $cmd, $opt);
 	return "$uri missing" if ($cerr >> 8) == 22;
 	return "# @$cmd failed (non-fatal)" if $cerr;
-	ft_rename($ft, "$self->{dst}/$file", $mode);
+	ft_rename($ft, "$dst/$file", $mode);
 	undef; # success
 }
 
 # tries the relatively new /$INBOX/_/text/config/raw endpoint
 sub _try_config {
 	my ($self) = @_;
-	my $dst = $self->{dst};
+	my $dst = $self->{cur_dst} // $self->{dst};
 	if (!-d $dst || !mkdir($dst)) {
 		require File::Path;
 		File::Path::mkpath($dst);
@@ -132,7 +133,7 @@ sub _try_config {
 	my $err = _get_txt($self,
 			qw(_/text/config/raw inbox.config.example), 0444);
 	return warn($err, "\n") if $err;
-	my $f = "$self->{dst}/inbox.config.example";
+	my $f = "$dst/inbox.config.example";
 	my $cfg = PublicInbox::Config->git_config_dump($f, $self->{lei}->{2});
 	my $ibx = $self->{ibx} = {};
 	for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) {
@@ -144,7 +145,8 @@ sub _try_config {
 
 sub set_description ($) {
 	my ($self) = @_;
-	my $f = "$self->{dst}/description";
+	my $dst = $self->{cur_dst} // $self->{dst};
+	my $f = "$dst/description";
 	open my $fh, '+>>', $f or die "open($f): $!";
 	seek($fh, 0, SEEK_SET) or die "seek($f): $!";
 	chomp(my $d = do { local $/; <$fh> } // die "read($f): $!");
@@ -152,7 +154,8 @@ sub set_description ($) {
 			$d =~ /^Unnamed repository/ || $d !~ /\S/) {
 		seek($fh, 0, SEEK_SET) or die "seek($f): $!";
 		truncate($fh, 0) or die "truncate($f): $!";
-		print $fh "mirror of $self->{src}\n" or die "print($f): $!";
+		my $src = $self->{cur_src} // $self->{src};
+		print $fh "mirror of $src\n" or die "print($f): $!";
 		close $fh or die "close($f): $!";
 	}
 }
@@ -172,7 +175,7 @@ sub index_cloned_inbox {
 			address => [ 'lei@example.com' ],
 			version => $iv,
 		};
-		$ibx->{inboxdir} = $self->{dst};
+		$ibx->{inboxdir} = $self->{cur_dst} // $self->{dst};
 		PublicInbox::Inbox->new($ibx);
 		PublicInbox::InboxWritable->new($ibx);
 		my $opt = {};
@@ -188,6 +191,7 @@ sub index_cloned_inbox {
 		PublicInbox::Admin::progress_prepare($opt, $lei->{2});
 		PublicInbox::Admin::index_inbox($ibx, undef, $opt);
 	}
+	return if defined $self->{cur_dst};
 	open my $x, '>', "$self->{dst}/mirror.done"; # for _wq_done_wait
 }
 
@@ -205,21 +209,22 @@ sub clone_v1 {
 	my ($self) = @_;
 	my $lei = $self->{lei};
 	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
-	my $uri = URI->new($self->{src});
+	my $uri = URI->new($self->{cur_src} // $self->{src});
 	defined($lei->{opt}->{epoch}) and
 		die "$uri is a v1 inbox, --epoch is not supported\n";
 	my $pfx = $curl->torsocks($lei, $uri) or return;
+	my $dst = $self->{cur_dst} // $self->{dst};
 	my $cmd = [ @$pfx, clone_cmd($lei, my $opt = {}),
-			$uri->as_string, $self->{dst} ];
+			$uri->as_string, $dst ];
 	my $cerr = run_reap($lei, $cmd, $opt);
 	return $lei->child_error($cerr, "@$cmd failed") if $cerr;
 	_try_config($self);
-	write_makefile($self->{dst}, 1);
+	write_makefile($dst, 1);
 	index_cloned_inbox($self, 1);
 }
 
 sub parse_epochs ($$) {
-	my ($opt_epochs, $v2_epochs) = @_; # $epcohs "LOW..HIGH"
+	my ($opt_epochs, $v2_epochs) = @_; # $epochs "LOW..HIGH"
 	$opt_epochs // return; # undef => all epochs
 	my ($lo, $dotdot, $hi, @extra) = split(/(\.\.)/, $opt_epochs);
 	undef($lo) if ($lo // '') eq '';
@@ -282,12 +287,13 @@ sub clone_v2 ($$;$) {
 	my ($self, $v2_epochs, $m) = @_; # $m => manifest.js.gz hashref
 	my $lei = $self->{lei};
 	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
-	my $pfx = $curl->torsocks($lei, (values %$v2_epochs)[0]) or return;
-	my $dst = $self->{dst};
+	my $first_uri = (map { $_->[0] } values %$v2_epochs)[0];
+	my $pfx = $curl->torsocks($lei, $first_uri) or return;
+	my $dst = $self->{cur_dst} // $self->{dst};
 	my $want = parse_epochs($lei->{opt}->{epoch}, $v2_epochs);
-	my (@src_edst, @read_only, @skip_nr);
+	my (@src_edst, @read_only, @skip);
 	for my $nr (sort { $a <=> $b } keys %$v2_epochs) {
-		my $uri = $v2_epochs->{$nr};
+		my ($uri, $key) = @{$v2_epochs->{$nr}};
 		my $src = $uri->as_string;
 		my $edst = $dst;
 		$src =~ m!/([0-9]+)(?:\.git)?\z! or die <<"";
@@ -300,15 +306,11 @@ failed to extract epoch number from $src
 		} else { # create a placeholder so users only need to chmod +w
 			init_placeholder($src, $edst);
 			push @read_only, $edst;
-			push @skip_nr, $nr;
+			push @skip, $key;
 		}
 	}
-	if (@skip_nr) { # filter out the epochs we skipped
-		my $re = join('|', @skip_nr);
-		my @del = grep(m!/git/$re\.git\z!, keys %$m);
-		delete @$m{@del};
-		$self->{-culled_manifest} = 1;
-	}
+	# filter out the epochs we skipped
+	$self->{-culled_manifest} = 1 if delete(@$m{@skip});
 	my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
 	_try_config($self);
 	my $on_destroy = $lk->lock_for_scope($$);
@@ -326,25 +328,11 @@ failed to extract epoch number from $src
 		my @st = stat($edst) or die "stat($edst): $!";
 		chmod($st[2] & 0555, $edst) or die "chmod(a-w, $edst): $!";
 	}
-	write_makefile($self->{dst}, 2);
+	write_makefile($dst, 2);
 	undef $on_destroy; # unlock
 	index_cloned_inbox($self, 2);
 }
 
-# PSGI mount prefixes and manifest.js.gz prefixes don't always align...
-sub deduce_epochs ($$) {
-	my ($m, $path) = @_;
-	my ($v1_ent, @v2_epochs);
-	my $path_pfx = '';
-	$path =~ s!/+\z!!;
-	do {
-		$v1_ent = $m->{$path};
-		@v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m);
-	} while (!defined($v1_ent) && !@v2_epochs &&
-		$path =~ s!\A(/[^/]+)/!/! and $path_pfx .= $1);
-	($path_pfx, $v1_ent ? $path : undef, @v2_epochs);
-}
-
 sub decode_manifest ($$$) {
 	my ($fh, $fn, $uri) = @_;
 	my $js;
@@ -357,6 +345,40 @@ sub decode_manifest ($$$) {
 	$m;
 }
 
+sub multi_inbox ($$$) {
+	my ($self, $path, $m) = @_;
+
+	# assuming everything not v2 is v1, for now
+	my @v1 = sort grep(!m!.+/git/[0-9]+\.git\z!, keys %$m);
+	my @v2_epochs = sort grep(m!.+/git/[0-9]+\.git\z!, keys %$m);
+	my $v2 = {};
+
+	for (@v2_epochs) {
+		m!\A/(.+)/git/[0-9]+\.git\z! or die "BUG: $_";
+		push @{$v2->{$1}}, $_;
+	}
+	my $n = scalar(keys %$v2) + scalar(@v1);
+	my $ret; # { v1 => [ ... ], v2 => { $inbox_name => [ epochs ] }}
+	$ret->{v1} = \@v1 if @v1;
+	$ret->{v2} = $v2 if keys %$v2;
+	my $path_pfx = '';
+
+	# PSGI mount prefixes and manifest.js.gz prefixes don't always align...
+	if (@v2_epochs) {
+		until (grep(m!\A\Q$$path\E/git/[0-9]+\.git\z!,
+				@v2_epochs) == @v2_epochs) {
+			$$path =~ s!\A(/[^/]+)/!/! or last;
+			$path_pfx .= $1;
+		}
+	} elsif (@v1) {
+		while (!defined($m->{$$path}) && $$path =~ s!\A(/[^/]+)/!/!) {
+			$path_pfx .= $1;
+		}
+	}
+	($path_pfx, $n, $ret);
+}
+
+# FIXME: this gets confused by single inbox instance w/ global manifest.js.gz
 sub try_manifest {
 	my ($self) = @_;
 	my $uri = URI->new($self->{src});
@@ -384,25 +406,48 @@ sub try_manifest {
 		warn $@;
 		return try_scrape($self);
 	}
-	my ($path_pfx, $v1_path, @v2_epochs) = deduce_epochs($m, $path);
-	if (@v2_epochs) {
-		# It may be possible to have v1 + v2 in parallel someday:
-		warn(<<EOM) if defined $v1_path;
-# `$v1_path' appears to be a v1 inbox while v2 epochs exist:
-# @v2_epochs
-# ignoring $v1_path (use --inbox-version=1 to force v1 instead)
+	my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m);
+	if (my $v2 = delete $multi->{v2}) {
+		for my $name (sort keys %$v2) {
+			my $epochs = delete $v2->{$name};
+			my %v2_epochs = map {
+				$uri->path($n > 1 ? $path_pfx.$path.$_
+						: $path_pfx.$_);
+				my ($e) = ("$uri" =~ m!/([0-9]+)\.git\z!);
+				$e // die "no [0-9]+\.git in `$uri'";
+				$e => [ $uri->clone, $_ ];
+			} @$epochs;
+			("$uri" =~ m!\A(.+/)git/[0-9]+\.git\z!) or
+				die "BUG: `$uri' !~ m!/git/[0-9]+.git!";
+			local $self->{cur_src} = $1;
+			local $self->{cur_dst} = $self->{dst};
+			if ($n > 1 && $uri->path =~ m!\A\Q$path_pfx$path\E/(.+)/
+							git/[0-9]+\.git\z!x) {
+				$self->{cur_dst} .= "/$1";
+			}
+			index($self->{cur_dst}, "\n") >= 0 and die <<EOM;
+E: `$self->{cur_dst}' must not contain newline
 EOM
-		my %v2_epochs = map {
-			$uri->path($path_pfx.$_);
-			my ($n) = ("$uri" =~ m!/([0-9]+)\.git\z!);
-			$n => $uri->clone
-		} @v2_epochs;
-		clone_v2($self, \%v2_epochs, $m);
-	} elsif (defined $v1_path) {
-		clone_v1($self);
-	} else {
-		die "E: confused by <$uri>, possible matches:\n\t",
-			join("\n\t", sort keys %$m), "\n";
+			clone_v2($self, \%v2_epochs, $m);
+		}
+	}
+	if (my $v1 = delete $multi->{v1}) {
+		my $p = $path_pfx.$path;
+		chop($p) if substr($p, -1, 1) eq '/';
+		$uri->path($p);
+		for my $name (@$v1) {
+			local $self->{cur_src} = "$uri";
+			local $self->{cur_dst} = $self->{dst};
+			if ($n > 1) {
+				$self->{cur_dst} .= $name;
+				$self->{cur_src} .= $name;
+			}
+			index($self->{cur_dst}, "\n") >= 0 and die <<EOM;
+E: `$self->{cur_dst}' must not contain newline
+EOM
+			$self->{cur_src} .= '/';
+			clone_v1($self, 1);
+		}
 	}
 	if (delete $self->{-culled_manifest}) { # set by clone_v2
 		# write the smaller manifest if epochs were skipped so
@@ -414,6 +459,7 @@ EOM
 		utime($mtime, $mtime, $fn) or die "utime(..., $fn): $!";
 	}
 	ft_rename($ft, "$self->{dst}/manifest.js.gz", 0666);
+	open my $x, '>', "$self->{dst}/mirror.done"; # for _wq_done_wait
 }
 
 sub start_clone_url {
diff --git a/t/www_listing.t b/t/www_listing.t
index c556a2d7..e88bfbc5 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -1,5 +1,5 @@
 #!perl -w
-# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 # manifest.js.gz generation and grok-pull integration test
 use strict; use v5.10.1; use PublicInbox::TestCommon;
@@ -115,10 +115,38 @@ SKIP: {
 
 	my $env = { PI_CONFIG => $cfgfile };
 	my $cmd = [ '-httpd', '-W0', "--stdout=$out", "--stderr=$err" ];
+	my $psgi = "$tmpdir/pfx.psgi";
+	{
+		open my $psgi_fh, '>', $psgi or xbail "open: $!";
+		print $psgi_fh <<'EOM' or xbail "print $!";
+use PublicInbox::WWW;
+use Plack::Builder;
+my $www = PublicInbox::WWW->new;
+builder {
+	enable 'Head';
+	mount '/pfx/' => sub { $www->call(@_) }
+}
+EOM
+		close $psgi_fh or xbail "close: $!";
+	}
+
+	# ensure prefixed mount full clones work:
+	$td = start_script([@$cmd, $psgi], $env, { 3 => $sock });
+	my $opt = { 2 => \(my $clone_err = '') };
+	ok(run_script(['-clone', "http://$host:$port/pfx", "$tmpdir/pfx" ],
+		undef, $opt), 'pfx clone w/pfx') or diag "clone_err=$clone_err";
+	undef $td;
+
 	$td = start_script($cmd, $env, { 3 => $sock });
 
 	# default publicinboxGrokManifest match=domain default
 	tiny_test($json, $host, $port);
+
+	# normal full clone on /
+	$clone_err = '';
+	ok(run_script(['-clone', "http://$host:$port/", "$tmpdir/full" ],
+		undef, $opt), 'full clone') or diag "clone_err=$clone_err";
+
 	undef $td;
 
 	print $fh <<"" or xbail "print $!";
@@ -127,9 +155,11 @@ SKIP: {
 
 	close $fh or xbail "close $!";
 	$td = start_script($cmd, $env, { 3 => $sock });
-	tiny_test($json, $host, $port, 1);
 	undef $sock;
+	tiny_test($json, $host, $port, 1);
 
+	# grok-pull sleeps a long while some places:
+	# https://lore.kernel.org/tools/20211013110344.GA10632@dcvr/
 	skip 'TEST_GROK unset', 12 unless $ENV{TEST_GROK};
 	my $grok_pull = require_cmd('grok-pull', 1) or
 		skip('grok-pull not available', 12);

^ permalink raw reply related	[relevance 6%]

* [PATCH] multi_git: hoist out common epoch/alternates handling
@ 2021-09-15 11:26  8% Eric Wong
  0 siblings, 0 replies; 5+ results
From: Eric Wong @ 2021-09-15 11:26 UTC (permalink / raw)
  To: meta

IMHO, this greatly improves code sharing and organization
between v2, extindex, and lei/store.  Common git-related
logic for these is lightly refactored and easier to reason
about.

The impetus for this big change was to ensure inboxes
created+managed by public-inbox-{clone,fetch} could have
alternates and configs set up properly without depending on
SQLite (via V2Writable).  This change does that while
making old code shorter and better factored.
---
 MANIFEST                        |   1 +
 lib/PublicInbox/ExtSearchIdx.pm |  85 +++++++-------------
 lib/PublicInbox/Fetch.pm        |  17 ++--
 lib/PublicInbox/LeiMirror.pm    |  15 ++--
 lib/PublicInbox/LeiStore.pm     |  32 ++------
 lib/PublicInbox/MultiGit.pm     | 136 ++++++++++++++++++++++++++++++++
 lib/PublicInbox/V2Writable.pm   |  87 +++-----------------
 script/public-inbox-convert     |   2 +-
 t/lei-mirror.t                  |  15 +++-
 t/v2mirror.t                    |   9 ++-
 t/v2writable.t                  |   2 +-
 11 files changed, 222 insertions(+), 179 deletions(-)
 create mode 100644 lib/PublicInbox/MultiGit.pm

diff --git a/MANIFEST b/MANIFEST
index a1450880..640eabd1 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -270,6 +270,7 @@ lib/PublicInbox/MiscSearch.pm
 lib/PublicInbox/MsgIter.pm
 lib/PublicInbox/MsgTime.pm
 lib/PublicInbox/Msgmap.pm
+lib/PublicInbox/MultiGit.pm
 lib/PublicInbox/NDC_PP.pm
 lib/PublicInbox/NNTP.pm
 lib/PublicInbox/NNTPD.pm
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 8cdad23d..e0ba6c32 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -21,6 +21,7 @@ use Carp qw(croak carp);
 use Sys::Hostname qw(hostname);
 use POSIX qw(strftime);
 use File::Glob qw(bsd_glob GLOB_NOSORT);
+use PublicInbox::MultiGit;
 use PublicInbox::Search;
 use PublicInbox::SearchIdx qw(prepare_stack is_ancestor is_bad_blob);
 use PublicInbox::OverIdx;
@@ -1133,88 +1134,60 @@ sub idx_init { # similar to V2Writable
 
 	$self->git->cleanup;
 	my $mode = 0644;
-	my $ALL = $self->git->{git_dir}; # ALL.git
-	my $old = -d $ALL;
+	my $ALL = $self->git->{git_dir}; # topdir/ALL.git
+	my ($has_new, $alt, $seen);
 	if ($opt->{-private}) { # LeiStore
+		my $local = "$self->{topdir}/local"; # lei/store
+		$self->{mg} //= PublicInbox::MultiGit->new($self->{topdir},
+							'ALL.git', 'local');
 		$mode = 0600;
-		if (!$old) {
-			umask 077; # don't bother restoring
+		unless (-d $ALL) {
+			umask 077; # don't bother restoring for lei
 			PublicInbox::Import::init_bare($ALL);
 			$self->git->qx(qw(config core.sharedRepository 0600));
 		}
-	} else {
-		PublicInbox::Import::init_bare($ALL) unless $old;
-	}
-	my $info_dir = "$ALL/objects/info";
-	my $alt = "$info_dir/alternates";
-	my (@old, @new, %seen); # seen: st_dev + st_ino
-	if (-e $alt) {
-		open(my $fh, '<', $alt) or die "open $alt: $!";
-		$mode = (stat($fh))[2] & 07777;
-		while (my $line = <$fh>) {
-			chomp(my $d = $line);
-
-			# expand relative path (/local/ stuff)
-			substr($d, 0, 3) eq '../' and
-				$d = "$ALL/objects/$d";
-			if (my @st = stat($d)) {
-				next if $seen{"$st[0]\0$st[1]"}++;
-			} else {
-				warn "W: stat($d) failed (from $alt): $!\n";
-				next if $opt->{-idx_gc};
-			}
-			push @old, $line;
-		}
+		($alt, $seen) = $self->{mg}->read_alternates(\$mode);
+		$has_new = $self->{mg}->merge_epochs($alt, $seen);
+	} else { # extindex has no epochs
+		$self->{mg} //= PublicInbox::MultiGit->new($self->{topdir},
+							'ALL.git');
+		($alt, $seen) = $self->{mg}->read_alternates(\$mode,
+							$opt->{-idx_gc});
+		PublicInbox::Import::init_bare($ALL);
 	}
 
-	# for LeiStore, and possibly some mirror-only state
-	if (opendir(my $dh, my $local = "$self->{topdir}/local")) {
-		# highest numbered epoch first
-		for my $n (sort { $b <=> $a } map { substr($_, 0, -4) + 0 }
-				grep(/\A[0-9]+\.git\z/, readdir($dh))) {
-			my $d = "$local/$n.git/objects"; # absolute path
-			if (my @st = stat($d)) {
-				next if $seen{"$st[0]\0$st[1]"}++;
-				# favor relative paths for rename-friendliness
-				push @new, "../../local/$n.git/objects\n";
-			} else {
-				warn "W: stat($d) failed: $!\n";
-			}
-		}
-	}
 	# git-multi-pack-index(1) can speed up "git cat-file" startup slightly
-	my $dh;
 	my $git_midx = 0;
 	my $pd = "$ALL/objects/pack";
-	if (!mkdir($pd) && $!{EEXIST} && opendir($dh, $pd)) {
-		# drop stale symlinks
+	if (opendir(my $dh, $pd)) { # drop stale symlinks
 		while (defined(my $dn = readdir($dh))) {
 			if ($dn =~ /\.(?:idx|pack|promisor|bitmap|rev)\z/) {
 				my $f = "$pd/$dn";
 				unlink($f) if -l $f && !-e $f;
 			}
 		}
-		undef $dh;
+	} elsif ($!{ENOENT}) {
+		mkdir($pd) or die "mkdir($pd): $!";
+	} else {
+		die "opendir($pd): $!";
 	}
+	my $new = '';
 	for my $ibx (@{ibx_sorted($self, 'active')}) {
 		# create symlinks for multi-pack-index
 		$git_midx += symlink_packs($ibx, $pd);
 		# add new lines to our alternates file
-		my $line = $ibx->git->{git_dir} . "/objects\n";
-		chomp(my $d = $line);
+		my $d = $ibx->git->{git_dir} . '/objects';
+		next if exists $alt->{$d};
 		if (my @st = stat($d)) {
-			next if $seen{"$st[0]\0$st[1]"}++;
+			next if $seen->{"$st[0]\0$st[1]"}++;
 		} else {
 			warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n";
 			next if $opt->{-idx_gc};
 		}
-		push @new, $line;
-	}
-	if (scalar @new) {
-		push @old, @new;
-		my $o = \@old;
-		PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o);
+		$new .= "$d\n";
 	}
+	($has_new || $new ne '') and
+		$self->{mg}->write_alternates($mode, $alt, $new);
 	$git_midx and $self->with_umask(sub {
 		my @cmd = ('multi-pack-index');
 		push @cmd, '--no-progress' if ($opt->{quiet}//0) > 1;
@@ -1226,7 +1199,7 @@ sub idx_init { # similar to V2Writable
 	$self->with_umask(\&_idx_init, $self, $opt);
 	$self->{oidx}->begin_lazy;
 	$self->{oidx}->eidx_prep;
-	$self->{midx}->create_xdb if @new;
+	$self->{midx}->create_xdb if $new ne '';
 }
 
 sub _watch_commit { # PublicInbox::DS::add_timer callback
diff --git a/lib/PublicInbox/Fetch.pm b/lib/PublicInbox/Fetch.pm
index 6a6daee6..9ea55e9d 100644
--- a/lib/PublicInbox/Fetch.pm
+++ b/lib/PublicInbox/Fetch.pm
@@ -6,12 +6,11 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC);
 use URI ();
-use PublicInbox::Spawn qw(popen_rd);
+use PublicInbox::Spawn qw(popen_rd run_die);
 use PublicInbox::Admin;
 use PublicInbox::LEI;
 use PublicInbox::LeiCurl;
 use PublicInbox::LeiMirror;
-use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
 use File::Temp ();
 
 sub new { bless {}, __PACKAGE__ }
@@ -87,15 +86,15 @@ sub do_fetch {
 	my $ibx_ver;
 	$lei->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
 	my $dir = PublicInbox::Admin::resolve_inboxdir($cd, \$ibx_ver);
-	my ($ibx_uri, @git_dir, @epochs);
+	my ($ibx_uri, @git_dir, @epochs, $mg, @new_epoch);
 	if ($ibx_ver == 1) {
 		my $url = remote_url($lei, $dir) //
 			die "E: $dir missing remote.origin.url\n";
 		$ibx_uri = URI->new($url);
 	} else { # v2:
-		opendir my $dh, "$dir/git" or die "opendir $dir/git: $!";
-		@epochs = sort { $b <=> $a } map { substr($_, 0, -4) + 0 }
-					grep(/\A[0-9]+\.git\z/, readdir($dh));
+		require PublicInbox::MultiGit;
+		$mg = PublicInbox::MultiGit->new($dir, 'all.git', 'git');
+		my @epochs = $mg->git_epochs;
 		my ($git_url, $epoch);
 		for my $nr (@epochs) { # try newest epoch, first
 			my $edir = "$dir/git/$nr.git";
@@ -121,9 +120,7 @@ EOM
 	if ($code == 404) {
 		# any pre-manifest.js.gz instances running? Just fetch all
 		# existing ones and unconditionally try cloning the next
-		$v2_epochs = [ map {;
-				"$dir/git/$_.git";
-				} @epochs ];
+		$v2_epochs = [ map { "$dir/git/$_.git" } @epochs ];
 		push @$v2_epochs, "$dir/git/".($epochs[-1] + 1) if @epochs;
 	} else {
 		$code == 200 or die "BUG unexpected code $code\n";
@@ -154,6 +151,7 @@ EOM
 			$cmd = [ @$torsocks,
 				PublicInbox::LeiMirror::clone_cmd($lei, $opt),
 				$$e_uri, $d];
+			push @new_epoch, substr($epath, 5, -4) + 0;
 		}
 		my $cerr = PublicInbox::LeiMirror::run_reap($lei, $cmd, $opt);
 		# do not bail on clone failure if we didn't have a manifest
@@ -162,6 +160,7 @@ EOM
 			return;
 		}
 	}
+	for my $i (@new_epoch) { $mg->epoch_cfg_set($i) }
 	if ($ft) {
 		my $fn = $ft->filename;
 		rename($fn, $mf) or die "E: rename($fn, $mf): $!\n";
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index bc2e749c..c113c9de 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -1,13 +1,13 @@
 # Copyright (C) 2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
-# "lei add-external --mirror" support
+# "lei add-external --mirror" support (also "public-inbox-clone");
 package PublicInbox::LeiMirror;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC);
 use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
-use PublicInbox::Spawn qw(popen_rd spawn);
+use PublicInbox::Spawn qw(popen_rd spawn run_die);
 use File::Temp ();
 use Fcntl qw(SEEK_SET);
 
@@ -209,7 +209,6 @@ sub clone_v2 {
 	my $lei = $self->{lei};
 	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
 	my $pfx //= $curl->torsocks($lei, $v2_uris->[0]) or return;
-	my @epochs;
 	my $dst = $self->{dst};
 	my @src_edst;
 	for my $uri (@$v2_uris) {
@@ -220,17 +219,21 @@ failed to extract epoch number from $src
 
 		my $nr = $1 + 0;
 		$edst .= "/git/$nr.git";
-		push @src_edst, [ $src, $edst ];
+		push @src_edst, $src, $edst;
 	}
 	my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
 	_try_config($self);
 	my $on_destroy = $lk->lock_for_scope($$);
 	my @cmd = clone_cmd($lei, my $opt = {});
-	while (my $pair = shift(@src_edst)) {
-		my $cmd = [ @$pfx, @cmd, @$pair ];
+	while (my ($src, $edst) = splice(@src_edst, 0, 2)) {
+		my $cmd = [ @$pfx, @cmd, $src, $edst ];
 		my $cerr = run_reap($lei, $cmd, $opt);
 		return $lei->child_error($cerr, "@$cmd failed") if $cerr;
 	}
+	require PublicInbox::MultiGit;
+	my $mg = PublicInbox::MultiGit->new($dst, 'all.git', 'git');
+	$mg->fill_alternates;
+	for my $i ($mg->git_epochs) { $mg->epoch_cfg_set($i) }
 	undef $on_destroy; # unlock
 	index_cloned_inbox($self, 2);
 }
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index f81a8dae..42f574f2 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -27,7 +27,6 @@ use PublicInbox::MDA;
 use PublicInbox::Spawn qw(spawn);
 use PublicInbox::MdirReader;
 use PublicInbox::LeiToMail;
-use List::Util qw(max);
 use File::Temp ();
 use POSIX ();
 use IO::Handle (); # ->autoflush
@@ -50,19 +49,6 @@ sub rotate_bytes {
 	$_[0]->{rotate_bytes} // ((1024 * 1024 * 1024) / $_[0]->packing_factor)
 }
 
-sub git_pfx { "$_[0]->{priv_eidx}->{topdir}/local" };
-
-sub git_epoch_max  {
-	my ($self) = @_;
-	if (opendir(my $dh, $self->git_pfx)) {
-		max(map {
-			substr($_, 0, -4) + 0; # drop ".git" suffix
-		} grep(/\A[0-9]+\.git\z/, readdir($dh))) // 0;
-	} else {
-		$!{ENOENT} ? 0 : die("opendir ${\$self->git_pfx}: $!\n");
-	}
-}
-
 sub git_ident ($) {
 	my ($git) = @_;
 	my $rdr = {};
@@ -91,22 +77,16 @@ sub importer {
 		$im->done;
 		undef $im;
 		$self->checkpoint;
-		$max = $self->git_epoch_max + 1;
+		$max = $self->{priv_eidx}->{mg}->git_epochs + 1;
 	}
 	my (undef, $tl) = eidx_init($self); # acquire lock
-	my $pfx = $self->git_pfx;
-	$max //= $self->git_epoch_max;
+	$max //= $self->{priv_eidx}->{mg}->git_epochs;
 	while (1) {
-		my $latest = "$pfx/$max.git";
-		my $old = -e $latest;
-		PublicInbox::Import::init_bare($latest);
+		my $latest = $self->{priv_eidx}->{mg}->add_epoch($max);
 		my $git = PublicInbox::Git->new($latest);
-		if (!$old) {
-			$git->qx(qw(config core.sharedRepository 0600));
-			$self->done; # unlock
-			# re-acquire lock, update alternates for new epoch
-			(undef, $tl) = eidx_init($self);
-		}
+		$self->done; # unlock
+		# re-acquire lock, update alternates for new epoch
+		(undef, $tl) = eidx_init($self);
 		my $packed_bytes = $git->packed_bytes;
 		my $unpacked_bytes = $packed_bytes / $self->packing_factor;
 		if ($unpacked_bytes >= $self->rotate_bytes) {
diff --git a/lib/PublicInbox/MultiGit.pm b/lib/PublicInbox/MultiGit.pm
new file mode 100644
index 00000000..91d7998a
--- /dev/null
+++ b/lib/PublicInbox/MultiGit.pm
@@ -0,0 +1,136 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common git alternates + all.git||ALL.git management code
+package PublicInbox::MultiGit;
+use strict;
+use v5.10.1;
+use PublicInbox::Spawn qw(run_die);
+use PublicInbox::Import;
+use File::Temp 0.19;
+use List::Util qw(max);
+
+sub new {
+	my ($cls, $topdir, $all, $epfx) = @_;
+	bless {
+		topdir => $topdir, # inboxdir || extindex.*.topdir
+		all => $all, # all.git or ALL.git
+		epfx => $epfx, # "git" (inbox) or "local" (lei/store)
+	}, $cls;
+}
+
+sub read_alternates {
+	my ($self, $moderef, $prune) = @_;
+	my $objpfx = "$self->{topdir}/$self->{all}/objects/";
+	my $f = "${objpfx}info/alternates";
+	my %alt; # line => score
+	my %seen; # $st_dev\0$st_ino => count
+	my $other = 0;
+	if (open(my $fh, '<', $f)) {
+		my $is_edir = defined($self->{epfx}) ?
+			qr!\A\Q../../$self->{epfx}\E/([0-9]+)\.git/objects\z! :
+			undef;
+		$$moderef = (stat($fh))[2] & 07777;
+		for my $rel (split(/^/m, do { local $/; <$fh> })) {
+			chomp(my $dir = $rel);
+			my $score;
+			if (defined($is_edir) && $dir =~ $is_edir) {
+				$score = $1 + 0;
+				substr($dir, 0, 0) = $objpfx;
+			} else { # absolute paths, if any (extindex)
+				$score = --$other;
+			}
+			if (my @st = stat($dir)) {
+				next if $seen{"$st[0]\0$st[1]"}++;
+				$alt{$rel} = $score;
+			} else {
+				warn "W: stat($dir) failed: $! ($f)";
+				$alt{$rel} = $score unless $prune;
+			}
+		}
+	} elsif (!$!{ENOENT}) {
+		die "E: open($f): $!";
+	}
+	(\%alt, \%seen);
+}
+
+sub epoch_dir { "$_[0]->{topdir}/$_[0]->{epfx}" }
+
+sub write_alternates {
+	my ($self, $mode, $alt, @new) = @_;
+	my $all_dir = "$self->{topdir}/$self->{all}";
+	PublicInbox::Import::init_bare($all_dir);
+	my $out = join('', sort { $alt->{$b} <=> $alt->{$a} } keys %$alt);
+	my $info_dir = "$all_dir/objects/info";
+	my $fh = File::Temp->new(TEMPLATE => 'alt-XXXX', DIR => $info_dir);
+	my $f = $fh->filename;
+	print $fh $out, @new or die "print($f): $!";
+	chmod($mode, $fh) or die "fchmod($f): $!";
+	close $fh or die "close($f): $!";
+	my $fn = "$info_dir/alternates";
+	rename($f, $fn) or die "rename($f, $fn): $!";
+	$fh->unlink_on_destroy(0);
+}
+
+# returns true if new epochs exist
+sub merge_epochs {
+	my ($self, $alt, $seen) = @_;
+	my $epoch_dir = epoch_dir($self);
+	if (opendir my $dh, $epoch_dir) {
+		my $has_new;
+		for my $bn (grep(/\A[0-9]+\.git\z/, readdir($dh))) {
+			my $rel = "../../$self->{epfx}/$bn/objects\n";
+			next if exists($alt->{$rel});
+			if (my @st = stat("$epoch_dir/$bn/objects")) {
+				next if $seen->{"$st[0]\0$st[1]"}++;
+				$alt->{$rel} = substr($bn, 0, -4) + 0;
+				$has_new = 1;
+			} else {
+				warn "E: stat($epoch_dir/$bn/objects): $!";
+			}
+		}
+		$has_new;
+	} else {
+		$!{ENOENT} ? undef : die "opendir($epoch_dir): $!";
+	}
+}
+
+sub fill_alternates {
+	my ($self) = @_;
+	my ($alt, $seen) = read_alternates($self, \(my $mode = 0644));
+	merge_epochs($self, $alt, $seen) and
+		write_alternates($self, $mode, $alt);
+}
+
+sub epoch_cfg_set {
+	my ($self, $epoch_nr) = @_;
+	run_die([qw(git config -f), epoch_dir($self)."/$epoch_nr.git/config",
+		'include.path', "../../$self->{all}/config" ]);
+}
+
+sub add_epoch {
+	my ($self, $epoch_nr) = @_;
+	my $git_dir = epoch_dir($self)."/$epoch_nr.git";
+	my $f = "$git_dir/config";
+	my $existing = -f $f;
+	PublicInbox::Import::init_bare($git_dir);
+	epoch_cfg_set($self, $epoch_nr) unless $existing;
+	fill_alternates($self);
+	$git_dir;
+}
+
+sub git_epochs  {
+	my ($self) = @_;
+	if (opendir(my $dh, epoch_dir($self))) {
+		my @epochs = map {
+			substr($_, 0, -4) + 0; # drop ".git" suffix
+		} grep(/\A[0-9]+\.git\z/, readdir($dh));
+		wantarray ? sort { $b <=> $a } @epochs : (max(@epochs) // 0);
+	} elsif ($!{ENOENT}) {
+		wantarray ? () : 0;
+	} else {
+		die(epoch_dir($self).": $!");
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 1288f47b..971b007b 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -12,6 +12,7 @@ use PublicInbox::IPC;
 use PublicInbox::Eml;
 use PublicInbox::Git;
 use PublicInbox::Import;
+use PublicInbox::MultiGit;
 use PublicInbox::MID qw(mids references);
 use PublicInbox::ContentHash qw(content_hash content_digest git_sha);
 use PublicInbox::InboxWritable;
@@ -72,16 +73,14 @@ sub new {
 	$v2ibx = PublicInbox::InboxWritable->new($v2ibx);
 	my $dir = $v2ibx->assert_usable_dir;
 	unless (-d $dir) {
-		if ($creat) {
-			require File::Path;
-			File::Path::mkpath($dir);
-		} else {
-			die "$dir does not exist\n";
-		}
+		die "$dir does not exist\n" if !$creat;
+		require File::Path;
+		File::Path::mkpath($dir);
 	}
 	my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION;
 	my $self = {
 		ibx => $v2ibx,
+		mg => PublicInbox::MultiGit->new($dir, 'all.git', 'git'),
 		im => undef, #  PublicInbox::Import
 		parallel => 1,
 		transact_bytes => 0,
@@ -110,7 +109,7 @@ sub init_inbox {
 	$self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum;
 	my $max = $self->{ibx}->max_git_epoch;
 	$max = $skip_epoch if (defined($skip_epoch) && !defined($max));
-	$self->git_init($max // 0);
+	$self->{mg}->add_epoch($max // 0);
 	$self->done;
 }
 
@@ -641,70 +640,6 @@ sub done {
 	die $err if $err;
 }
 
-sub write_alternates ($$$) {
-	my ($info_dir, $mode, $out) = @_;
-	my $fh = File::Temp->new(TEMPLATE => 'alt-XXXX', DIR => $info_dir);
-	my $tmp = $fh->filename;
-	print $fh @$out or die "print $tmp: $!\n";
-	chmod($mode, $fh) or die "fchmod $tmp: $!\n";
-	close $fh or die "close $tmp $!\n";
-	my $alt = "$info_dir/alternates";
-	rename($tmp, $alt) or die "rename $tmp => $alt: $!\n";
-	$fh->unlink_on_destroy(0);
-}
-
-sub fill_alternates ($$) {
-	my ($self, $epoch) = @_;
-
-	my $pfx = "$self->{ibx}->{inboxdir}/git";
-	my $all = "$self->{ibx}->{inboxdir}/all.git";
-	PublicInbox::Import::init_bare($all) unless -d $all;
-	my $info_dir = "$all/objects/info";
-	my $alt = "$info_dir/alternates";
-	my (%alt, $new);
-	my $mode = 0644;
-	if (-e $alt) {
-		open(my $fh, '<', $alt) or die "open < $alt: $!\n";
-		$mode = (stat($fh))[2] & 07777;
-
-		# we assign a sort score to every alternate and favor
-		# the newest (highest numbered) one because loose objects
-		# require scanning epochs and only the latest epoch is
-		# expected to see loose objects
-		my $score;
-		my $other = 0; # in case admin adds non-epoch repos
-		%alt = map {;
-			if (m!\A\Q../../\E([0-9]+)\.git/objects\z!) {
-				$score = $1 + 0;
-			} else {
-				$score = --$other;
-			}
-			$_ => $score;
-		} split(/\n+/, do { local $/; <$fh> });
-	}
-
-	foreach my $i (0..$epoch) {
-		my $dir = "../../git/$i.git/objects";
-		if (!exists($alt{$dir}) && -d "$pfx/$i.git") {
-			$alt{$dir} = $i;
-			$new = 1;
-		}
-	}
-	return unless $new;
-	write_alternates($info_dir, $mode,
-		[join("\n", sort { $alt{$b} <=> $alt{$a} } keys %alt), "\n"]);
-}
-
-sub git_init {
-	my ($self, $epoch) = @_;
-	my $git_dir = "$self->{ibx}->{inboxdir}/git/$epoch.git";
-	PublicInbox::Import::init_bare($git_dir);
-	run_die([qw(git config), "--file=$git_dir/config",
-		qw(include.path ../../all.git/config)]);
-	fill_alternates($self, $epoch);
-	$git_dir
-}
-
 sub importer {
 	my ($self) = @_;
 	my $im = $self->{im};
@@ -716,8 +651,8 @@ sub importer {
 			$im->done;
 			$im = undef;
 			$self->checkpoint;
-			my $git_dir = $self->git_init(++$self->{epoch_max});
-			my $git = PublicInbox::Git->new($git_dir);
+			my $dir = $self->{mg}->add_epoch(++$self->{epoch_max});
+			my $git = PublicInbox::Git->new($dir);
 			return $self->import_init($git, 0);
 		}
 	}
@@ -737,8 +672,8 @@ sub importer {
 		}
 	}
 	$self->{epoch_max} = $epoch;
-	$latest = $self->git_init($epoch);
-	$self->import_init(PublicInbox::Git->new($latest), 0);
+	my $dir = $self->{mg}->add_epoch($epoch);
+	$self->import_init(PublicInbox::Git->new($dir), 0);
 }
 
 sub import_init {
@@ -1335,7 +1270,7 @@ sub index_sync {
 	local $self->{ibx}->{indexlevel} = 'basic' if $seq;
 
 	$self->idx_init($opt); # acquire lock
-	fill_alternates($self, $epoch_max);
+	$self->{mg}->fill_alternates;
 	$self->{oidx}->rethread_prepare($opt);
 	my $sync = {
 		need_checkpoint => \(my $bool = 0),
diff --git a/script/public-inbox-convert b/script/public-inbox-convert
index fec6b624..01af846a 100755
--- a/script/public-inbox-convert
+++ b/script/public-inbox-convert
@@ -179,7 +179,7 @@ if (my $old_mm = $old->mm) {
 	$v2w->idx_init($opt);
 	$v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm);
 
-	my $epoch0 = PublicInbox::Git->new($v2w->git_init(0));
+	my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0));
 	chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head));
 	$v2w->last_epoch_commit(0, $cmt);
 }
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
index 44acbe95..5238b67c 100644
--- a/t/lei-mirror.t
+++ b/t/lei-mirror.t
@@ -95,7 +95,20 @@ SKIP: {
 
 	ok(run_script([qw(-clone -q -C), $d, "$http/t2"], undef, $opt),
 		'-clone succeeds on v2');
-	ok(-d "$d/t2/git/0.git", 'epoch cloned');
+	ok(-f "$d/t2/git/0.git/config", 'epoch cloned');
+
+	# writeBitmaps is the default for bare repos in git 2.22+,
+	# so we may stop setting it ourselves.
+	0 and is(xqx(['git', "--git-dir=$d/t2/git/0.git", 'config',
+		qw(--bool repack.writeBitmaps)]), "true\n",
+		'write bitmaps set (via include.path=all.git/config');
+
+	is(xqx(['git', "--git-dir=$d/t2/git/0.git", 'config',
+		qw(include.path)]), "../../all.git/config\n",
+		'include.path set');
+
+	ok(-s "$d/t2/all.git/objects/info/alternates",
+		'all.git alternates created');
 	ok(-f "$d/t2/manifest.js.gz", 'manifest saved');
 	ok(!-e "$d/t2/mirror.done", 'no leftover mirror.done');
 	ok(run_script([qw(-fetch -C), "$d/t2"], undef, $opt),
diff --git a/t/v2mirror.t b/t/v2mirror.t
index 8bcffc29..54ad6945 100644
--- a/t/v2mirror.t
+++ b/t/v2mirror.t
@@ -228,10 +228,13 @@ EOF
 	is(scalar($mset->items), 0, 'large message not re-indexed');
 }
 ok(scalar(@new_epochs), 'new epochs were created and fetched');
+for my $d (@new_epochs) {
+	is(xqx(['git', "--git-dir=$d", 'config', qw(include.path)]),
+		"../../all.git/config\n",
+		'include.path set');
+}
 
 ok($td->kill, 'killed httpd');
 $td->join;
 
-done_testing();
-
-1;
+done_testing;
diff --git a/t/v2writable.t b/t/v2writable.t
index d9e7b980..477621e2 100644
--- a/t/v2writable.t
+++ b/t/v2writable.t
@@ -308,7 +308,7 @@ ok($@, 'V2Writable fails on non-existent dir');
 	open $fh, '<', $alt or die $!;
 	my $before = do { local $/; <$fh> };
 
-	ok($v2w->git_init(3), 'init a new epoch');
+	ok($v2w->{mg}->add_epoch(3), 'init a new epoch');
 	open $fh, '<', $alt or die $!;
 	my $after = do { local $/; <$fh> };
 	ok(index($after, $before) > 0,

^ permalink raw reply related	[relevance 8%]

* [PATCH 13/17] lei: add-external --mirror support
  2021-02-06 12:18 11% [PATCH 00/17] lei: more random updates Eric Wong
@ 2021-02-06 12:18 14% ` Eric Wong
  0 siblings, 0 replies; 5+ results
From: Eric Wong @ 2021-02-06 12:18 UTC (permalink / raw)
  To: meta

This can be useful for users who want to clone and
mirror an existing public-inbox.  This doesn't have
update support, yet, so users will need to run
"git fetch && public-inbox-index" for now.
---
 MANIFEST                               |   3 +
 contrib/completion/lei-completion.bash |   2 +-
 lib/PublicInbox/Admin.pm               |   7 +-
 lib/PublicInbox/LEI.pm                 |  17 +-
 lib/PublicInbox/LeiCurl.pm             |  65 ++++++
 lib/PublicInbox/LeiExternal.pm         |  28 ++-
 lib/PublicInbox/LeiMirror.pm           | 288 +++++++++++++++++++++++++
 lib/PublicInbox/LeiXSearch.pm          |  33 +--
 lib/PublicInbox/TestCommon.pm          |   5 +-
 t/lei-mirror.t                         |  24 +++
 10 files changed, 427 insertions(+), 45 deletions(-)
 create mode 100644 lib/PublicInbox/LeiCurl.pm
 create mode 100644 lib/PublicInbox/LeiMirror.pm
 create mode 100644 t/lei-mirror.t

diff --git a/MANIFEST b/MANIFEST
index 52dea385..4236f87c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -177,9 +177,11 @@ lib/PublicInbox/InputPipe.pm
 lib/PublicInbox/Isearch.pm
 lib/PublicInbox/KQNotify.pm
 lib/PublicInbox/LEI.pm
+lib/PublicInbox/LeiCurl.pm
 lib/PublicInbox/LeiDedupe.pm
 lib/PublicInbox/LeiExternal.pm
 lib/PublicInbox/LeiImport.pm
+lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiQuery.pm
 lib/PublicInbox/LeiSearch.pm
@@ -357,6 +359,7 @@ t/kqnotify.t
 t/lei-daemon.t
 t/lei-externals.t
 t/lei-import.t
+t/lei-mirror.t
 t/lei.t
 t/lei_dedupe.t
 t/lei_external.t
diff --git a/contrib/completion/lei-completion.bash b/contrib/completion/lei-completion.bash
index fbda474c..619805fb 100644
--- a/contrib/completion/lei-completion.bash
+++ b/contrib/completion/lei-completion.bash
@@ -5,7 +5,7 @@
 # Needs a lot of work, see `lei__complete' in lib/PublicInbox::LEI.pm
 _lei() {
 	case ${COMP_WORDS[@]} in
-	*' add-external http'*)
+	*' add-external h'* | *' --mirror h'*)
 		compopt -o nospace
 		;;
 	*) compopt +o nospace ;; # the default
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 3b38a5a3..b21fb241 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -273,8 +273,8 @@ EOM
 	$idx->{nidx} // 0; # returns number processed
 }
 
-sub progress_prepare ($) {
-	my ($opt) = @_;
+sub progress_prepare ($;$) {
+	my ($opt, $dst) = @_;
 
 	# public-inbox-index defaults to quiet, -xcpdb and -compact do not
 	if (defined($opt->{quiet}) && $opt->{quiet} < 0) {
@@ -286,7 +286,8 @@ sub progress_prepare ($) {
 		$opt->{1} = $null; # suitable for spawn() redirect
 	} else {
 		$opt->{verbose} ||= 1;
-		$opt->{-progress} = sub { print STDERR @_ };
+		$dst //= *STDERR{GLOB};
+		$opt->{-progress} = sub { print $dst @_ };
 	}
 }
 
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 28ad88e7..bdeab7e3 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -98,6 +98,13 @@ sub _config_path ($) {
 		.'/lei/config');
 }
 
+sub index_opt {
+	# TODO: drop underscore variants everywhere, they're undocumented
+	qw(fsync|sync! jobs|j=i indexlevel|index-level|L=s compact+
+	max_size|max-size=s sequential_shard|sequential-shard
+	batch_size|batch-size=s skip-docdata quiet|q verbose|v+)
+}
+
 # TODO: generate shell completion + help using %CMD and %OPTDESC
 # command => [ positional_args, 1-line description, Getopt::Long option spec ]
 our %CMD = ( # sorted in order of importance/use:
@@ -105,7 +112,7 @@ our %CMD = ( # sorted in order of importance/use:
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
 	sort|s=s reverse|r offset=i remote! local! external! pretty
 	include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g stdin|
-	mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q
+	mua-cmd|mua=s no-torsocks torsocks=s verbose|v+ quiet|q
 	received-after=s received-before=s sent-after=s sent-since=s),
 	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
 
@@ -115,7 +122,8 @@ our %CMD = ( # sorted in order of importance/use:
 
 'add-external' => [ 'URL_OR_PATHNAME',
 	'add/set priority of a publicinbox|extindex for extra matches',
-	qw(boost=i quiet|q) ],
+	qw(boost=i c=s@ mirror=s no-torsocks torsocks=s inbox-version=i),
+	index_opt(), PublicInbox::LeiQuery::curl_opt() ],
 'ls-external' => [ '[FILTER...]', 'list publicinbox|extindex locations',
 	qw(format|f=s z|0 local remote quiet|q) ],
 'forget-external' => [ 'URL_OR_PATHNAME...|--prune',
@@ -204,7 +212,7 @@ my %OPTDESC = (
 'help|h' => 'show this built-in help',
 'quiet|q' => 'be quiet',
 'globoff|g' => "do not match locations using '*?' wildcards and '[]' ranges",
-'verbose|v' => 'be more verbose',
+'verbose|v+' => 'be more verbose',
 'solve!' => 'do not attempt to reconstruct blobs from emails',
 'torsocks=s' => ['auto|no|yes',
 		'whether or not to wrap git and curl commands with torsocks'],
@@ -286,7 +294,7 @@ my %CONFIG_KEYS = (
 	'leistore.dir' => 'top-level storage location',
 );
 
-my @WQ_KEYS = qw(lxs l2m imp); # internal workers
+my @WQ_KEYS = qw(lxs l2m imp mrr); # internal workers
 
 # pronounced "exit": x_it(1 << 8) => exit(1); x_it(13) => SIGPIPE
 sub x_it ($$) {
@@ -714,6 +722,7 @@ sub lei__complete {
 		}
 		puts $self, grep(/$re/, map { # generate short/long names
 			if (s/[:=].+\z//) { # req/optional args, e.g output|o=i
+			} elsif (s/\+\z//) { # verbose|v+
 			} elsif (s/!\z//) {
 				# negation: solve! => no-solve|solve
 				s/([\w\-]+)/$1|no-$1/g
diff --git a/lib/PublicInbox/LeiCurl.pm b/lib/PublicInbox/LeiCurl.pm
new file mode 100644
index 00000000..c8747d4f
--- /dev/null
+++ b/lib/PublicInbox/LeiCurl.pm
@@ -0,0 +1,65 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# common option and torsocks(1) wrapping for curl(1)
+package PublicInbox::LeiCurl;
+use strict;
+use v5.10.1;
+use PublicInbox::Spawn qw(which);
+use PublicInbox::Config;
+
+# prepares a common command for curl(1) based on $lei command
+sub new {
+	my ($cls, $lei, $curl) = @_;
+	$curl //= which('curl') // return $lei->fail('curl not found');
+	my $opt = $lei->{opt};
+	my @cmd = ($curl, qw(-Sf));
+	$cmd[-1] .= 's' if $opt->{quiet}; # already the default for "lei q"
+	$cmd[-1] .= 'v' if $opt->{verbose}; # we use ourselves, too
+	for my $o ($lei->curl_opt) {
+		$o =~ s/\|[a-z0-9]\b//i; # remove single char short option
+		if ($o =~ s/=[is]@\z//) {
+			my $ary = $opt->{$o} or next;
+			push @cmd, map { ("--$o", $_) } @$ary;
+		} elsif ($o =~ s/=[is]\z//) {
+			my $val = $opt->{$o} // next;
+			push @cmd, "--$o", $val;
+		} elsif ($opt->{$o}) {
+			push @cmd, "--$o";
+		}
+	}
+	push @cmd, '-v' if $opt->{verbose}; # lei uses this itself
+	bless \@cmd, $cls;
+}
+
+sub torsocks { # useful for "git clone" and "git fetch", too
+	my ($self, $lei, $uri)= @_;
+	my $opt = $lei->{opt};
+	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
+	my $torsocks = $opt->{torsocks} //= 'auto';
+	if ($torsocks eq 'auto' && substr($uri->host, -6) eq '.onion' &&
+			(($lei->{env}->{LD_PRELOAD}//'') !~ /torsocks/)) {
+		# "auto" continues anyways if torsocks is missing;
+		# a proxy may be specified via CLI, curlrc,
+		# environment variable, or even firewall rule
+		[ ($lei->{torsocks} //= which('torsocks')) // () ]
+	} elsif (PublicInbox::Config::git_bool($torsocks)) {
+		my $x = $lei->{torsocks} //= which('torsocks');
+		$x or return $lei->fail(<<EOM);
+--torsocks=yes specified but torsocks not found in PATH=$ENV{PATH}
+EOM
+		[ $x ];
+	} else { # the common case for current Internet :<
+		[];
+	}
+}
+
+# completes the result of cmd() for $uri
+sub for_uri {
+	my ($self, $lei, $uri) = @_;
+	my $pfx = torsocks($self, $lei, $uri) or return; # error
+	[ @$pfx, @$self, substr($uri->path, -3) eq '.gz' ? () : '--compressed',
+		$uri->as_string ]
+}
+
+1;
diff --git a/lib/PublicInbox/LeiExternal.pm b/lib/PublicInbox/LeiExternal.pm
index accacf1a..6a5c2517 100644
--- a/lib/PublicInbox/LeiExternal.pm
+++ b/lib/PublicInbox/LeiExternal.pm
@@ -88,19 +88,35 @@ sub get_externals {
 	();
 }
 
-sub lei_add_external {
+sub add_external_finish {
 	my ($self, $location) = @_;
 	my $cfg = $self->_lei_cfg(1);
 	my $new_boost = $self->{opt}->{boost} // 0;
-	$location = ext_canonicalize($location);
-	if ($location !~ m!\Ahttps?://! && !-d $location) {
-		return $self->fail("$location not a directory");
-	}
 	my $key = "external.$location.boost";
 	my $cur_boost = $cfg->{$key};
 	return if defined($cur_boost) && $cur_boost == $new_boost; # idempotent
 	$self->lei_config($key, $new_boost);
-	$self->_lei_store(1)->done; # just create the store
+}
+
+sub lei_add_external {
+	my ($self, $location) = @_;
+	$self->_lei_store(1)->write_prepare($self);
+	# boost is read and applied by add_external_finish, not here
+	$location = ext_canonicalize($location);
+	my $mirror = $self->{opt}->{mirror};
+	if (defined($mirror) && -d $location) { # never mirror into an existing dir
+		return $self->fail(<<""); # TODO: did you mean "update-external?"
+--mirror destination `$location' already exists
+
+	}
+	if ($location !~ m!\Ahttps?://! && !-d $location) {
+		$mirror // return $self->fail("$location not a directory");
+		$mirror = ext_canonicalize($mirror);
+		require PublicInbox::LeiMirror;
+		PublicInbox::LeiMirror->start($self, $mirror => $location);
+	} else {
+		add_external_finish($self, $location);
+	}
 }
 
 sub lei_forget_external {
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
new file mode 100644
index 00000000..bb172e6a
--- /dev/null
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -0,0 +1,288 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# "lei add-external --mirror" support
+package PublicInbox::LeiMirror;
+use strict;
+use v5.10.1;
+use parent qw(PublicInbox::IPC);
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
+use PublicInbox::Spawn qw(popen_rd spawn);
+use PublicInbox::PktOp;
+
+sub mirror_done { # EOF callback for main daemon
+	my ($lei) = @_;
+	my $mrr = delete $lei->{mrr};
+	$mrr->wq_wait_old($lei) if $mrr;
+	# FIXME: check $? before finish
+	$lei->add_external_finish($mrr->{dst});
+	$lei->dclose;
+}
+
+# for old installations without manifest.js.gz
+sub try_scrape {
+	my ($self) = @_;
+	my $uri = URI->new($self->{src});
+	my $lei = $self->{lei};
+	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+	my $cmd = $curl->for_uri($lei, $uri);
+	my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
+	my $fh = popen_rd($cmd, $lei->{env}, $opt);
+	my $html = do { local $/; <$fh> } // die "read(curl $uri): $!";
+	close($fh) or return $lei->child_error($?, "@$cmd failed");
+
+	# we grep with URL below, we don't want Subject/From headers
+	# making us clone random URLs
+	my @urls = ($html =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
+	my $url = $uri->as_string;
+	chop($url) eq '/' or die "BUG: $uri not canonicalized";
+
+	# since this is for old instances w/o manifest.js.gz, try v1 first
+	return clone_v1($self) if grep(m!\A\Q$url\E/*\z!, @urls);
+	if (my @v2_urls = grep(m!\A\Q$url\E/[0-9]+\z!, @urls)) {
+		my %v2_uris = map { $_ => URI->new($_) } @v2_urls; # uniq
+		return clone_v2($self, [ values %v2_uris ]);
+	}
+
+	# filter out common URLs served by WWW (e.g /$MSGID/T/)
+	if (@urls && $url =~ s!/+[^/]+\@[^/]+/.*\z!! &&
+			grep(m!\A\Q$url\E/*\z!, @urls)) {
+		die <<"";
+E: confused by scraping <$uri>, did you mean <$url>?
+
+	}
+	@urls and die <<"";
+E: confused by scraping <$uri>, got ambiguous results:
+@urls
+
+	die "E: scraping <$uri> revealed nothing\n";
+}
+
+sub clone_cmd {
+	my ($lei) = @_;
+	my @cmd = qw(git);
+	# we support "-c $key=$val" for arbitrary git config options
+	# e.g.: git -c http.proxy=socks5h://127.0.0.1:9050
+	push(@cmd, '-c', $_) for @{$lei->{opt}->{c} // []};
+	push @cmd, qw(clone --mirror);
+	push @cmd, '-q' if $lei->{opt}->{quiet};
+	push @cmd, '-v' if $lei->{opt}->{verbose};
+	# XXX any other options to support?
+	# --reference is tricky with multiple epochs...
+	@cmd;
+}
+
+# tries the relatively new /$INBOX/_/text/config/raw endpoint (best-effort)
+sub _try_config {
+	my ($self) = @_;
+	my $dst = $self->{dst};
+	if (!-d $dst || !mkdir($dst)) {
+		require File::Path;
+		File::Path::mkpath($dst);
+		-d $dst or die "mkpath($dst): $!\n";
+	}
+	my $uri = URI->new($self->{src});
+	my $lei = $self->{lei};
+	my $path = $uri->path;
+	chop($path) eq '/' or die "BUG: $uri not canonicalized";
+	$uri->path($path . '/_/text/config/raw');
+	my $cmd = $self->{curl}->for_uri($lei, $uri);
+	push @$cmd, '--compressed'; # curl decompresses for us
+	my $ce = "$dst/inbox.config.example";
+	my $f = "$ce-$$.tmp"; # curl writes here; renamed to $ce on success
+	open(my $fh, '+>', $f) or return $lei->err("open $f: $! (non-fatal)");
+	my $opt = { 0 => $lei->{0}, 1 => $fh, 2 => $lei->{2} };
+	$lei->qerr("# @$cmd");
+	my $pid = spawn($cmd, $lei->{env}, $opt);
+	waitpid($pid, 0) == $pid or return $lei->err("waitpid @$cmd: $!");
+	if (($? >> 8) == 22) { # 404 missing
+		unlink($f) if -s $fh == 0;
+		return;
+	}
+	return $lei->err("# @$cmd failed (non-fatal)") if $?;
+	rename($f, $ce) or return $lei->err("rename($f, $ce): $! (non-fatal)");
+	my $cfg = PublicInbox::Config::git_config_dump($ce); # $f no longer exists
+	my $ibx = $self->{ibx} = {};
+	for my $sec (grep(/\Apublicinbox\./, @{$cfg->{-section_order}})) {
+		for (qw(address newsgroup nntpmirror)) {
+			$ibx->{$_} = $cfg->{"$sec.$_"};
+		}
+	}
+}
+
+sub index_cloned_inbox {
+	my ($self, $iv) = @_;
+	my $ibx = delete($self->{ibx}) // {
+		address => [ 'lei@example.com' ],
+		version => $iv,
+	};
+	$ibx->{inboxdir} = $self->{dst};
+	PublicInbox::Inbox->new($ibx);
+	PublicInbox::InboxWritable->new($ibx);
+	my $opt = {};
+	my $lei = $self->{lei};
+	for my $sw ($lei->index_opt) {
+		my ($k) = ($sw =~ /\A([\w-]+)/);
+		$opt->{$k} = $lei->{opt}->{$k};
+	}
+	# force synchronous dwaitpid for v2:
+	local $PublicInbox::DS::in_loop = 0;
+	my $cfg = PublicInbox::Config->new;
+	my $env = PublicInbox::Admin::index_prepare($opt, $cfg);
+	local %ENV = (%ENV, %$env) if $env;
+	PublicInbox::Admin::progress_prepare($opt, $lei->{2});
+	PublicInbox::Admin::index_inbox($ibx, undef, $opt);
+}
+
+sub clone_v1 {
+	my ($self) = @_;
+	my $lei = $self->{lei};
+	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+	my $uri = URI->new($self->{src});
+	my $pfx = $curl->torsocks($lei, $uri) or return;
+	my $cmd = [ @$pfx, clone_cmd($lei), $uri->as_string, $self->{dst} ];
+	$lei->qerr("# @$cmd");
+	my $pid = spawn($cmd, $lei->{env}, $lei);
+	waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!";
+	$? == 0 or return $lei->child_error($?, "@$cmd failed");
+	_try_config($self);
+	index_cloned_inbox($self, 1);
+}
+
+sub clone_v2 {
+	my ($self, $v2_uris) = @_;
+	my $lei = $self->{lei};
+	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+	my $pfx = $curl->torsocks($lei, $v2_uris->[0]) or return;
+	# pair every epoch URL with its "$dst/git/$N.git" destination up front
+	my $dst = $self->{dst};
+	my @src_edst;
+	for my $uri (@$v2_uris) {
+		my $src = $uri->as_string;
+		my $edst = $dst;
+		$src =~ m!/([0-9]+)(?:\.git)?\z! or die <<"";
+failed to extract epoch number from $src
+
+		my $nr = $1 + 0;
+		$edst .= "/git/$nr.git";
+		push @src_edst, [ $src, $edst ];
+	}
+	my $lk = bless { lock_path => "$dst/inbox.lock" }, 'PublicInbox::Lock';
+	_try_config($self);
+	my $on_destroy = $lk->lock_for_scope($$);
+	my @cmd = clone_cmd($lei);
+	while (my $pair = shift(@src_edst)) {
+		my $cmd = [ @$pfx, @cmd, @$pair ];
+		$lei->qerr("# @$cmd");
+		my $pid = spawn($cmd, $lei->{env}, $lei);
+		waitpid($pid, 0) == $pid or die "BUG: waitpid @$cmd: $!";
+		$? == 0 or return $lei->child_error($?, "@$cmd failed");
+	}
+	undef $on_destroy; # unlock
+	index_cloned_inbox($self, 2);
+}
+
+sub try_manifest {
+	my ($self) = @_;
+	my $uri = URI->new($self->{src});
+	my $lei = $self->{lei};
+	my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
+	my $path = $uri->path;
+	chop($path) eq '/' or die "BUG: $uri not canonicalized";
+	$uri->path($path . '/manifest.js.gz');
+	my $cmd = $curl->for_uri($lei, $uri);
+	$lei->qerr("# @$cmd");
+	my $opt = { 0 => $lei->{0}, 2 => $lei->{2} };
+	my $fh = popen_rd($cmd, $lei->{env}, $opt);
+	my $gz = do { local $/; <$fh> } // die "read(curl $uri): $!";
+	unless (close $fh) {
+		return try_scrape($self) if ($? >> 8) == 22; # 404 missing
+		return $lei->child_error($?, "@$cmd failed");
+	}
+	my $js;
+	gunzip(\$gz => \$js, MultiStream => 1) or
+		die "gunzip($uri): $GunzipError";
+	my $m = eval { PublicInbox::Config->json->decode($js) };
+	die "$uri: error decoding `$js': $@" if $@;
+	ref($m) eq 'HASH' or die "$uri unknown type: ".ref($m);
+
+	my $v1_bare = $m->{$path};
+	my @v2_epochs = grep(m!\A\Q$path\E/git/[0-9]+\.git\z!, keys %$m);
+	if (@v2_epochs) {
+		# It may be possible to have v1 + v2 in parallel someday:
+		$lei->err(<<EOM) if defined $v1_bare;
+# `$v1_bare' appears to be a v1 inbox while v2 epochs exist:
+# @v2_epochs
+# ignoring $v1_bare (use --inbox-version=1 to force v1 instead)
+EOM
+		@v2_epochs = map { $uri->path($_); $uri->clone } @v2_epochs;
+		clone_v2($self, \@v2_epochs);
+	} elsif ($v1_bare) {
+		clone_v1($self);
+	} elsif (my @maybe = grep(m!\Q$path\E!, keys %$m)) {
+		die "E: confused by <$uri>, possible matches:\n@maybe";
+	} else {
+		die "E: confused by <$uri>";
+	}
+}
+
+sub start_clone_url {
+	my ($self) = @_;
+	return try_manifest($self) if $self->{src} =~ m!\Ahttps?://!;
+	die "TODO: non-HTTP/HTTPS clone of $self->{src} not supported, yet";
+}
+
+sub do_mirror { # via wq_do
+	my ($self) = @_;
+	my $lei = $self->{lei};
+	eval {
+		my $iv = $lei->{opt}->{'inbox-version'};
+		if (defined $iv) {
+			return clone_v1($self) if $iv == 1;
+			return try_scrape($self) if $iv == 2;
+			die "bad --inbox-version=$iv\n";
+		}
+		return start_clone_url($self) if $self->{src} =~ m!://!;
+		die "TODO: cloning local directories not supported, yet";
+	};
+	return $lei->fail($@) if $@;
+	$lei->qerr("# mirrored $self->{src} => $self->{dst}");
+}
+
+sub start {
+	my ($cls, $lei, $src, $dst) = @_;
+	my $self = bless { lei => $lei, src => $src, dst => $dst }, $cls;
+	$lei->{mrr} = $self;
+	if ($src =~ m!https?://!) {
+		require URI;
+		require PublicInbox::LeiCurl;
+	}
+	require PublicInbox::Lock;
+	require PublicInbox::Inbox;
+	require PublicInbox::Admin;
+	require PublicInbox::InboxWritable;
+	my $ops = {
+		'!' => [ $lei->can('fail_handler'), $lei ],
+		'x_it' => [ $lei->can('x_it'), $lei ],
+		'child_error' => [ $lei->can('child_error'), $lei ],
+		'' => [ \&mirror_done, $lei ],
+	};
+	($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops);
+	$self->wq_workers_start('lei_mirror', 1, $lei->oldset, {lei => $lei});
+	my $op = delete $lei->{pkt_op_c};
+	delete $lei->{pkt_op_p};
+	$self->wq_do('do_mirror', []);
+	$self->wq_close(1);
+	$lei->event_step_init; # wait for shutdowns
+	if ($lei->{oneshot}) {
+		while ($op->{sock}) { $op->event_step }
+	}
+}
+
+sub ipc_atfork_child {
+	my ($self) = @_;
+	$self->{lei}->lei_atfork_child;
+	$self->SUPER::ipc_atfork_child;
+}
+
+1;
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index f8068362..1e5d7ca6 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -212,7 +212,6 @@ sub query_remote_mboxrd {
 	my ($opt, $env) = @$lei{qw(opt env)};
 	my @qform = (q => $lei->{mset_opt}->{qstr}, x => 'm');
 	push(@qform, t => 1) if $opt->{thread};
-	my @cmd = ($self->{curl}, qw(-sSf -d), '');
 	my $verbose = $opt->{verbose};
 	my $reap;
 	my $cerr = File::Temp->new(TEMPLATE => 'curl.err-XXXX', TMPDIR => 1);
@@ -223,43 +222,18 @@ sub query_remote_mboxrd {
 		# spawn a process to force line-buffering, otherwise curl
 		# will write 1 character at-a-time and parallel outputs
 		# mmmaaayyy llloookkk llliiikkkeee ttthhhiiisss
-		push @cmd, '-v';
 		my $o = { 1 => $lei->{2}, 2 => $lei->{2} };
 		my $pid = spawn(['tail', '-f', $cerr->filename], undef, $o);
 		$reap = PublicInbox::OnDestroy->new(\&kill_reap, $pid);
 	}
-	for my $o ($lei->curl_opt) {
-		$o =~ s/\|[a-z0-9]\b//i; # remove single char short option
-		if ($o =~ s/=[is]@\z//) {
-			my $ary = $opt->{$o} or next;
-			push @cmd, map { ("--$o", $_) } @$ary;
-		} elsif ($o =~ s/=[is]\z//) {
-			my $val = $opt->{$o} // next;
-			push @cmd, "--$o", $val;
-		} elsif ($opt->{$o}) {
-			push @cmd, "--$o";
-		}
-	}
-	$opt->{torsocks} = 'false' if $opt->{'no-torsocks'};
-	my $tor = $opt->{torsocks} //= 'auto';
+	my $curl = PublicInbox::LeiCurl->new($lei, $self->{curl}) or return;
+	push @$curl, '-s', '-d', '';
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
 	for my $uri (@$uris) {
 		$lei->{-current_url} = $uri->as_string;
 		$lei->{-nr_remote_eml} = 0;
 		$uri->query_form(@qform);
-		my $cmd = [ @cmd, $uri->as_string ];
-		if ($tor eq 'auto' && substr($uri->host, -6) eq '.onion' &&
-				(($env->{LD_PRELOAD}//'') !~ /torsocks/)) {
-			unshift @$cmd, which('torsocks');
-		} elsif (PublicInbox::Config::git_bool($tor)) {
-			unshift @$cmd, which('torsocks');
-		}
-
-		# continue anyways if torsocks is missing; a proxy may be
-		# specified via CLI, curlrc, environment variable, or even
-		# firewall rule
-		shift(@$cmd) if !$cmd->[0];
-
+		my $cmd = $curl->for_uri($lei, $uri);
 		$lei->err("# @$cmd") if $verbose;
 		my ($fh, $pid) = popen_rd($cmd, $env, $rdr);
 		$fh = IO::Uncompress::Gunzip->new($fh);
@@ -440,6 +414,7 @@ sub add_uri {
 	if (my $curl = $self->{curl} //= which('curl') // 0) {
 		require PublicInbox::MboxReader;
 		require IO::Uncompress::Gunzip;
+		require PublicInbox::LeiCurl;
 		push @{$self->{remotes}}, $uri;
 	} else {
 		warn "curl missing, ignoring $uri\n";
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index c861dc5d..5cce44e4 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -461,8 +461,9 @@ SKIP: {
 Socket::MsgHdr missing or Inline::C is unconfigured/missing
 EOM
 	$lei_opt = { 1 => \$lei_out, 2 => \$lei_err };
-	my $daemon_pid;
-	my ($tmpdir, $for_destroy) = tmpdir();
+	my ($daemon_pid, $for_destroy);
+	my $tmpdir = $test_opt->{tmpdir};
+	($tmpdir, $for_destroy) = tmpdir unless $tmpdir;
 	SKIP: {
 		skip 'TEST_LEI_ONESHOT set', 1 if $ENV{TEST_LEI_ONESHOT};
 		my $home = "$tmpdir/lei-daemon";
diff --git a/t/lei-mirror.t b/t/lei-mirror.t
new file mode 100644
index 00000000..cf34c7ae
--- /dev/null
+++ b/t/lei-mirror.t
@@ -0,0 +1,24 @@
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+my $sock = tcp_server();
+my ($tmpdir, $for_destroy) = tmpdir();
+my $http = 'http://'.$sock->sockhost.':'.$sock->sockport.'/';
+my ($ro_home, $cfg_path) = setup_public_inboxes;
+my $cmd = [ qw(-httpd -W0), "--stdout=$tmpdir/out", "--stderr=$tmpdir/err" ];
+my $td = start_script($cmd, { PI_CONFIG => $cfg_path }, { 3 => $sock });
+test_lei({ tmpdir => $tmpdir }, sub {
+	my $home = $ENV{HOME};
+	my $t1 = "$home/t1-mirror";
+	ok($lei->('add-external', $t1, '--mirror', "$http/t1/"), '--mirror v1');
+	ok(-f "$t1/public-inbox/msgmap.sqlite3", 't1-mirror indexed');
+	my $t2 = "$home/t2-mirror";
+	ok($lei->('add-external', $t2, '--mirror', "$http/t2/"), '--mirror v2');
+	ok(-f "$t2/msgmap.sqlite3", 't2-mirror indexed');
+});
+
+ok($td->kill, 'killed -httpd');
+$td->join;
+
+done_testing;

^ permalink raw reply related	[relevance 14%]

* [PATCH 00/17] lei: more random updates
@ 2021-02-06 12:18 11% Eric Wong
  2021-02-06 12:18 14% ` [PATCH 13/17] lei: add-external --mirror support Eric Wong
  0 siblings, 1 reply; 5+ results
From: Eric Wong @ 2021-02-06 12:18 UTC (permalink / raw)
  To: meta

"lei add-external --mirror $URL $DESTDIR" works.
Tests are split out further and hopefully easier to manage
going forward (they are getting slower, though more use of
the common setup_public_inboxes() may help).

The curl(1) short options are gone to avoid conflicts.
--help looks a bit nicer, now.

Eric Wong (17):
  lei_overview: drop unnecessary autoflush call
  lei: favor "keywords" over "flags", test --no-kw
  lei: fix completion of --no-kw / --no-keywords
  lei: abort lei_import worker on client abort
  init: lowercase -j for --jobs
  lei_query: trim curl options
  tests: add test_lei wrapper, split out t/lei-import.t
  t/lei-externals: split out into separate test
  t/tests: split out setup_public_inboxes sub
  tests: split out lei-daemon.t from lei.t
  treewide: replace confess with croak
  script/lei: avoid waitpid(-1, ...) to keep tests fast
  lei: add-external --mirror support
  lei help: split out into separate file
  lei add-external: reject index and remote opts w/o mirror
  lei_curl: replace -K/--config with --curl-config
  lei: remove short switch support for curl(1) options

 MANIFEST                               |  11 +-
 Makefile.PL                            |   3 +
 contrib/completion/lei-completion.bash |   2 +-
 lib/PublicInbox/Admin.pm               |   7 +-
 lib/PublicInbox/DS.pm                  |  10 +-
 lib/PublicInbox/Eml.pm                 |   4 +-
 lib/PublicInbox/IPC.pm                 |   2 +-
 lib/PublicInbox/LEI.pm                 | 200 +++++-------
 lib/PublicInbox/LeiCurl.pm             |  72 +++++
 lib/PublicInbox/LeiExternal.pm         |  46 ++-
 lib/PublicInbox/LeiHelp.pm             | 100 ++++++
 lib/PublicInbox/LeiImport.pm           |   4 +-
 lib/PublicInbox/LeiMirror.pm           | 288 +++++++++++++++++
 lib/PublicInbox/LeiOverview.pm         |   1 -
 lib/PublicInbox/LeiQuery.pm            |  24 +-
 lib/PublicInbox/LeiXSearch.pm          |  33 +-
 lib/PublicInbox/OverIdx.pm             |   2 +-
 lib/PublicInbox/TestCommon.pm          | 142 ++++++++-
 script/lei                             |  28 +-
 script/public-inbox-init               |   2 +-
 t/home1/.gitignore                     |   5 +
 t/home1/Makefile                       |   7 +
 t/home1/README                         |   8 +
 t/lei-daemon.t                         |  63 ++++
 t/lei-externals.t                      | 200 ++++++++++++
 t/lei-import.t                         |  39 +++
 t/lei-mirror.t                         |  30 ++
 t/lei-oneshot.t                        |   8 -
 t/lei.t                                | 424 +++----------------------
 29 files changed, 1180 insertions(+), 585 deletions(-)
 create mode 100644 lib/PublicInbox/LeiCurl.pm
 create mode 100644 lib/PublicInbox/LeiHelp.pm
 create mode 100644 lib/PublicInbox/LeiMirror.pm
 create mode 100644 t/home1/.gitignore
 create mode 100644 t/home1/Makefile
 create mode 100644 t/home1/README
 create mode 100644 t/lei-daemon.t
 create mode 100644 t/lei-externals.t
 create mode 100644 t/lei-import.t
 create mode 100644 t/lei-mirror.t
 delete mode 100644 t/lei-oneshot.t

^ permalink raw reply	[relevance 11%]

Results 1-5 of 5 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-02-06 12:18 11% [PATCH 00/17] lei: more random updates Eric Wong
2021-02-06 12:18 14% ` [PATCH 13/17] lei: add-external --mirror support Eric Wong
2021-09-15 11:26  8% [PATCH] multi_git: hoist out common epoch/alternates handling Eric Wong
2022-11-28  5:30     [PATCH 00/95] clone: multi-inbox/repo support Eric Wong
2022-11-28  5:30  6% ` [PATCH 01/95] clone: support multi-inbox clone Eric Wong
2022-11-28  5:31  9% ` [PATCH 37/95] lei_mirror: require Perl v5.12+ Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).