about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiMirror.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/LeiMirror.pm')
-rw-r--r--lib/PublicInbox/LeiMirror.pm31
1 files changed, 19 insertions, 12 deletions
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 18c825d3..c3512d43 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -43,7 +43,7 @@ sub _wq_done_wait { # dwaitpid callback (via wq_eof)
 
 # for old installations without manifest.js.gz
 sub try_scrape {
-        my ($self) = @_;
+        my ($self, $fallback_manifest) = @_;
         my $uri = URI->new($self->{src});
         my $lei = $self->{lei};
         my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
@@ -54,9 +54,17 @@ sub try_scrape {
         close($fh) or return $lei->child_error($?, "@$cmd failed");
 
         # we grep with URL below, we don't want Subject/From headers
-        # making us clone random URLs
+        # making us clone random URLs.  This assumes remote instances
+        # prior to public-inbox 1.7.0
+        # 5b96edcb1e0d8252 (www: move mirror instructions to /text/, 2021-08-28)
         my @html = split(/<hr>/, $html);
         my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
+        if (!@urls && $fallback_manifest) {
+                warn <<EOM;
+W: failed to extract URLs from $uri, trying manifest.js.gz...
+EOM
+                return start_clone_url($self);
+        }
         my $url = $uri->as_string;
         chop($url) eq '/' or die "BUG: $uri not canonicalized";
 
@@ -603,7 +611,6 @@ sub try_manifest {
         my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn);
         my %opt = map { $_ => $lei->{$_} } (0..2);
         my $cerr = run_reap($lei, $cmd, \%opt);
-        local $LIVE;
         if ($cerr) {
                 return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing
                 return $lei->child_error($cerr, "@$cmd failed");
@@ -698,15 +705,15 @@ sub do_mirror { # via wq_io_do or public-inbox-clone
                 $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<"";
 --inbox-config must be one of `always', `v2', `v1', or `never'
 
-                my $iv = $lei->{opt}->{'inbox-version'};
-                if (defined $iv) {
-                        local $LIVE;
-                        return clone_v1($self) if $iv == 1;
-                        return try_scrape($self) if $iv == 2;
-                        die "bad --inbox-version=$iv\n";
-                }
-                return start_clone_url($self) if $self->{src} =~ m!://!;
-                die "TODO: cloning local directories not supported, yet";
+                local $LIVE;
+                my $iv = $lei->{opt}->{'inbox-version'} //
+                        return start_clone_url($self);
+                return clone_v1($self) if $iv == 1;
+                die "bad --inbox-version=$iv\n" if $iv != 2;
+                die <<EOM if $self->{src} !~ m!://!;
+cloning local v2 inboxes not supported
+EOM
+                try_scrape($self, 1);
         };
         $lei->fail($@) if $@;
 }