about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2022-11-28 05:31:33 +0000
committer Eric Wong <e@80x24.org> 2022-11-28 23:38:55 +0000
commit fa7736f23e708f2edeaae33d9ce91135cbc095e3 (patch)
tree c7c9e6b374fa63c48ce26012e3e4dce3af002ff4
parent d604845055e3b0096024fd98b993a485a72653cd (diff)
download public-inbox-fa7736f23e708f2edeaae33d9ce91135cbc095e3.tar.gz
This is part of `lei add-external --mirror', and it makes
sense to have for development and testing.  We'll also add
a fallback in case somebody tries --inbox-version and fails
due to newer remote instances of public-inbox.
-rw-r--r-- Documentation/lei-add-external.pod 4
-rw-r--r-- Documentation/public-inbox-clone.pod 6
-rw-r--r-- lib/PublicInbox/LeiMirror.pm 31
-rwxr-xr-x script/public-inbox-clone 2
4 files changed, 29 insertions, 14 deletions
diff --git a/Documentation/lei-add-external.pod b/Documentation/lei-add-external.pod
index 7afcad63..2a131b55 100644
--- a/Documentation/lei-add-external.pod
+++ b/Documentation/lei-add-external.pod
@@ -75,7 +75,9 @@ Default: C<auto>
 
 =item --inbox-version=NUM
 
-Force a public-inbox version (must be C<1> or C<2>).
+Force a remote public-inbox version (must be C<1> or C<2>).
+This is auto-detected by default, and this option exists mainly
+for testing.
 
 =back
 
diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod
index 52c89cfd..1c31fbb3 100644
--- a/Documentation/public-inbox-clone.pod
+++ b/Documentation/public-inbox-clone.pod
@@ -76,6 +76,12 @@ no v1 inboxes are present.
 
 Default: C<always>
 
+=item --inbox-version=NUM
+
+Force a remote public-inbox version (must be C<1> or C<2>).
+This is auto-detected by default, and this option exists mainly
+for testing.
+
 =item -n
 
 =item --dry-run
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index 18c825d3..c3512d43 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -43,7 +43,7 @@ sub _wq_done_wait { # dwaitpid callback (via wq_eof)
 
 # for old installations without manifest.js.gz
 sub try_scrape {
-        my ($self) = @_;
+        my ($self, $fallback_manifest) = @_;
         my $uri = URI->new($self->{src});
         my $lei = $self->{lei};
         my $curl = $self->{curl} //= PublicInbox::LeiCurl->new($lei) or return;
@@ -54,9 +54,17 @@ sub try_scrape {
         close($fh) or return $lei->child_error($?, "@$cmd failed");
 
         # we grep with URL below, we don't want Subject/From headers
-        # making us clone random URLs
+        # making us clone random URLs.  This assumes remote instances
+        # prior to public-inbox 1.7.0
+        # 5b96edcb1e0d8252 (www: move mirror instructions to /text/, 2021-08-28)
         my @html = split(/<hr>/, $html);
         my @urls = ($html[-1] =~ m!\bgit clone --mirror ([a-z\+]+://\S+)!g);
+        if (!@urls && $fallback_manifest) {
+                warn <<EOM;
+W: failed to extract URLs from $uri, trying manifest.js.gz...
+EOM
+                return start_clone_url($self);
+        }
         my $url = $uri->as_string;
         chop($url) eq '/' or die "BUG: $uri not canonicalized";
 
@@ -603,7 +611,6 @@ sub try_manifest {
         my $cmd = $curl->for_uri($lei, $uri, '-R', '-o', $fn);
         my %opt = map { $_ => $lei->{$_} } (0..2);
         my $cerr = run_reap($lei, $cmd, \%opt);
-        local $LIVE;
         if ($cerr) {
                 return try_scrape($self) if ($cerr >> 8) == 22; # 404 missing
                 return $lei->child_error($cerr, "@$cmd failed");
@@ -698,15 +705,15 @@ sub do_mirror { # via wq_io_do or public-inbox-clone
                 $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<"";
 --inbox-config must be one of `always', `v2', `v1', or `never'
 
-                my $iv = $lei->{opt}->{'inbox-version'};
-                if (defined $iv) {
-                        local $LIVE;
-                        return clone_v1($self) if $iv == 1;
-                        return try_scrape($self) if $iv == 2;
-                        die "bad --inbox-version=$iv\n";
-                }
-                return start_clone_url($self) if $self->{src} =~ m!://!;
-                die "TODO: cloning local directories not supported, yet";
+                local $LIVE;
+                my $iv = $lei->{opt}->{'inbox-version'} //
+                        return start_clone_url($self);
+                return clone_v1($self) if $iv == 1;
+                die "bad --inbox-version=$iv\n" if $iv != 2;
+                die <<EOM if $self->{src} !~ m!://!;
+cloning local v2 inboxes not supported
+EOM
+                try_scrape($self, 1);
         };
         $lei->fail($@) if $@;
 }
diff --git a/script/public-inbox-clone b/script/public-inbox-clone
index 3d980c97..2900f232 100755
--- a/script/public-inbox-clone
+++ b/script/public-inbox-clone
@@ -23,7 +23,7 @@ options:
     -C DIR            chdir to specified directory
 EOF
 GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@
-        inbox-config=s
+        inbox-config=s inbox-version=i
         dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help;
 if ($opt->{help}) { print $help; exit };
 require PublicInbox::Admin; # loads Config