about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--Documentation/public-inbox-clone.pod10
-rw-r--r--lib/PublicInbox/LeiMirror.pm37
-rwxr-xr-xscript/public-inbox-clone2
3 files changed, 43 insertions, 6 deletions
diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod
index 257967d9..9288b175 100644
--- a/Documentation/public-inbox-clone.pod
+++ b/Documentation/public-inbox-clone.pod
@@ -94,6 +94,16 @@ C<DESTINATION> directory.  If only C<--objstore=> is specified
 where C<DIR> is an empty string (C<"">), then C<objstore>
 (C<$DESTINATION/objstore>) is the implied value of C<DIR>.
 
+=item --manifest=FILE
+
+When incrementally updating an existing mirror, load the given
+manifest (typically C<manifest.js.gz>) to speed up updates.
+
+If C<FILE> is not an absolute path, it is relative to the
+C<DESTINATION> directory.  If only C<--manifest=> is specified
+where C<FILE> is an empty string (C<"">), then C<manifest.js.gz>
+(C<$DESTINATION/manifest.js.gz>) is the implied value of C<FILE>.
+
 =item -n
 
 =item --dry-run
diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm
index e744f06a..51cc6d05 100644
--- a/lib/PublicInbox/LeiMirror.pm
+++ b/lib/PublicInbox/LeiMirror.pm
@@ -497,6 +497,13 @@ sub fp_done {
 
 sub cmp_fp_fetch {
         my ($self, $go_fetch) = @_;
+        # $go_fetch is either resume_fetch or fgrp_enqueue
+        my $new = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint';
+        my $key = $self->{-key} // die 'BUG: no -key';
+        if (my $cur_ent = $self->{-local_manifest}->{$key}) {
+                # runs go_fetch->DESTROY if eq
+                return $go_fetch->cancel if $cur_ent->{fingerprint} eq $new;
+        }
         my $dst = $self->{cur_dst} // $self->{dst};
         my $cmd = ['git', "--git-dir=$dst", 'show-ref'];
         my $opt = { 2 => $self->{lei}->{2} };
@@ -677,7 +684,10 @@ sub v1_done { # called via OnDestroy
         _write_inbox_config($self);
         my $dst = $self->{cur_dst} // $self->{dst};
         if (defined(my $o = $self->{-ent} ? $self->{-ent}->{owner} : undef)) {
-                run_die([qw(git config -f), "$dst/config", 'gitweb.owner', $o]);
+                my $key = $self->{-key} // die 'BUG: no -key';
+                my $cur = $self->{-local_manifest}->{$key}->{owner} // "\0";
+                $cur eq $o or run_die([qw(git config -f),
+                                        "$dst/config", 'gitweb.owner', $o]);
         }
         my $o = "$dst/objects";
         if (open(my $fh, '<', my $fn = "$o/info/alternates")) {;
@@ -796,6 +806,19 @@ sub decode_manifest ($$$) {
         $m;
 }
 
+sub load_current_manifest ($) { # load the local manifest named by $self->{-manifest}, if any
+        my ($self) = @_;
+        my $fn = $self->{-manifest} // return; # no -manifest path set: nothing to load
+        if (open(my $fh, '<', $fn)) {
+                decode_manifest($fh, $fn, $fn); # last statement evaluated, so its result is the return value
+        } elsif ($!{ENOENT}) { # non-fatal, we can just do it slowly
+                warn "open($fn): $!\n";
+                undef; # missing file: caller gets undef after the warning
+        } else {
+                die "open($fn): $!\n"; # any other open failure is fatal
+        }
+}
+
 sub multi_inbox ($$$) {
         my ($self, $path, $m) = @_;
         my $incl = $self->{lei}->{opt}->{include};
@@ -932,6 +955,7 @@ sub try_manifest {
                 warn $@;
                 return try_scrape($self);
         }
+        local $self->{-local_manifest} = load_current_manifest($self);
         my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m);
         return $lei->child_error(1, $multi) if !ref($multi);
         my $v2 = delete $multi->{v2};
@@ -1012,10 +1036,13 @@ sub do_mirror { # via wq_io_do or public-inbox-clone
                 $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<"";
 --inbox-config must be one of `always', `v2', `v1', or `never'
 
-                if (defined(my $os = $lei->{opt}->{objstore})) {
-                        $os = 'objstore' if $os eq ''; # --objstore w/o args
-                        $os = "$self->{dst}/$os" if $os !~ m!\A/!;
-                        $self->{-objstore} = $os;
+                # we support --objstore= and --manifest= with '' (empty string)
+                for my $default (qw(objstore manifest.js.gz)) {
+                        my ($k) = (split(/\./, $default))[0];
+                        my $v = $lei->{opt}->{$k} // next;
+                        $v = $default if $v eq '';
+                        $v = "$self->{dst}/$v" if $v !~ m!\A/!;
+                        $self->{"-$k"} = $v;
                 }
                 local $LIVE;
                 my $iv = $lei->{opt}->{'inbox-version'} //
diff --git a/script/public-inbox-clone b/script/public-inbox-clone
index e38d7b0d..a11c6874 100755
--- a/script/public-inbox-clone
+++ b/script/public-inbox-clone
@@ -23,7 +23,7 @@ options:
     -C DIR            chdir to specified directory
 EOF
 GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@
-        inbox-config=s inbox-version=i objstore=s
+        inbox-config=s inbox-version=i objstore=s manifest=s
         dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help;
 if ($opt->{help}) { print $help; exit };
 require PublicInbox::Admin; # loads Config