From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 831EC1F910 for ; Mon, 28 Nov 2022 05:32:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1669613567; bh=g+8C2cqkV11Q8WJQLV3oTnjoy19BpU3+MMvW03eum+w=; h=From:To:Subject:Date:In-Reply-To:References:From; b=xa12RMXDCLZVfQwIycXz9rNdjz9SvLSBaAHaXnKLV89L5rCqJ6Q1PzR+Tei0qK8OM 8jSZUsHSd4bOe7cKgmHmt+DmDVdAQyT0qhL60KcHmjKmUBNIfdRFpfsqRS43cyesRv 0v1M0t3iaxiEbVRdk+QQJRMzOU24ibBpG7AMLXHw= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 70/95] clone: support loading manifest.js.gz from destination Date: Mon, 28 Nov 2022 05:32:07 +0000 Message-Id: <20221128053232.291618-71-e@80x24.org> In-Reply-To: <20221128053232.291618-1-e@80x24.org> References: <20221128053232.291618-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This will allow us to quickly check fingerprints against remotes with a single HTTP(S) request, saving us numerous `git show-ref' invocations. --- Documentation/public-inbox-clone.pod | 10 ++++++++ lib/PublicInbox/LeiMirror.pm | 37 ++++++++++++++++++++++++---- script/public-inbox-clone | 2 +- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/Documentation/public-inbox-clone.pod b/Documentation/public-inbox-clone.pod index 257967d9..9288b175 100644 --- a/Documentation/public-inbox-clone.pod +++ b/Documentation/public-inbox-clone.pod @@ -94,6 +94,16 @@ C directory. If only C<--objstore=> is specified where C is an empty string (C<"">), then C (C<$DESTINATION/objstore>) is the implied value of C. 
+=item --manifest=FILE + +When incrementally updating an existing mirror, load the given +manifest (typically C<manifest.js.gz>) to speed up updates. + +If C<FILE> is not an absolute path, it is relative to the +C<DESTINATION> directory. If only C<--manifest=> is specified +where C<FILE> is an empty string (C<"">), then C<manifest.js.gz> +(C<$DESTINATION/manifest.js.gz>) is the implied value of C<FILE>. + =item -n =item --dry-run diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index e744f06a..51cc6d05 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -497,6 +497,13 @@ sub fp_done { sub cmp_fp_fetch { my ($self, $go_fetch) = @_; + # $go_fetch is either resume_fetch or fgrp_enqueue + my $new = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint'; + my $key = $self->{-key} // die 'BUG: no -key'; + if (my $cur_ent = $self->{-local_manifest}->{$key}) { + # runs go_fetch->DESTROY run if eq + return $go_fetch->cancel if $cur_ent->{fingerprint} eq $new; + } my $dst = $self->{cur_dst} // $self->{dst}; my $cmd = ['git', "--git-dir=$dst", 'show-ref']; my $opt = { 2 => $self->{lei}->{2} }; @@ -677,7 +684,10 @@ sub v1_done { # called via OnDestroy _write_inbox_config($self); my $dst = $self->{cur_dst} // $self->{dst}; if (defined(my $o = $self->{-ent} ? 
$self->{-ent}->{owner} : undef)) { - run_die([qw(git config -f), "$dst/config", 'gitweb.owner', $o]); + my $key = $self->{-key} // die 'BUG: no -key'; + my $cur = $self->{-local_manifest}->{$key}->{owner} // "\0"; + $cur eq $o or run_die([qw(git config -f), + "$dst/config", 'gitweb.owner', $o]); } my $o = "$dst/objects"; if (open(my $fh, '<', my $fn = "$o/info/alternates")) {; @@ -796,6 +806,19 @@ sub decode_manifest ($$$) { $m; } +sub load_current_manifest ($) { + my ($self) = @_; + my $fn = $self->{-manifest} // return; + if (open(my $fh, '<', $fn)) { + decode_manifest($fh, $fn, $fn); + } elsif ($!{ENOENT}) { # non-fatal, we can just do it slowly + warn "open($fn): $!\n"; + undef; + } else { + die "open($fn): $!\n"; + } +} + sub multi_inbox ($$$) { my ($self, $path, $m) = @_; my $incl = $self->{lei}->{opt}->{include}; @@ -932,6 +955,7 @@ sub try_manifest { warn $@; return try_scrape($self); } + local $self->{-local_manifest} = load_current_manifest($self); my ($path_pfx, $n, $multi) = multi_inbox($self, \$path, $m); return $lei->child_error(1, $multi) if !ref($multi); my $v2 = delete $multi->{v2}; @@ -1012,10 +1036,13 @@ sub do_mirror { # via wq_io_do or public-inbox-clone $ic =~ /\A(?:v1|v2|always|never)\z/s or die <<""; --inbox-config must be one of `always', `v2', `v1', or `never' - if (defined(my $os = $lei->{opt}->{objstore})) { - $os = 'objstore' if $os eq ''; # --objstore w/o args - $os = "$self->{dst}/$os" if $os !~ m!\A/!; - $self->{-objstore} = $os; + # we support --objstore= and --manifest= with '' (empty string) + for my $default (qw(objstore manifest.js.gz)) { + my ($k) = (split(/\./, $default))[0]; + my $v = $lei->{opt}->{$k} // next; + $v = $default if $v eq ''; + $v = "$self->{dst}/$v" if $v !~ m!\A/!; + $self->{"-$k"} = $v; } local $LIVE; my $iv = $lei->{opt}->{'inbox-version'} // diff --git a/script/public-inbox-clone b/script/public-inbox-clone index e38d7b0d..a11c6874 100755 --- a/script/public-inbox-clone +++ b/script/public-inbox-clone @@ 
-23,7 +23,7 @@ options: -C DIR chdir to specified directory EOF GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ - inbox-config=s inbox-version=i objstore=s + inbox-config=s inbox-version=i objstore=s manifest=s dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; if ($opt->{help}) { print $help; exit }; require PublicInbox::Admin; # loads Config