From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.1 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4BA4E203B9 for ; Mon, 28 Nov 2022 05:32:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1669613567; bh=tk3ztDoTNHx+SYcNrwPqN1AIcOthpBgSn1ZqkI4arC0=; h=From:To:Subject:Date:In-Reply-To:References:From; b=rz/4YW/jHlY1Id/X0inv+p2iUJaQwKwasv1hvV9IHHonzUX7q/X5xjbPrRuRA4RWy /iPpRXS94WoxUlG2P9dE9mOCKhCT9B8cj8PU4LAlZa0tbUPuk/k2acL1+zWIFp0gov /xxUUM8qrJukpukrTp1VP9mkEzmi9t4NXELRxRyc= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 69/95] lei_mirror: check fingerprints before fetching Date: Mon, 28 Nov 2022 05:32:06 +0000 Message-Id: <20221128053232.291618-70-e@80x24.org> In-Reply-To: <20221128053232.291618-1-e@80x24.org> References: <20221128053232.291618-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: While we currently don't check an existing on-disk manifest, using `git show-ref' can still save us precious network traffic. --- lib/PublicInbox/LeiMirror.pm | 54 +++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index f2111dd2..e744f06a 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -18,7 +18,7 @@ use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::LeiCurl; use PublicInbox::OnDestroy; -use Digest::SHA qw(sha256_hex); +use Digest::SHA qw(sha256_hex sha1_hex); our $LIVE; # pid => callback @@ -483,6 +483,36 @@ EOM bless { %$self, -osdir => $dir, -remote => $rn }, __PACKAGE__; } +sub fp_done { + my ($self, $go_fetch) = @_; + my $fh = delete $self->{-show_ref} // die 'BUG: no show-ref output'; + seek($fh, SEEK_SET, 0) or die "seek(show_ref): $!"; + $self->{-ent} // die 'BUG: no -ent'; + my $A = $self->{-ent}->{fingerprint} // die 'BUG: no fingerprint'; + my $B = sha1_hex(do { local $/; <$fh> } // die("read(show_ref): $!")); + return if $A ne $B; # $go_fetch->DESTROY fires + $go_fetch->cancel; + $self->{lei}->qerr("# $self->{-key} up-to-date"); +} + +sub cmp_fp_fetch { + my ($self, $go_fetch) = @_; + my $dst = $self->{cur_dst} // $self->{dst}; + my $cmd = ['git', "--git-dir=$dst", 'show-ref']; + my $opt = { 2 => $self->{lei}->{2} }; + open($opt->{1}, '+>', undef) or die "open(tmp): $!"; + $self->{-show_ref} = $opt->{1}; + my $done = PublicInbox::OnDestroy->new($$, \&fp_done, $self, $go_fetch); + start_cmd($self, $cmd, $opt, $done); +} + +sub resume_fetch_maybe { + my ($self, $uri, $fini) = @_; + my $go_fetch = PublicInbox::OnDestroy->new($$, \&resume_fetch, @_); + cmp_fp_fetch($self, $go_fetch) if $self->{-ent} && + defined($self->{-ent}->{fingerprint}); +} + sub resume_fetch { my ($self, $uri, $fini) = @_; my $dst = $self->{cur_dst} // $self->{dst}; @@ -500,6 +530,19 @@ sub resume_fetch { start_cmd($self, $cmd, $opt, $fini); } +sub fgrp_enqueue_maybe { + my ($self, $fgrp) = @_; + my $enq = PublicInbox::OnDestroy->new($$, \&fgrp_enqueue, $self, $fgrp); + cmp_fp_fetch($self, $enq) if $self->{-ent} && + defined($self->{-ent}->{fingerprint}); + # $enq->DESTROY calls fgrp_enqueue otherwise +} + +sub fgrp_enqueue { + my ($self, $fgrp) = @_; + push @{$self->{fgrp_todo}->{$fgrp->{-osdir}}}, $fgrp; +} + sub clone_v1 { my ($self, $nohang) = @_; my $lei = $self->{lei}; @@ -510,11 +553,13 @@ sub clone_v1 { $self->{-torsocks} //= $curl->torsocks($lei, $uri) or return; my $dst = $self->{cur_dst} // $self->{dst}; my $fini = PublicInbox::OnDestroy->new($$, \&v1_done, $self); + my $resume = -d $dst; if (my $fgrp = forkgroup_prep($self, $uri)) { $fgrp->{-fini} = $fini; - push @{$self->{fgrp_todo}->{$fgrp->{-osdir}}}, $fgrp; - } elsif (-d $dst) { - resume_fetch($self, $uri, $fini); + $resume ? fgrp_enqueue_maybe($self, $fgrp) : + fgrp_enqueue($self, $fgrp); + } elsif ($resume) { + resume_fetch_maybe($self, $uri, $fini); } else { # normal clone my $cmd = [ @{$self->{-torsocks}}, clone_cmd($lei, my $opt = {}), "$uri", $dst ]; @@ -844,6 +889,7 @@ EOM last; # restart %$todo iteration } } + do_reap($self, 1); # finish all fingerprint checks fgrp_fetch_all($self); do_reap($self, 1); }