From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF shortcircuit=no autolearn=ham autolearn_force=no version=3.4.6 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 93B311F625 for ; Sun, 12 Feb 2023 23:19:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1676243985; bh=2h4odG+xZy6Ng4OpBUfTnfYkDuRCqtDDIiDx32SaoEk=; h=From:To:Subject:Date:In-Reply-To:References:From; b=DF3A1jh43AG4eNhTuzAgPRasVBQjED2HYDlseHn8twcf8j5QhKsGbEyWROQYyRvm6 i51PoECWyT5gMSUHUwqAqViBY8LB88r/e+SwGs7TolhQ7qu0vb1eECDTYWPLmKnn3b 6kOlB/HoZCB3kF9PqpN7AaAeydPyeIoyqVJg/nso= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] lei_mirror: fetch most-recently-updated repos, first Date: Sun, 12 Feb 2023 23:18:28 +0000 Message-Id: <20230212231828.33336-3-e@80x24.org> In-Reply-To: <20230212231828.33336-1-e@80x24.org> References: <20230212231828.33336-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Within the same forkgroup, we can assume the most recently updated repos have the most data, so fetch those first. We'll save new clones for last since we can preserve {reference} ordering for them. 
--- lib/PublicInbox/LeiMirror.pm | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/LeiMirror.pm b/lib/PublicInbox/LeiMirror.pm index dd6356bb..4dedac9b 100644 --- a/lib/PublicInbox/LeiMirror.pm +++ b/lib/PublicInbox/LeiMirror.pm @@ -23,7 +23,7 @@ use PublicInbox::SHA qw(sha256_hex sha1_hex); use POSIX qw(strftime); our $LIVE; # pid => callback -our $FGRP_TODO; # objstore -> [ fgrp mirror objects ] +our $FGRP_TODO; # objstore -> [[ to resume ], [ to clone ]] our $TODO; # reference => [ non-fgrp mirror objects ] our @PUH; # post-update hooks @@ -404,9 +404,12 @@ sub fgrp_fetch_all { (fetch_args($self->{lei}, $opt), qw(--no-tags --multiple)); }; push(@fetch, "-j$j") if $j; - while (my ($osdir, $fgrpv) = each %$todo) { + while (my ($osdir, $fgrp_old_new) = each %$todo) { my $f = "$osdir/config"; return if !keep_going($self); + my ($fgrpv, $new) = @$fgrp_old_new; + @$fgrpv = sort { $b->{-sort} <=> $a->{-sort} } @$fgrpv; + push @$fgrpv, @$new; # $new is ordered by references my $cmd = ['git', "--git-dir=$osdir", qw(config -f), $f ]; # clobber group from previous run atomically @@ -568,7 +571,8 @@ sub fgrp_enqueue { my ($fgrp, $end) = @_; # $end calls fgrp_fetch_all return if !keep_going($fgrp); ++$fgrp->{chg}->{nr_chg}; - push @{$FGRP_TODO->{$fgrp->{-osdir}}}, $fgrp; + my $dst = $FGRP_TODO->{$fgrp->{-osdir}} //= [ [], [] ]; # [ old, new ] + push @{$dst->[defined($fgrp->{-sort} ? 0 : 1)]}, $fgrp; } sub clone_v1 { @@ -586,8 +590,12 @@ sub clone_v1 { my $resume = -d $dst; if (my $fgrp = forkgroup_prep($self, $uri)) { $fgrp->{-fini} = $fini; - $resume ? cmp_fp_do($fgrp, \&fgrp_enqueue, $end) - : fgrp_enqueue($fgrp, $end); + if ($resume) { + $fgrp->{-sort} = $fgrp->{-ent}->{modified}; + cmp_fp_do($fgrp, \&fgrp_enqueue, $end); + } else { # new repo, save for last + fgrp_enqueue($fgrp, $end); + } } elsif ($resume) { cmp_fp_do($self, \&resume_fetch, $uri, $fini); } else { # normal clone