From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4E60E1FA13 for ; Wed, 16 Dec 2020 23:19:07 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/5] inbox: simplify v2 epoch counting Date: Wed, 16 Dec 2020 23:19:04 +0000 Message-Id: <20201216231906.6356-4-e@80x24.org> In-Reply-To: <20201216231906.6356-1-e@80x24.org> References: <20201216231906.6356-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Perl readdir detects list context and can return an array suitable for the grep op. From there, we can rely on substr to remove the ".git" suffix and integerize the value to save a few bytes before letting List::Util::max return the value. This is how we detect Xapian shards nowadays, too, and we'll also use defined-or (//) to simplify the return value there. We'll also simplify InboxWritable->git_dir_latest, remove some callers, and consider removing it entirely. --- lib/PublicInbox/ExtSearchIdx.pm | 4 +--- lib/PublicInbox/Inbox.pm | 18 +++++++----------- lib/PublicInbox/InboxWritable.pm | 17 +++-------------- lib/PublicInbox/Search.pm | 3 +-- lib/PublicInbox/V2Writable.pm | 16 ++++------------ lib/PublicInbox/Xapcmd.pm | 3 +-- 6 files changed, 17 insertions(+), 44 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index f492734a..3764612c 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -290,9 +290,7 @@ sub _sync_inbox ($$$) { my $v = $ibx->version; my $ekey = $ibx->eidx_key; if ($v == 2) { - my $epoch_max; - defined($ibx->git_dir_latest(\$epoch_max)) or return; - $sync->{epoch_max} = $epoch_max; + $sync->{epoch_max} = $ibx->max_git_epoch // return; sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable } elsif ($v == 1) { my $uv = $ibx->uidvalidity; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 4e3c23f3..0973bf94 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -4,10 +4,10 @@ # Represents a public-inbox (which may have multiple mailing addresses) package PublicInbox::Inbox; use strict; -use warnings; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); use PublicInbox::Eml; +use List::Util qw(max); # Long-running "git-cat-file --batch" processes won't notice # unlinked packs, so we need to restart those processes occasionally. @@ -155,19 +155,15 @@ sub max_git_epoch { my ($self) = @_; return if $self->version < 2; my $cur = $self->{-max_git_epoch}; - my $changed = git($self)->alternates_changed; - if (!defined($cur) || $changed) { + my $changed; + if (!defined($cur) || ($changed = git($self)->alternates_changed)) { git_cleanup($self) if $changed; my $gits = "$self->{inboxdir}/git"; if (opendir my $dh, $gits) { - my $max = -1; - while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A([0-9]+)\.git\z! or next; - $max = $1 if $1 > $max; - } - $cur = $self->{-max_git_epoch} = $max if $max >= 0; - } else { - warn "opendir $gits failed: $!\n"; + my $max = max(map { + substr($_, 0, -4) + 0; # drop ".git" suffix + } grep(/\A[0-9]+\.git\z/, readdir($dh))) // return; + $cur = $self->{-max_git_epoch} = $max; } } $cur; diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index bdfae2f8..48d2267f 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -303,22 +303,11 @@ sub warn_ignore_cb { # v2+ only sub git_dir_n { "$_[0]->{inboxdir}/git/$_[1].git" } -# v2+ only +# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove sub git_dir_latest { my ($self, $max) = @_; - $$max = -1; - my $pfx = "$self->{inboxdir}/git"; - return unless -d $pfx; - my $latest; - opendir my $dh, $pfx or die "opendir $pfx: $!\n"; - while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A([0-9]+)\.git\z! or next; - if ($1 > $$max) { - $$max = $1; - $latest = "$pfx/$git_dir"; - } - } - $latest; + defined($$max = $self->max_git_epoch) ? + "$self->{inboxdir}/git/$$max.git" : undef; } 1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 803914b0..b1d38fb9 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -197,8 +197,7 @@ sub xdb_sharded { # We need numeric sorting so shard[0] is first for reading # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))); - return if !defined($last); + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; my (@xdb, $slow_phrase); for (0..$last) { my $shard_dir = "$self->{xpfx}/$_"; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 992305c5..7b8b5abf 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -128,12 +128,9 @@ sub init_inbox { } $self->idx_init; $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum; - my $epoch_max = -1; - $self->{ibx}->git_dir_latest(\$epoch_max); - if (defined $skip_epoch && $epoch_max == -1) { - $epoch_max = $skip_epoch; - } - $self->git_init($epoch_max >= 0 ? $epoch_max : 0); + my $max = $self->{ibx}->max_git_epoch; + $max = $skip_epoch if (defined($skip_epoch) && !defined($max)); + $self->git_init($max // 0); $self->done; } @@ -336,12 +333,7 @@ sub _replace_oids ($$$) { my $ibx = $self->{ibx}; my $pfx = "$ibx->{inboxdir}/git"; my $rewrites = []; # epoch => commit - my $max = $self->{epoch_max}; - - unless (defined($max)) { - defined(my $latest = $ibx->git_dir_latest(\$max)) or return; - $self->{epoch_max} = $max; - } + my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return; foreach my $i (0..$max) { my $git_dir = "$pfx/$i.git"; diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 4332943c..4f77ef25 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -109,8 +109,7 @@ sub prepare_reindex ($$$) { $opt->{reindex}->{from} = $lc; } } else { # v2 - my $max; - $ibx->git_dir_latest(\$max) or return; + my $max = $ibx->max_git_epoch // return; my $from = $opt->{reindex}->{from}; my $mm = $ibx->mm; my $v = PublicInbox::Search::SCHEMA_VERSION();