From dbf250d9423ccc38377c35eef8d43e3e11723253 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 16 Dec 2020 23:19:04 +0000 Subject: inbox: simplify v2 epoch counting Perl readdir detects list context and can return an array suitable for the grep op. From there, we can rely on substr to remove the ".git" suffix and integerize the value to save a few bytes before letting List::Util::max return the value. This is how we detect Xapian shards nowadays, too, and we'll also use defined-or (//) to simplify the return value there. We'll also simplify InboxWritable->git_dir_latest, remove some callers, and consider removing it entirely. --- lib/PublicInbox/ExtSearchIdx.pm | 4 +--- lib/PublicInbox/Inbox.pm | 18 +++++++----------- lib/PublicInbox/InboxWritable.pm | 17 +++-------------- lib/PublicInbox/Search.pm | 3 +-- lib/PublicInbox/V2Writable.pm | 16 ++++------------ lib/PublicInbox/Xapcmd.pm | 3 +-- 6 files changed, 17 insertions(+), 44 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index f492734a..3764612c 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -290,9 +290,7 @@ sub _sync_inbox ($$$) { my $v = $ibx->version; my $ekey = $ibx->eidx_key; if ($v == 2) { - my $epoch_max; - defined($ibx->git_dir_latest(\$epoch_max)) or return; - $sync->{epoch_max} = $epoch_max; + $sync->{epoch_max} = $ibx->max_git_epoch // return; sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable } elsif ($v == 1) { my $uv = $ibx->uidvalidity; diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index bd1de0a0..8a3a0194 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -4,10 +4,10 @@ # Represents a public-inbox (which may have multiple mailing addresses) package PublicInbox::Inbox; use strict; -use warnings; use PublicInbox::Git; use PublicInbox::MID qw(mid2path); use PublicInbox::Eml; +use List::Util qw(max); # Long-running "git-cat-file --batch" processes won't notice # unlinked packs, so we need to restart those processes occasionally. @@ -155,19 +155,15 @@ sub max_git_epoch { my ($self) = @_; return if $self->version < 2; my $cur = $self->{-max_git_epoch}; - my $changed = git($self)->alternates_changed; - if (!defined($cur) || $changed) { + my $changed; + if (!defined($cur) || ($changed = git($self)->alternates_changed)) { git_cleanup($self) if $changed; my $gits = "$self->{inboxdir}/git"; if (opendir my $dh, $gits) { - my $max = -1; - while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A([0-9]+)\.git\z! or next; - $max = $1 if $1 > $max; - } - $cur = $self->{-max_git_epoch} = $max if $max >= 0; - } else { - warn "opendir $gits failed: $!\n"; + my $max = max(map { + substr($_, 0, -4) + 0; # drop ".git" suffix + } grep(/\A[0-9]+\.git\z/, readdir($dh))) // return; + $cur = $self->{-max_git_epoch} = $max; } } $cur; diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index bdfae2f8..48d2267f 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -303,22 +303,11 @@ sub warn_ignore_cb { # v2+ only sub git_dir_n { "$_[0]->{inboxdir}/git/$_[1].git" } -# v2+ only +# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove sub git_dir_latest { my ($self, $max) = @_; - $$max = -1; - my $pfx = "$self->{inboxdir}/git"; - return unless -d $pfx; - my $latest; - opendir my $dh, $pfx or die "opendir $pfx: $!\n"; - while (defined(my $git_dir = readdir($dh))) { - $git_dir =~ m!\A([0-9]+)\.git\z! or next; - if ($1 > $$max) { - $$max = $1; - $latest = "$pfx/$git_dir"; - } - } - $latest; + defined($$max = $self->max_git_epoch) ? + "$self->{inboxdir}/git/$$max.git" : undef; } 1; diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 803914b0..b1d38fb9 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -197,8 +197,7 @@ sub xdb_sharded { # We need numeric sorting so shard[0] is first for reading # Xapian metadata, if needed - my $last = max(grep(/\A[0-9]+\z/, readdir($dh))); - return if !defined($last); + my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return; my (@xdb, $slow_phrase); for (0..$last) { my $shard_dir = "$self->{xpfx}/$_"; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 992305c5..7b8b5abf 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -128,12 +128,9 @@ sub init_inbox { } $self->idx_init; $self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum; - my $epoch_max = -1; - $self->{ibx}->git_dir_latest(\$epoch_max); - if (defined $skip_epoch && $epoch_max == -1) { - $epoch_max = $skip_epoch; - } - $self->git_init($epoch_max >= 0 ? $epoch_max : 0); + my $max = $self->{ibx}->max_git_epoch; + $max = $skip_epoch if (defined($skip_epoch) && !defined($max)); + $self->git_init($max // 0); $self->done; } @@ -336,12 +333,7 @@ sub _replace_oids ($$$) { my $ibx = $self->{ibx}; my $pfx = "$ibx->{inboxdir}/git"; my $rewrites = []; # epoch => commit - my $max = $self->{epoch_max}; - - unless (defined($max)) { - defined(my $latest = $ibx->git_dir_latest(\$max)) or return; - $self->{epoch_max} = $max; - } + my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return; foreach my $i (0..$max) { my $git_dir = "$pfx/$i.git"; diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 4332943c..4f77ef25 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -109,8 +109,7 @@ sub prepare_reindex ($$$) { $opt->{reindex}->{from} = $lc; } } else { # v2 - my $max; - $ibx->git_dir_latest(\$max) or return; + my $max = $ibx->max_git_epoch // return; my $from = $opt->{reindex}->{from}; my $mm = $ibx->mm; my $v = PublicInbox::Search::SCHEMA_VERSION(); -- cgit v1.2.3-24-ge0c7