user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/5] inbox: simplify v2 epoch counting
Date: Wed, 16 Dec 2020 23:19:04 +0000	[thread overview]
Message-ID: <20201216231906.6356-4-e@80x24.org> (raw)
In-Reply-To: <20201216231906.6356-1-e@80x24.org>

In list context, Perl's readdir returns all remaining directory
entries as a list suitable for the grep op.  From there, we can
rely on substr to remove the ".git" suffix and integerize the
value to save a few bytes before letting List::Util::max return
the value.

This is how we detect Xapian shards nowadays, too, and
we'll also use defined-or (//) to simplify the return
value there.

We'll also simplify InboxWritable->git_dir_latest,
remove some callers, and consider removing it entirely.
---
 lib/PublicInbox/ExtSearchIdx.pm  |  4 +---
 lib/PublicInbox/Inbox.pm         | 18 +++++++-----------
 lib/PublicInbox/InboxWritable.pm | 17 +++--------------
 lib/PublicInbox/Search.pm        |  3 +--
 lib/PublicInbox/V2Writable.pm    | 16 ++++------------
 lib/PublicInbox/Xapcmd.pm        |  3 +--
 6 files changed, 17 insertions(+), 44 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index f492734a..3764612c 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -290,9 +290,7 @@ sub _sync_inbox ($$$) {
 	my $v = $ibx->version;
 	my $ekey = $ibx->eidx_key;
 	if ($v == 2) {
-		my $epoch_max;
-		defined($ibx->git_dir_latest(\$epoch_max)) or return;
-		$sync->{epoch_max} = $epoch_max;
+		$sync->{epoch_max} = $ibx->max_git_epoch // return;
 		sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable
 	} elsif ($v == 1) {
 		my $uv = $ibx->uidvalidity;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 4e3c23f3..0973bf94 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -4,10 +4,10 @@
 # Represents a public-inbox (which may have multiple mailing addresses)
 package PublicInbox::Inbox;
 use strict;
-use warnings;
 use PublicInbox::Git;
 use PublicInbox::MID qw(mid2path);
 use PublicInbox::Eml;
+use List::Util qw(max);
 
 # Long-running "git-cat-file --batch" processes won't notice
 # unlinked packs, so we need to restart those processes occasionally.
@@ -155,19 +155,15 @@ sub max_git_epoch {
 	my ($self) = @_;
 	return if $self->version < 2;
 	my $cur = $self->{-max_git_epoch};
-	my $changed = git($self)->alternates_changed;
-	if (!defined($cur) || $changed) {
+	my $changed;
+	if (!defined($cur) || ($changed = git($self)->alternates_changed)) {
 		git_cleanup($self) if $changed;
 		my $gits = "$self->{inboxdir}/git";
 		if (opendir my $dh, $gits) {
-			my $max = -1;
-			while (defined(my $git_dir = readdir($dh))) {
-				$git_dir =~ m!\A([0-9]+)\.git\z! or next;
-				$max = $1 if $1 > $max;
-			}
-			$cur = $self->{-max_git_epoch} = $max if $max >= 0;
-		} else {
-			warn "opendir $gits failed: $!\n";
+			my $max = max(map {
+				substr($_, 0, -4) + 0; # drop ".git" suffix
+			} grep(/\A[0-9]+\.git\z/, readdir($dh))) // return;
+			$cur = $self->{-max_git_epoch} = $max;
 		}
 	}
 	$cur;
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index bdfae2f8..48d2267f 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -303,22 +303,11 @@ sub warn_ignore_cb {
 # v2+ only
 sub git_dir_n { "$_[0]->{inboxdir}/git/$_[1].git" }
 
-# v2+ only
+# v2+ only, XXX: maybe we can just rely on ->max_git_epoch and remove
 sub git_dir_latest {
 	my ($self, $max) = @_;
-	$$max = -1;
-	my $pfx = "$self->{inboxdir}/git";
-	return unless -d $pfx;
-	my $latest;
-	opendir my $dh, $pfx or die "opendir $pfx: $!\n";
-	while (defined(my $git_dir = readdir($dh))) {
-		$git_dir =~ m!\A([0-9]+)\.git\z! or next;
-		if ($1 > $$max) {
-			$$max = $1;
-			$latest = "$pfx/$git_dir";
-		}
-	}
-	$latest;
+	defined($$max = $self->max_git_epoch) ?
+		"$self->{inboxdir}/git/$$max.git" : undef;
 }
 
 1;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 803914b0..b1d38fb9 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -197,8 +197,7 @@ sub xdb_sharded {
 
 	# We need numeric sorting so shard[0] is first for reading
 	# Xapian metadata, if needed
-	my $last = max(grep(/\A[0-9]+\z/, readdir($dh)));
-	return if !defined($last);
+	my $last = max(grep(/\A[0-9]+\z/, readdir($dh))) // return;
 	my (@xdb, $slow_phrase);
 	for (0..$last) {
 		my $shard_dir = "$self->{xpfx}/$_";
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 992305c5..7b8b5abf 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -128,12 +128,9 @@ sub init_inbox {
 	}
 	$self->idx_init;
 	$self->{mm}->skip_artnum($skip_artnum) if defined $skip_artnum;
-	my $epoch_max = -1;
-	$self->{ibx}->git_dir_latest(\$epoch_max);
-	if (defined $skip_epoch && $epoch_max == -1) {
-		$epoch_max = $skip_epoch;
-	}
-	$self->git_init($epoch_max >= 0 ? $epoch_max : 0);
+	my $max = $self->{ibx}->max_git_epoch;
+	$max = $skip_epoch if (defined($skip_epoch) && !defined($max));
+	$self->git_init($max // 0);
 	$self->done;
 }
 
@@ -336,12 +333,7 @@ sub _replace_oids ($$$) {
 	my $ibx = $self->{ibx};
 	my $pfx = "$ibx->{inboxdir}/git";
 	my $rewrites = []; # epoch => commit
-	my $max = $self->{epoch_max};
-
-	unless (defined($max)) {
-		defined(my $latest = $ibx->git_dir_latest(\$max)) or return;
-		$self->{epoch_max} = $max;
-	}
+	my $max = $self->{epoch_max} //= $ibx->max_git_epoch // return;
 
 	foreach my $i (0..$max) {
 		my $git_dir = "$pfx/$i.git";
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 4332943c..4f77ef25 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -109,8 +109,7 @@ sub prepare_reindex ($$$) {
 			$opt->{reindex}->{from} = $lc;
 		}
 	} else { # v2
-		my $max;
-		$ibx->git_dir_latest(\$max) or return;
+		my $max = $ibx->max_git_epoch // return;
 		my $from = $opt->{reindex}->{from};
 		my $mm = $ibx->mm;
 		my $v = PublicInbox::Search::SCHEMA_VERSION();

  parent reply	other threads:[~2020-12-16 23:19 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-16 23:19 [PATCH 0/5] various indexing cleanups and quieting Eric Wong
2020-12-16 23:19 ` [PATCH 1/5] inboxwritable: warn_ignore: "Bad UTF7 data escape" Eric Wong
2020-12-16 23:19 ` [PATCH 2/5] index: ignore some warnings, set {current_info} for v1 Eric Wong
2020-12-16 23:19 ` Eric Wong [this message]
2020-12-16 23:19 ` [PATCH 4/5] inboxwritable: drop git_dir_n sub Eric Wong
2020-12-16 23:19 ` [PATCH 5/5] extsearchidx: no need to make InboxWritable Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201216231906.6356-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).