user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/7] extsearchidx: delay SQLite availability checks
Date: Fri, 25 Dec 2020 10:21:10 +0000	[thread overview]
Message-ID: <20201225102115.6745-3-e@80x24.org> (raw)
In-Reply-To: <20201225102115.6745-1-e@80x24.org>

This will make attach_inbox faster for no-op calls.  It also
helps us avoid races in case msgmap.sqlite3 or over.sqlite3
gets unlinked while -extindex is running.
---
 lib/PublicInbox/ExtSearchIdx.pm | 57 ++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index c43a6c5e..386e1cee 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -61,23 +61,7 @@ sub new {
 
 sub attach_inbox {
 	my ($self, $ibx) = @_;
-	my $ekey = $ibx->eidx_key;
-	my $misc = $self->{misc};
-	if ($misc && $misc->inbox_data($ibx)) { # all good if already indexed
-	} else {
-		my @sqlite = ($ibx->over, $ibx->mm);
-		my $uidvalidity = $ibx->uidvalidity;
-		$ibx->{mm} = $ibx->{over} = undef;
-		if (scalar(@sqlite) != 2) {
-			warn "W: skipping $ekey (unindexed)\n";
-			return;
-		}
-		if (!defined($uidvalidity)) {
-			warn "W: skipping $ekey (no UIDVALIDITY)\n";
-			return;
-		}
-	}
-	$self->{ibx_map}->{$ekey} //= do {
+	$self->{ibx_map}->{$ibx->eidx_key} //= do {
 		push @{$self->{ibx_list}}, $ibx;
 		$ibx;
 	}
@@ -281,29 +265,36 @@ sub last_commits {
 	$heads;
 }
 
+sub _ibx_index_reject ($) {
+	my ($ibx) = @_;
+	$ibx->mm // return 'unindexed, no msgmap.sqlite3';
+	$ibx->uidvalidity // return 'no UIDVALIDITY';
+	$ibx->over // return 'unindexed, no over.sqlite3';
+	undef;
+}
+
 sub _sync_inbox ($$$) {
 	my ($self, $sync, $ibx) = @_;
+	my $ekey = $ibx->eidx_key;
+	if (defined(my $err = _ibx_index_reject($ibx))) {
+		return "W: skipping $ekey ($err)";
+	}
 	$sync->{ibx} = $ibx;
 	$sync->{nr} = \(my $nr = 0);
 	my $v = $ibx->version;
-	my $ekey = $ibx->eidx_key;
 	if ($v == 2) {
 		$sync->{epoch_max} = $ibx->max_git_epoch // return;
 		sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable
 	} elsif ($v == 1) {
 		my $uv = $ibx->uidvalidity;
 		my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv");
-		my $head = $ibx->mm->last_commit;
-		unless (defined $head) {
-			warn "E: $ibx->{inboxdir} is not indexed\n";
-			return;
-		}
+		my $head = $ibx->mm->last_commit //
+			return "E: $ibx->{inboxdir} is not indexed";
 		my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head);
 		my $unit = { stack => $stk, git => $ibx->git };
 		push @{$sync->{todo}}, $unit;
 	} else {
-		warn "E: $ekey unsupported inbox version (v$v)\n";
-		return;
+		return "E: $ekey unsupported inbox version (v$v)";
 	}
 	for my $unit (@{delete($sync->{todo}) // []}) {
 		last if $sync->{quit};
@@ -311,6 +302,7 @@ sub _sync_inbox ($$$) {
 	}
 	$self->{midx}->index_ibx($ibx) unless $sync->{quit};
 	$ibx->git->cleanup; # done with this inbox, now
+	undef;
 }
 
 sub gc_unref_doc ($$$$) {
@@ -787,9 +779,14 @@ DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ?
 
 sub _reindex_inbox ($$$) {
 	my ($self, $sync, $ibx) = @_;
-	local $self->{current_info} = $ibx->eidx_key;
-	_reindex_check_unseen($self, $sync, $ibx);
-	_reindex_check_stale($self, $sync, $ibx) unless $sync->{quit};
+	my $ekey = $ibx->eidx_key;
+	local $self->{current_info} = $ekey;
+	if (defined(my $err = _ibx_index_reject($ibx))) {
+		warn "W: cannot reindex $ekey ($err)\n";
+	} else {
+		_reindex_check_unseen($self, $sync, $ibx);
+		_reindex_check_stale($self, $sync, $ibx) unless $sync->{quit};
+	}
 	delete @$ibx{qw(over mm search git)}; # won't need these for a bit
 }
 
@@ -847,7 +844,9 @@ sub eidx_sync { # main entry point
 	# don't use $_ here, it'll get clobbered by reindex_checkpoint
 	for my $ibx (@{$self->{ibx_list}}) {
 		last if $sync->{quit};
-		_sync_inbox($self, $sync, $ibx);
+		my $err = _sync_inbox($self, $sync, $ibx);
+		delete @$ibx{qw(mm over)};
+		warn $err, "\n" if defined($err);
 	}
 	$self->{oidx}->rethread_done($opt) unless $sync->{quit};
 	eidxq_process($self, $sync) unless $sync->{quit};

  parent reply	other threads:[~2020-12-25 10:21 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-25 10:21 [PATCH 0/7] index + extindex interaction improvements Eric Wong
2020-12-25 10:21 ` [PATCH 1/7] index: disable --fast-noop on --reindex Eric Wong
2020-12-25 10:21 ` Eric Wong [this message]
2020-12-25 10:21 ` [PATCH 3/7] extsearchidx: close DB handles after use if FD constrained Eric Wong
2020-12-25 10:21 ` [PATCH 4/7] index: do not attach inbox to extindex unless updated Eric Wong
2020-12-25 10:21 ` [PATCH 5/7] index: fix --no-fsync flag propagation to extindex Eric Wong
2020-12-25 10:21 ` [PATCH 6/7] v2writable: don't verify tip if reindexing Eric Wong
2020-12-25 10:21 ` [PATCH 7/7] index: filter out indexlevel=basic from extindex Eric Wong
2020-12-25 10:39 ` [PATCH 0/7] index + extindex interaction improvements Eric Wong
2020-12-26  1:44   ` [PATCH 0/3] extindex --watch support Eric Wong
2020-12-26  1:44     ` [PATCH 1/3] default to CORE::warn in $SIG{__WARN__} handlers Eric Wong
2020-12-26  1:44     ` [PATCH 2/3] extindex: --watch for inotify-based updates Eric Wong
2020-12-26  1:44     ` [PATCH 3/3] init: use the return value of rel2abs_collapsed Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201225102115.6745-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).