From fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 25 Dec 2020 10:21:12 +0000
Subject: index: do not attach inbox to extindex unless updated

We'll count the number of log changes (regardless of index or
unindex) and only attach inboxes to ExtSearchIdx objects when
they get new work.

We'll also reduce lock bouncing and only update external indices
after all per-inbox indexing is done.

This also updates existing v2 indexing/unindexing callers to
be more consistent and ensures unindex log entries update
per-inbox last commit information.
---
 lib/PublicInbox/Admin.pm      |  1 +
 lib/PublicInbox/SearchIdx.pm  |  2 ++
 lib/PublicInbox/V2Writable.pm | 26 +++++++++++++++++++-------
 3 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 9a86d206..b468108e 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -271,6 +271,7 @@ EOM
 		$idx = PublicInbox::SearchIdx->new($ibx, 1);
 	}
 	$idx->index_sync($opt);
+	$idx->{nidx} // 0; # returns number processed
 }
 
 sub progress_prepare ($) {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c8e309fc..b3361e05 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -615,6 +615,7 @@ sub index_both { # git->cat_async callback
 	$smsg->{num} = index_mm($self, $eml, $oid, $sync) or
 		die "E: could not generate NNTP article number for $oid";
 	add_message($self, $eml, $smsg, $sync);
+	++$self->{nidx};
 	my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
 	${$sync->{latest_cmt}} = $cur_cmt;
 }
@@ -629,6 +630,7 @@ sub unindex_both { # git->cat_async callback
 	if (defined(my $cur_cmt = $sync->{cur_cmt})) {
 		${$sync->{latest_cmt}} = $cur_cmt;
 	}
+	++$self->{nidx};
 }
 
 sub with_umask {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 2b849ddf..ca52874b 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -891,12 +891,22 @@ sub reindex_checkpoint ($$) {
 	$mm_tmp->atfork_parent if $mm_tmp;
 }
 
+sub index_finalize ($$) {
+	my ($arg, $index) = @_;
+	++$arg->{self}->{nidx};
+	if (defined(my $cur = $arg->{cur_cmt})) {
+		${$arg->{latest_cmt}} = $cur;
+	} elsif ($index) {
+		die 'BUG: {cur_cmt} missing';
+	} # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
 sub index_oid { # cat_async callback
 	my ($bref, $oid, $type, $size, $arg) = @_;
-	return if is_bad_blob($oid, $type, $size, $arg->{oid});
+	is_bad_blob($oid, $type, $size, $arg->{oid}) and
+		return index_finalize($arg, 1); # size == 0 purged returns here
 	my $self = $arg->{self};
 	local $self->{current_info} = "$self->{current_info} $oid";
-	return if $size == 0; # purged
 	my ($num, $mid0);
 	my $eml = PublicInbox::Eml->new($$bref);
 	my $mids = mids($eml);
@@ -967,7 +977,7 @@ sub index_oid { # cat_async callback
 	if (do_idx($self, $bref, $eml, $smsg)) {
 		${$arg->{need_checkpoint}} = 1;
 	}
-	${$arg->{latest_cmt}} = $arg->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+	index_finalize($arg, 1);
 }
 
 # only update last_commit for $i on reindex iff newer than current
@@ -1157,11 +1167,12 @@ sub unindex_oid_aux ($$$) {
 }
 
 sub unindex_oid ($$;$) { # git->cat_async callback
-	my ($bref, $oid, $type, $size, $sync) = @_;
-	return if is_bad_blob($oid, $type, $size, $sync->{oid});
-	my $self = $sync->{self};
+	my ($bref, $oid, $type, $size, $arg) = @_;
+	is_bad_blob($oid, $type, $size, $arg->{oid}) and
+		return index_finalize($arg, 0);
+	my $self = $arg->{self};
 	local $self->{current_info} = "$self->{current_info} $oid";
-	my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+	my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
 	my $mm = $self->{mm};
 	my $mids = mids(PublicInbox::Eml->new($bref));
 	undef $$bref;
@@ -1186,6 +1197,7 @@ sub unindex_oid ($$;$) { # git->cat_async callback
 		}
 		unindex_oid_aux($self, $oid, $mid);
 	}
+	index_finalize($arg, 0);
 }
 
 sub git { $_[0]->{ibx}->git }
-- 
cgit v1.2.3-24-ge0c7