about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-25 10:21:12 +0000
committer Eric Wong <e@80x24.org> 2020-12-26 06:22:56 +0000
commit fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf (patch)
tree e50ce0cadd50c5210c0b4f53239e8db605a529bc /lib
parent 14e606423429d6121c295c2bc0599fe1bf66b07c (diff)
download public-inbox-fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf.tar.gz
We'll count the number of log changes (regardless of index or
unindex) and only attach inboxes to ExtSearchIdx objects when
they get new work.  We'll also reduce lock bouncing and only
update external indices after all per-inbox indexing is done.

This also updates existing v2 indexing/unindexing callers
to be more consistent and ensures unindex log entries update
per-inbox last commit information.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Admin.pm | 1
-rw-r--r-- lib/PublicInbox/SearchIdx.pm | 2
-rw-r--r-- lib/PublicInbox/V2Writable.pm | 26
3 files changed, 22 insertions, 7 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 9a86d206..b468108e 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -271,6 +271,7 @@ EOM
                 $idx = PublicInbox::SearchIdx->new($ibx, 1);
         }
         $idx->index_sync($opt);
+        $idx->{nidx} // 0; # returns number processed
 }
 
 sub progress_prepare ($) {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c8e309fc..b3361e05 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -615,6 +615,7 @@ sub index_both { # git->cat_async callback
         $smsg->{num} = index_mm($self, $eml, $oid, $sync) or
                 die "E: could not generate NNTP article number for $oid";
         add_message($self, $eml, $smsg, $sync);
+        ++$self->{nidx};
         my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
         ${$sync->{latest_cmt}} = $cur_cmt;
 }
@@ -629,6 +630,7 @@ sub unindex_both { # git->cat_async callback
         if (defined(my $cur_cmt = $sync->{cur_cmt})) {
                 ${$sync->{latest_cmt}} = $cur_cmt;
         }
+        ++$self->{nidx};
 }
 
 sub with_umask {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 2b849ddf..ca52874b 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -891,12 +891,22 @@ sub reindex_checkpoint ($$) {
         $mm_tmp->atfork_parent if $mm_tmp;
 }
 
+sub index_finalize ($$) {
+        my ($arg, $index) = @_;
+        ++$arg->{self}->{nidx};
+        if (defined(my $cur = $arg->{cur_cmt})) {
+                ${$arg->{latest_cmt}} = $cur;
+        } elsif ($index) {
+                die 'BUG: {cur_cmt} missing';
+        } # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
 sub index_oid { # cat_async callback
         my ($bref, $oid, $type, $size, $arg) = @_;
-        return if is_bad_blob($oid, $type, $size, $arg->{oid});
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 1); # size == 0 purged returns here
         my $self = $arg->{self};
         local $self->{current_info} = "$self->{current_info} $oid";
-        return if $size == 0; # purged
         my ($num, $mid0);
         my $eml = PublicInbox::Eml->new($$bref);
         my $mids = mids($eml);
@@ -967,7 +977,7 @@ sub index_oid { # cat_async callback
         if (do_idx($self, $bref, $eml, $smsg)) {
                 ${$arg->{need_checkpoint}} = 1;
         }
-        ${$arg->{latest_cmt}} = $arg->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+        index_finalize($arg, 1);
 }
 
 # only update last_commit for $i on reindex iff newer than current
@@ -1157,11 +1167,12 @@ sub unindex_oid_aux ($$$) {
 }
 
 sub unindex_oid ($$;$) { # git->cat_async callback
-        my ($bref, $oid, $type, $size, $sync) = @_;
-        return if is_bad_blob($oid, $type, $size, $sync->{oid});
-        my $self = $sync->{self};
+        my ($bref, $oid, $type, $size, $arg) = @_;
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 0);
+        my $self = $arg->{self};
         local $self->{current_info} = "$self->{current_info} $oid";
-        my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+        my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
         my $mm = $self->{mm};
         my $mids = mids(PublicInbox::Eml->new($bref));
         undef $$bref;
@@ -1186,6 +1197,7 @@ sub unindex_oid ($$;$) { # git->cat_async callback
                 }
                 unindex_oid_aux($self, $oid, $mid);
         }
+        index_finalize($arg, 0);
 }
 
 sub git { $_[0]->{ibx}->git }