about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-25 10:21:12 +0000
committer Eric Wong <e@80x24.org> 2020-12-26 06:22:56 +0000
commit fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf (patch)
tree e50ce0cadd50c5210c0b4f53239e8db605a529bc
parent 14e606423429d6121c295c2bc0599fe1bf66b07c (diff)
download public-inbox-fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf.tar.gz
We'll count the number of log changes (regardless of index or
unindex) and only attach inboxes to ExtSearchIdx objects when
they get new work.  We'll also reduce lock bouncing and only
update external indices after all per-inbox indexing is done.

This also updates existing v2 indexing/unindexing callers
to be more consistent and ensures unindex log entries update
per-inbox last commit information.
-rw-r--r-- lib/PublicInbox/Admin.pm      |  1
-rw-r--r-- lib/PublicInbox/SearchIdx.pm  |  2
-rw-r--r-- lib/PublicInbox/V2Writable.pm | 26
-rwxr-xr-x script/public-inbox-index     | 23
4 files changed, 36 insertions, 16 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 9a86d206..b468108e 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -271,6 +271,7 @@ EOM
                 $idx = PublicInbox::SearchIdx->new($ibx, 1);
         }
         $idx->index_sync($opt);
+        $idx->{nidx} // 0; # returns number processed
 }
 
 sub progress_prepare ($) {
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index c8e309fc..b3361e05 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -615,6 +615,7 @@ sub index_both { # git->cat_async callback
         $smsg->{num} = index_mm($self, $eml, $oid, $sync) or
                 die "E: could not generate NNTP article number for $oid";
         add_message($self, $eml, $smsg, $sync);
+        ++$self->{nidx};
         my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing';
         ${$sync->{latest_cmt}} = $cur_cmt;
 }
@@ -629,6 +630,7 @@ sub unindex_both { # git->cat_async callback
         if (defined(my $cur_cmt = $sync->{cur_cmt})) {
                 ${$sync->{latest_cmt}} = $cur_cmt;
         }
+        ++$self->{nidx};
 }
 
 sub with_umask {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 2b849ddf..ca52874b 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -891,12 +891,22 @@ sub reindex_checkpoint ($$) {
         $mm_tmp->atfork_parent if $mm_tmp;
 }
 
+sub index_finalize ($$) {
+        my ($arg, $index) = @_;
+        ++$arg->{self}->{nidx};
+        if (defined(my $cur = $arg->{cur_cmt})) {
+                ${$arg->{latest_cmt}} = $cur;
+        } elsif ($index) {
+                die 'BUG: {cur_cmt} missing';
+        } # else { unindexing @leftovers doesn't set {cur_cmt}
+}
+
 sub index_oid { # cat_async callback
         my ($bref, $oid, $type, $size, $arg) = @_;
-        return if is_bad_blob($oid, $type, $size, $arg->{oid});
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 1); # size == 0 purged returns here
         my $self = $arg->{self};
         local $self->{current_info} = "$self->{current_info} $oid";
-        return if $size == 0; # purged
         my ($num, $mid0);
         my $eml = PublicInbox::Eml->new($$bref);
         my $mids = mids($eml);
@@ -967,7 +977,7 @@ sub index_oid { # cat_async callback
         if (do_idx($self, $bref, $eml, $smsg)) {
                 ${$arg->{need_checkpoint}} = 1;
         }
-        ${$arg->{latest_cmt}} = $arg->{cur_cmt} // die 'BUG: {cur_cmt} missing';
+        index_finalize($arg, 1);
 }
 
 # only update last_commit for $i on reindex iff newer than current
@@ -1157,11 +1167,12 @@ sub unindex_oid_aux ($$$) {
 }
 
 sub unindex_oid ($$;$) { # git->cat_async callback
-        my ($bref, $oid, $type, $size, $sync) = @_;
-        return if is_bad_blob($oid, $type, $size, $sync->{oid});
-        my $self = $sync->{self};
+        my ($bref, $oid, $type, $size, $arg) = @_;
+        is_bad_blob($oid, $type, $size, $arg->{oid}) and
+                return index_finalize($arg, 0);
+        my $self = $arg->{self};
         local $self->{current_info} = "$self->{current_info} $oid";
-        my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef;
+        my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef;
         my $mm = $self->{mm};
         my $mids = mids(PublicInbox::Eml->new($bref));
         undef $$bref;
@@ -1186,6 +1197,7 @@ sub unindex_oid ($$;$) { # git->cat_async callback
                 }
                 unindex_oid_aux($self, $oid, $mid);
         }
+        index_finalize($arg, 0);
 }
 
 sub git { $_[0]->{ibx}->git }
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 87893ef1..a17bf615 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -63,7 +63,7 @@ my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg);
 PublicInbox::Admin::require_or_die('-index');
 unless (@ibxs) { print STDERR $help; exit 1 }
 
-my (@eidx_dir, %eidx_seen);
+my (@eidx, %eidx_seen);
 my $update_extindex = $opt->{'update-extindex'};
 if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) {
         # extindex and normal inboxes may have different owners
@@ -84,7 +84,8 @@ for my $ei_name (@$update_extindex) {
         } else {
                 die "extindex `$ei_name' not configured or found\n";
         }
-        $eidx_seen{$topdir} //= push(@eidx_dir, $topdir);
+        $eidx_seen{$topdir} //=
+                push(@eidx, PublicInbox::ExtSearchIdx->new($topdir));
 }
 my $mods = {};
 my @eidx_unconfigured;
@@ -95,7 +96,7 @@ foreach my $ibx (@ibxs) {
         $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ?
                         'full' : $detected);
         PublicInbox::Admin::scan_ibx_modules($mods, $ibx);
-        if (@eidx_dir && $ibx->{-unconfigured}) {
+        if (@eidx && $ibx->{-unconfigured}) {
                 push @eidx_unconfigured, "  $ibx->{inboxdir}\n";
         }
 }
@@ -128,18 +129,22 @@ publicInbox.$ibx->{name}.indexSequentialShard not boolean
 EOL
                 $ibx_opt = { %$opt, sequential_shard => $v };
         }
-        PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
+        my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt);
         last if $ibx_opt->{quit};
         if (my $copt = $opt->{compact_opt}) {
                 local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard};
                 PublicInbox::Xapcmd::run($ibx, 'compact', $copt);
         }
-        next if $ibx->{-unconfigured};
         last if $ibx_opt->{quit};
-        for my $dir (@eidx_dir) {
-                my $eidx = PublicInbox::ExtSearchIdx->new($dir);
+        next if $ibx->{-unconfigured} || !$nidx;
+        for my $eidx (@eidx) {
                 $eidx->attach_inbox($ibx);
-                $eidx->eidx_sync($ibx_opt);
-                last if $ibx_opt->{quit};
         }
 }
+$opt->{-no_fsync} = 1 if !$opt->{fsync};
+my $pr = $opt->{-progress};
+for my $eidx (@eidx) {
+        $pr->("indexing $eidx->{topdir} ...\n") if $pr;
+        $eidx->eidx_sync($opt);
+        last if $opt->{quit};
+}