about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-15 02:02:20 +0000
committer Eric Wong <e@80x24.org> 2020-12-17 19:13:11 +0000
commit 35cca56ecb10649f7935d601ef513162e38cb3b8 (patch)
tree bdca5532c7e46f500cdb3214b9b7d8304de99dde
parent 7cc8cb8a8554e1eae817ce2f21dd28b413d3bafc (diff)
download public-inbox-35cca56ecb10649f7935d601ef513162e38cb3b8.tar.gz
Instead of just working on over.sqlite3, we need to work on
the Xapian DBs as well.  While no changes to our Xapian use
have taken place recently, they could in the future and
--reindex exists to account for that.
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm 18
1 files changed, 15 insertions, 3 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index c77fb197..f29a84e3 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -404,13 +404,18 @@ sub _reindex_finalize ($$$) {
         my $orig_smsg = $req->{orig_smsg} // die 'BUG: no {orig_smsg}';
         my $docid = $smsg->{num} = $orig_smsg->{num};
         $self->{oidx}->add_overview($eml, $smsg); # may rethread
-        return if $nr == 1; # likely, all good
-
+        $self->{transact_bytes} += $smsg->{bytes};
+        if ($nr == 1) { # likely, all good
+                $self->idx_shard($docid)->shard_reindex_docid($docid);
+                return;
+        }
         warn "W: #$docid split into $nr due to deduplication change\n";
         my $chash0 = $smsg->{chash} // die "BUG: $smsg->{blob} no {chash}";
         delete($by_chash->{$chash0}) // die "BUG: $smsg->{blob} chash missing";
+        my @todo;
         for my $ary (values %$by_chash) {
                 for my $x (reverse @$ary) {
+                        warn "removing #$docid xref3 $x->{blob}\n";
                         my $n = $self->{oidx}->remove_xref3($docid, $x->{blob});
                         die "BUG: $x->{blob} invalidated #$docid" if $n == 0;
                 }
@@ -424,6 +429,12 @@ sub _reindex_finalize ($$$) {
                 $e->{blob} eq $x->{blob} or die <<EOF;
 $x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num});
 EOF
+                push @todo, $ibx, $e;
+        }
+        $self->{oidx}->commit_lazy; # ensure shard workers can see xref removals
+        $self->{oidx}->begin_lazy;
+        $self->idx_shard($docid)->shard_reindex_docid($docid);
+        while (my ($ibx, $e) = splice(@todo, 0, 2)) {
                 reindex_unseen($self, $sync, $ibx, $e);
         }
 }
@@ -531,11 +542,12 @@ sub eidxq_process ($$) { # for reindexing
 
                 # shards flush on their own, just don't queue up too many
                 # deletes
-                if (($cur % 1000) == 0) {
+                if ($self->{transact_bytes} >= $self->{batch_bytes}) {
                         $self->git->async_wait_all;
                         $self->{oidx}->commit_lazy;
                         $self->{oidx}->begin_lazy;
                         $pr->("reindexed $cur/$tot\n") if $pr;
+                        $self->{transact_bytes} = 0;
                 }
                 # this is only for SIGUSR1, shards do their own accounting:
                 reindex_checkpoint($self, $sync) if ${$sync->{need_checkpoint}};