about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2021-06-17 22:00:47 +0000
committer: Eric Wong <e@80x24.org> 2021-06-17 22:34:59 +0000
commit: d1052f03ea85d4afe19106584ed6ebd675dcead0 (patch)
tree: ec221991d18d62a9bd3da507945d57d44e99ccd6
parent: 09f1fabd1b3097426734b358d7231c11ab4b8d6e (diff)
download: public-inbox-d1052f03ea85d4afe19106584ed6ebd675dcead0.tar.gz
I'm not sure how an OID ended up indexed as multiple docids
(it happened only once for me, in March), but it should not
happen...  In any case, we'll operate on the lowest numbered
docid and cull redundant index entries when lei/store is open
for read-write.

This also fixes the normal lei/store removal path to clean up
the xref3 table (since it's not done automatically for
public-facing -eidx due to the multi-list nature of it).
-rw-r--r-- lib/PublicInbox/LeiStore.pm 54
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 2
2 files changed, 36 insertions, 20 deletions
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index f978288a..4ba1e647 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -226,6 +226,18 @@ sub _remove_if_local { # git->cat_async arg
         $self->{im}->remove($bref) if $bref;
 }
 
+sub remove_docids ($;@) {
+        my ($self, @docids) = @_;
+        my $eidx = eidx_init($self);
+        for my $docid (@docids) {
+                $eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid);
+                $self->{oidx}->delete_by_num($docid);
+                $self->{oidx}->{dbh}->do(<<EOF, undef, $docid);
+DELETE FROM xref3 WHERE docid = ?
+EOF
+        }
+}
+
 # remove the entire message from the index, does not touch mail_sync.sqlite3
 sub remove_eml {
         my ($self, $eml) = @_;
@@ -241,13 +253,25 @@ sub remove_eml {
                         my $oidhex = unpack('H*', $oidbin);
                         $git->cat_async($oidhex, \&_remove_if_local, $self);
                 }
-                $eidx->idx_shard($docid)->ipc_do('xdb_remove', $docid);
-                $oidx->delete_by_num($docid);
         }
         $git->cat_async_wait;
+        remove_docids($self, @docids);
         \@docids;
 }
 
+sub oid2docid ($$) {
+        my ($self, $oid) = @_;
+        my $eidx = eidx_init($self);
+        my ($docid, @cull) = $eidx->{oidx}->blob_exists($oid);
+        if (@cull) { # fixup old bugs...
+                warn <<EOF;
+W: $oid indexed as multiple docids: $docid @cull, culling to fixup old bugs
+EOF
+                remove_docids($self, @cull);
+        }
+        wantarray ? ($docid) : $docid;
+}
+
 sub add_eml {
         my ($self, $eml, $vmd, $xoids) = @_;
         my $im = $self->{-fake_im} // $self->importer; # may create new epoch
@@ -268,7 +292,7 @@ sub add_eml {
                 if (scalar keys %$xoids) {
                         my %docids = map { $_ => 1 } @$vivify_xvmd;
                         for my $oid (keys %$xoids) {
-                                my @id = $oidx->blob_exists($oid);
+                                my @id = oid2docid($self, $oid);
                                 @docids{@id} = @id;
                         }
                         @$vivify_xvmd = sort { $a <=> $b } keys(%docids);
@@ -356,15 +380,11 @@ sub update_xvmd {
         my $oidx = $eidx->{oidx};
         my %seen;
         for my $oid (keys %$xoids) {
-                my @docids = $oidx->blob_exists($oid) or next;
-                scalar(@docids) > 1 and
-                        warn "W: $oid indexed as multiple docids: @docids\n";
-                for my $docid (@docids) {
-                        next if $seen{$docid}++;
-                        my $idx = $eidx->idx_shard($docid);
-                        $idx->ipc_do('update_vmd', $docid, $vmd_mod);
-                }
+                my $docid = oid2docid($self, $oid) // next;
                 delete $xoids->{$oid};
+                next if $seen{$docid}++;
+                my $idx = $eidx->idx_shard($docid);
+                $idx->ipc_do('update_vmd', $docid, $vmd_mod);
         }
         return unless scalar(keys(%$xoids));
 
@@ -395,15 +415,11 @@ sub set_xvmd {
 
         # see if we can just update existing docs
         for my $oid (keys %$xoids) {
-                my @docids = $oidx->blob_exists($oid) or next;
-                scalar(@docids) > 1 and
-                        warn "W: $oid indexed as multiple docids: @docids\n";
-                for my $docid (@docids) {
-                        next if $seen{$docid}++;
-                        my $idx = $eidx->idx_shard($docid);
-                        $idx->ipc_do('set_vmd', $docid, $vmd);
-                }
+                my $docid = oid2docid($self, $oid) // next;
                 delete $xoids->{$oid}; # all done with this oid
+                next if $seen{$docid}++;
+                my $idx = $eidx->idx_shard($docid);
+                $idx->ipc_do('set_vmd', $docid, $vmd);
         }
         return unless scalar(keys(%$xoids));
 
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f066cc92..f553eda6 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -572,7 +572,7 @@ sub apply_vmd_mod ($$) {
         my $updated = 0;
         my @x = @VMD_MAP;
         while (my ($field, $pfx) = splice(@x, 0, 2)) {
-                # field: "label" or "kw"
+                # field: "L" or "kw"
                 for my $val (@{$vmd_mod->{"-$field"} // []}) {
                         eval {
                                 $doc->remove_term($pfx . $val);