about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-11-28 08:45:21 +0000
committer Eric Wong <e@80x24.org> 2020-11-29 02:25:39 +0000
commit 50ac81092ba1034f3055ddabb3d7cc7853edfa41 (patch)
tree 8662fcc2883fc4a145c0618e2ed197e943289c2c /lib/PublicInbox
parent 44de182766037948d62bc2a8ba924de2264dd5fc (diff)
download public-inbox-50ac81092ba1034f3055ddabb3d7cc7853edfa41.tar.gz
We need to completely remove a message from over.sqlite3 and
Xapian when no references remain; otherwise, users will still see
the removed messages in NNTP overviews and WWW search
results/summaries.

References to messages are now solely handled by the `xref3'
table of over.sqlite3.  We can also trust `xref3' when deciding
whether to remove only the "O$eidx_key" and "G$lid" terms from a
document in Xapian or to remove the entire Xapian document.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm 13
-rw-r--r-- lib/PublicInbox/OverIdx.pm 27
2 files changed, 34 insertions, 6 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index cf90c562..d780776f 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -128,14 +128,21 @@ sub do_xpost ($$) {
         my $oid = $req->{oid};
         my $xibx = $req->{ibx};
         my $eml = $req->{eml};
+        my $eidx_key = $xibx->eidx_key;
         if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
                 my $xnum = $req->{xnum};
-                $self->{oidx}->add_xref3($docid, $xnum, $oid, $xibx->eidx_key);
+                $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
                 $idx->shard_add_eidx_info($docid, $oid, $xibx, $eml);
                 check_batch_limit($req);
         } else { # 'd'
-                $self->{oidx}->remove_xref3($docid, $oid, $xibx->eidx_key);
-                $idx->shard_remove_eidx_info($docid, $oid, $xibx, $eml);
+                my $rm_eidx_info;
+                my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key,
+                                                        \$rm_eidx_info);
+                if ($nr == 0) {
+                        $idx->shard_remove($oid, $docid);
+                } elsif ($rm_eidx_info) {
+                        $idx->shard_remove_eidx_info($docid, $oid, $xibx, $eml);
+                }
         }
 }
 
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 8bec08da..07cca4e5 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -595,13 +595,14 @@ INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?)
         $sth->execute;
 }
 
+# returns remaining reference count to $docid
 sub remove_xref3 {
-        my ($self, $docid, $oidhex, $eidx_key) = @_;
+        my ($self, $docid, $oidhex, $eidx_key, $rm_eidx_info) = @_;
         begin_lazy($self);
         my $oidbin = pack('H*', $oidhex);
-        my $sth;
+        my ($sth, $ibx_id);
         if (defined $eidx_key) {
-                my $ibx_id = id_for($self, 'inboxes', 'ibx_id',
+                $ibx_id = id_for($self, 'inboxes', 'ibx_id',
                                         eidx_key => $eidx_key);
                 $sth = $self->{dbh}->prepare_cached(<<'');
 DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ?
@@ -617,6 +618,26 @@ DELETE FROM xref3 WHERE docid = ? AND oidbin = ?
                 $sth->bind_param(2, $oidbin, SQL_BLOB);
         }
         $sth->execute;
+        $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ?
+
+        $sth->execute($docid);
+        my $nr = $sth->fetchrow_array;
+        if ($nr == 0) {
+                delete_by_num($self, $docid);
+        } elsif (defined($ibx_id) && $rm_eidx_info) {
+                # if deduplication rules in ContentHash change, it's
+                # possible a docid can have multiple rows with the
+                # same ibx_id.  This governs whether or not we call
+                # ->shard_remove_eidx_info in ExtSearchIdx.
+                $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+                $sth->execute($docid, $ibx_id);
+                my $count = $sth->fetchrow_array;
+                $$rm_eidx_info = ($count == 0);
+        }
+        $nr;
 }
 
 # for when an xref3 goes missing, this does NOT update {ts}