about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2020-11-28 08:45:21 +0000
committerEric Wong <e@80x24.org>2020-11-29 02:25:39 +0000
commit50ac81092ba1034f3055ddabb3d7cc7853edfa41 (patch)
tree8662fcc2883fc4a145c0618e2ed197e943289c2c
parent44de182766037948d62bc2a8ba924de2264dd5fc (diff)
downloadpublic-inbox-50ac81092ba1034f3055ddabb3d7cc7853edfa41.tar.gz
We need to completely remove a message from over.sqlite3 and
Xapian when no references remain, otherwise users will still see
the removed messages in NNTP overviews and WWW search
results/summaries.

References to messages are now solely handled by the `xref3'
table of over.sqlite3.  We can also trust `xref3' when deciding
whether to remove only the "O$eidx_key" and "G$lid" terms from a
document in Xapian or to remove the entire Xapian document.
-rw-r--r--lib/PublicInbox/ExtSearchIdx.pm13
-rw-r--r--lib/PublicInbox/OverIdx.pm27
-rw-r--r--t/extsearch.t20
-rw-r--r--t/over.t2
4 files changed, 55 insertions, 7 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index cf90c562..d780776f 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -128,14 +128,21 @@ sub do_xpost ($$) {
         my $oid = $req->{oid};
         my $xibx = $req->{ibx};
         my $eml = $req->{eml};
+        my $eidx_key = $xibx->eidx_key;
         if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message
                 my $xnum = $req->{xnum};
-                $self->{oidx}->add_xref3($docid, $xnum, $oid, $xibx->eidx_key);
+                $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key);
                 $idx->shard_add_eidx_info($docid, $oid, $xibx, $eml);
                 check_batch_limit($req);
         } else { # 'd'
-                $self->{oidx}->remove_xref3($docid, $oid, $xibx->eidx_key);
-                $idx->shard_remove_eidx_info($docid, $oid, $xibx, $eml);
+                my $rm_eidx_info;
+                my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key,
+                                                        \$rm_eidx_info);
+                if ($nr == 0) {
+                        $idx->shard_remove($oid, $docid);
+                } elsif ($rm_eidx_info) {
+                        $idx->shard_remove_eidx_info($docid, $oid, $xibx, $eml);
+                }
         }
 }
 
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 8bec08da..07cca4e5 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -595,13 +595,14 @@ INSERT OR IGNORE INTO xref3 (docid, ibx_id, xnum, oidbin) VALUES (?, ?, ?, ?)
         $sth->execute;
 }
 
+# returns remaining reference count to $docid
 sub remove_xref3 {
-        my ($self, $docid, $oidhex, $eidx_key) = @_;
+        my ($self, $docid, $oidhex, $eidx_key, $rm_eidx_info) = @_;
         begin_lazy($self);
         my $oidbin = pack('H*', $oidhex);
-        my $sth;
+        my ($sth, $ibx_id);
         if (defined $eidx_key) {
-                my $ibx_id = id_for($self, 'inboxes', 'ibx_id',
+                $ibx_id = id_for($self, 'inboxes', 'ibx_id',
                                         eidx_key => $eidx_key);
                 $sth = $self->{dbh}->prepare_cached(<<'');
 DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? AND oidbin = ?
@@ -617,6 +618,26 @@ DELETE FROM xref3 WHERE docid = ? AND oidbin = ?
                 $sth->bind_param(2, $oidbin, SQL_BLOB);
         }
         $sth->execute;
+        $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ?
+
+        $sth->execute($docid);
+        my $nr = $sth->fetchrow_array;
+        if ($nr == 0) {
+                delete_by_num($self, $docid);
+        } elsif (defined($ibx_id) && $rm_eidx_info) {
+                # if deduplication rules in ContentHash change, it's
+                # possible a docid can have multiple rows with the
+                # same ibx_id.  This governs whether or not we call
+                # ->shard_remove_eidx_info in ExtSearchIdx.
+                $sth = $self->{dbh}->prepare_cached(<<'', undef, 1);
+SELECT COUNT(*) FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+                $sth->execute($docid, $ibx_id);
+                my $count = $sth->fetchrow_array;
+                $$rm_eidx_info = ($count == 0);
+        }
+        $nr;
 }
 
 # for when an xref3 goes missing, this does NOT update {ts}
diff --git a/t/extsearch.t b/t/extsearch.t
index f9f74e5c..f5855558 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -118,6 +118,26 @@ my $es = PublicInbox::ExtSearch->new("$home/extindex");
         is(scalar(@$x1), 1, 'original only has one xref3');
         is(scalar(@$x2), 1, 'new message has one xref3');
         isnt($x1->[0], $x2->[0], 'xref3 differs');
+
+        my $mset = $es->mset('b:"BEST MSG"');
+        is($mset->size, 1, 'new message found');
+        $mset = $es->mset('b:"test message"');
+        is($mset->size, 1, 'old message found');
+
+        delete @$es{qw(git over xdb)}; # fork preparation
+
+        open my $rmfh, '+>', undef or BAIL_OUT $!;
+        $rmfh->autoflush(1);
+        print $rmfh $eml2->as_string or BAIL_OUT $!;
+        seek($rmfh, 0, SEEK_SET) or BAIL_OUT $!;
+        $opt->{0} = $rmfh;
+        ok(run_script([qw(-learn rm --all)], undef, $opt), '-learn rm');
+
+        ok(run_script([qw(-extindex --all), "$home/extindex"], undef, undef),
+                'extindex after rm');
+        is($es->over->get_art(2), undef, 'doc #2 gone');
+        $mset = $es->mset('b:"BEST MSG"');
+        is($mset->size, 0, 'new message gone');
 }
 
 my $misc = $es->misc;
diff --git a/t/over.t b/t/over.t
index 56c20d01..22061249 100644
--- a/t/over.t
+++ b/t/over.t
@@ -91,7 +91,7 @@ $over->eidx_prep;
                         'xref3 works forw two');
 
         @arg = qw(1349 adeadba7cafe example.key);
-        ok($over->remove_xref3(@arg), 'remove first');
+        is($over->remove_xref3(@arg), 1, 'remove first');
         $xref3 = $over->get_xref3(1349);
         is_deeply($xref3, [ 'example.kee:2018:deadbeefcafe' ],
                 'confirm removal successful');