From 3f27381a79782be136ae8d27c8f585edbbc6f6bf Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 1 Sep 2021 00:17:32 +0000 Subject: extindex: --gc removes messages from over, too While messages from removed inboxes were removed from Xapian search, --gc failed to remove messages from over.sqlite3 entirely. They no longer show up in the topic summary view. Reported-by: Konstantin Ryabitsev Link: https://public-inbox.org/20210830201723.dehoul4y6gpqf2cp@nitro.local/ --- lib/PublicInbox/ExtSearchIdx.pm | 19 +++++++------ t/extsearch.t | 61 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index cf61237c..8cdad23d 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -343,20 +343,18 @@ sub _sync_inbox ($$$) { sub gc_unref_doc ($$$$) { my ($self, $ibx_id, $eidx_key, $docid) = @_; - my $dbh = $self->{oidx}->dbh; - + my $remain = 0; # for debug/info purposes, oids may no longer be accessible + my $dbh = $self->{oidx}->dbh; my $sth = $dbh->prepare_cached(<<'', undef, 1); SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ? $sth->execute($docid, $ibx_id); my @oid = map { unpack('H*', $_->[0]) } @{$sth->fetchall_arrayref}; - - $dbh->prepare_cached(<<'')->execute($docid, $ibx_id); -DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? - - my $remain = $self->{oidx}->get_xref3($docid); - if (scalar(@$remain)) { + for my $oid (@oid) { + $remain += $self->{oidx}->remove_xref3($docid, $oid, $eidx_key); + } + if ($remain) { $self->{oidx}->eidxq_add($docid); # enqueue for reindex for my $oid (@oid) { warn "I: unref #$docid $eidx_key $oid\n"; @@ -421,6 +419,11 @@ DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over) warn "I: eliminated $nr stale xref3 entries\n" if $nr != 0; + # fixup from old bugs: + $nr = $dbh->do(<<''); +DELETE FROM over WHERE num NOT IN (SELECT docid FROM xref3) + + warn "I: eliminated $nr stale over entries\n" if $nr != 0; done($self); } diff --git a/t/extsearch.t b/t/extsearch.t index b03adc17..ad4f2c6d 100644 --- a/t/extsearch.t +++ b/t/extsearch.t @@ -466,4 +466,65 @@ SKIP: { '--gc works after compact'); } +{ # ensure --gc removes non-xposted messages + my $old_size = -s $cfg_path // xbail "stat $cfg_path $!"; + my $tmp_addr = 'v2tmp@example.com'; + run_script([qw(-init v2tmp --indexlevel basic + --newsgroup v2tmp.example), + "$home/v2tmp", 'http://example.com/v2tmp', $tmp_addr ]) + or xbail '-init'; + $env = { ORIGINAL_RECIPIENT => $tmp_addr }; + open $fh, '+>', undef or xbail "open $!"; + $fh->autoflush(1); + my $mid = 'tmpmsg@example.com'; + print $fh < +Subject: tmpmsg +Date: Tue, 19 Jan 2038 03:14:07 +0000 + +EOM + seek $fh, 0, SEEK_SET or xbail "seek $!"; + run_script([qw(-mda --no-precheck)], $env, {0 => $fh}) or xbail '-mda'; + ok(run_script([qw(-extindex --all), "$home/extindex"]), 'update'); + my $nr; + { + my $es = PublicInbox::ExtSearch->new("$home/extindex"); + my ($id, $prv); + my $smsg = $es->over->next_by_mid($mid, \$id, \$prv); + ok($smsg, 'tmpmsg indexed'); + my $mset = $es->search->mset("mid:$mid"); + is($mset->size, 1, 'new message found'); + $mset = $es->search->mset('z:0..'); + $nr = $mset->size; + } + truncate($cfg_path, $old_size) or xbail "truncate $!"; + my $rdr = { 2 => \(my $err) }; + ok(run_script([qw(-extindex --gc), "$home/extindex"], undef, $rdr), + 'gc to get rid of removed inbox'); + is_deeply([ grep(!/^(?:I:|#)/, split(/^/m, $err)) ], [], + 'no non-informational errors in stderr'); + + my $es = PublicInbox::ExtSearch->new("$home/extindex"); + my $mset = $es->search->mset("mid:$mid"); + is($mset->size, 0, 'tmpmsg gone from search'); + my ($id, $prv); + is($es->over->next_by_mid($mid, \$id, \$prv), undef, + 'tmpmsg gone from over'); + $id = $prv = undef; + is($es->over->next_by_mid('testmessage@example.com', \$id, \$prv), + undef, 'remaining message not indavderover'); + $mset = $es->search->mset('z:0..'); + is($mset->size, $nr - 1, 'existing messages not clobbered from search'); + my $o = $es->over->{dbh}->selectall_arrayref(<size, 'over row count matches Xapian'); + my $x = $es->over->{dbh}->selectall_arrayref(<