diff options
author | Eric Wong <e@80x24.org> | 2020-11-29 05:09:35 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-11-29 06:42:00 +0000 |
commit | ea885bf0f76dc1795dc771667be721ec0fed5482 (patch) | |
tree | 5323da5926b6aa4c20619e2a05805f581722a389 /lib/PublicInbox/SearchIdx.pm | |
parent | 33f80a367325f3dac983633cb0a59946205776c2 (diff) | |
download | public-inbox-ea885bf0f76dc1795dc771667be721ec0fed5482.tar.gz |
Inboxes may be removed or newsgroups renamed over time. Introduce a switch to do garbage collection and eliminate stale search and xref3 results, keeping only those for inboxes which remain in the config file. This may also fix up stale results left over from any bugs which may leave stale data around. This is also useful in case a clumsy BOFH (me :P) is swapping between several PI_CONFIGs and accidentally indexes a bunch of inboxes they didn't intend to.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 23 |
1 files changed, 22 insertions, 1 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index d06c159b..c18c7c36 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -471,7 +471,7 @@ sub remove_eidx_info { my $doc = _get_doc($self, $docid, $oid) or return; eval { $doc->remove_term('O'.$eidx_key) }; warn "W: ->remove_term O$eidx_key: $@\n" if $@; - for my $l ($eml->header_raw('List-Id')) { + for my $l ($eml ? $eml->header_raw('List-Id') : ()) { $l =~ /<([^>]+)>/ or next; my $lid = lc $1; eval { $doc->remove_term('G' . $lid) }; @@ -970,4 +970,25 @@ sub eidx_shard_new { $self; } +# ensure there's no stale Xapian docs by treating $over as canonical +sub over_check { + my ($self, $over) = @_; + begin_txn_lazy($self); + my $sth = $over->dbh->prepare(<<''); +SELECT COUNT(*) FROM over WHERE num = ? + + my $xdb = $self->{xdb}; + my $cur = $xdb->postlist_begin(''); + my $end = $xdb->postlist_end(''); + my $xdir = $self->xdir; + for (; $cur != $end; $cur++) { + my $docid = $cur->get_docid; + $sth->execute($docid); + my $x = $sth->fetchrow_array; + next if $x > 0; + warn "I: removing $xdir #$docid, not in `over'\n"; + $xdb->delete_document($docid); + } +} + 1; |