about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdxShard.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-11-29 05:09:35 +0000
committer Eric Wong <e@80x24.org> 2020-11-29 06:42:00 +0000
commit ea885bf0f76dc1795dc771667be721ec0fed5482 (patch)
tree 5323da5926b6aa4c20619e2a05805f581722a389 /lib/PublicInbox/SearchIdxShard.pm
parent 33f80a367325f3dac983633cb0a59946205776c2 (diff)
download public-inbox-ea885bf0f76dc1795dc771667be721ec0fed5482.tar.gz
Inboxes may be removed or newsgroups renamed over time.
Introduce a switch to do garbage collection and eliminate stale
search and xref3 results based on inboxes which remain in the
config file.

This may also fix up stale results left over from any bugs which
may leave stale data around.

This is also useful in case a clumsy BOFH (me :P) is swapping
between several PI_CONFIGs and accidentally indexes a bunch of
inboxes they didn't intend to.
Diffstat (limited to 'lib/PublicInbox/SearchIdxShard.pm')
-rw-r--r-- lib/PublicInbox/SearchIdxShard.pm 21
1 files changed, 18 insertions, 3 deletions
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index dcfeb0be..53fac9b6 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -57,6 +57,7 @@ sub spawn_worker {
 
 sub eml ($$) {
         my ($r, $len) = @_;
+        return if $len == 0;
         my $n = read($r, my $bref, $len) or die "read: $!\n";
         $n == $len or die "short read: $n != $len\n";
         PublicInbox::Eml->new(\$bref);
@@ -92,6 +93,10 @@ sub shard_worker_loop ($$$$$) {
                         chomp $eidx_key;
                         $self->remove_eidx_info($docid, $oid, $eidx_key,
                                                         eml($r, $len));
+                } elsif ($line =~ s/\AO ([^\n]+)\n//) {
+                        my $over_fn = $1;
+                        $over_fn =~ tr/\0/\n/;
+                        $self->over_check(PublicInbox::Over->new($over_fn));
                 } else {
                         chomp $line;
                         my $eidx_key;
@@ -155,10 +160,9 @@ sub shard_add_eidx_info {
 }
 
 sub shard_remove_eidx_info {
-        my ($self, $docid, $oid, $xibx, $eml) = @_;
-        my $eidx_key = $xibx->eidx_key;
+        my ($self, $docid, $oid, $eidx_key, $eml) = @_;
         if (my $w = $self->{w}) {
-                my $hdr = $eml->header_obj->as_string;
+                my $hdr = $eml ? $eml->header_obj->as_string : '';
                 my $len = length($hdr);
                 print $w "-X $len $docid $oid $eidx_key\n", $hdr or
                         die "failed to write shard: $!";
@@ -212,4 +216,15 @@ sub shard_remove {
         }
 }
 
+sub shard_over_check {
+        my ($self, $over) = @_;
+        if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child
+                my ($over_fn) = $over->{dbh}->sqlite_db_filename;
+                $over_fn =~ tr/\n/\0/;
+                print $w "O $over_fn\n" or die "failed to write over $!";
+        } else {
+                $self->over_check($over);
+        }
+}
+
 1;