about summary refs log tree commit homepage
path: root/lib/PublicInbox/ExtSearchIdx.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/ExtSearchIdx.pm')
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm | 90
1 files changed, 89 insertions, 1 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index d780776f..4de47b58 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -87,6 +87,7 @@ sub _ibx_attach { # each_inbox callback
 
 sub attach_config { # calls _ibx_attach via $cfg->each_inbox
         my ($self, $cfg) = @_;
+        $self->{cfg} = $cfg; # eidx_gc dies unless this is set
         $cfg->each_inbox(\&_ibx_attach, $self);
 }
 
@@ -141,7 +142,8 @@ sub do_xpost ($$) {
                 if ($nr == 0) {
                         $idx->shard_remove($oid, $docid);
                 } elsif ($rm_eidx_info) {
-                        $idx->shard_remove_eidx_info($docid, $oid, $xibx, $eml);
+                        $idx->shard_remove_eidx_info($docid, $oid, $eidx_key,
+                                                        $eml);
                 }
         }
 }
@@ -324,6 +326,90 @@ sub _sync_inbox ($$$) {
         $ibx->git->cleanup; # done with this inbox, now
 }
 
+# Deletes every xref3 row linking $docid to $ibx_id.  If xref3 rows
+# for $docid remain afterwards, only the eidx info for $eidx_key is
+# removed from the shard; otherwise the document itself is removed
+# from the shard.  One "I: ..." line is warned per affected OID.
+sub unref_doc ($$$$) {
+        my ($self, $ibx_id, $eidx_key, $docid) = @_;
+        my $oidx = $self->{oidx};
+        my $dbh = $oidx->dbh;
+        my @bind = ($docid, $ibx_id);
+
+        # remember the OIDs (as hex) before their rows are deleted;
+        # for debug/info purposes, oids may no longer be accessible
+        my $sel = $dbh->prepare_cached(<<'', undef, 1);
+SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+        $sel->execute(@bind);
+        my @hex;
+        for my $row (@{$sel->fetchall_arrayref}) {
+                push @hex, unpack('H*', $row->[0]);
+        }
+
+        my $del = $dbh->prepare_cached(<<'');
+DELETE FROM xref3 WHERE docid = ? AND ibx_id = ?
+
+        $del->execute(@bind);
+
+        # rows left over here belong to other ibx_id values
+        my $remain = $oidx->get_xref3($docid);
+        my $shard = $self->idx_shard($docid);
+        my $shared = scalar(@$remain);
+        for my $hex (@hex) {
+                if ($shared) {
+                        warn "I: unref #$docid $eidx_key $hex\n";
+                        $shard->shard_remove_eidx_info($docid, $hex,
+                                                        $eidx_key);
+                } else {
+                        warn "I: remove #$docid $eidx_key $hex\n";
+                        $shard->shard_remove($hex, $docid);
+                }
+        }
+}
+
+# Garbage-collects inboxes which are no longer configured.
+#
+# Every row of the `inboxes' table whose eidx_key is absent from
+# $self->{ibx_map} is dropped from $self->{midx} (remove_eidx_key),
+# has its xref3 docids passed to unref_doc, its `inboxes' row deleted
+# and its "lc-*" (last_commit) keys deleted from eidx_meta.
+# Afterwards every shard runs shard_over_check and xref3 rows whose
+# docid is missing from `over' are deleted.
+#
+# $opt is the option hashref handed to idx_init; $opt->{-idx_gc} is
+# set to 1 here before idx_init is called.
+# Dies unless ->attach_config was called beforehand.
+sub eidx_gc {
+        my ($self, $opt) = @_;
+        $self->{cfg} or die "E: GC requires ->attach_config\n";
+        $opt->{-idx_gc} = 1;
+        $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
+
+        my $dbh = $self->{oidx}->dbh;
+        my $x3_doc = $dbh->prepare('SELECT docid FROM xref3 WHERE ibx_id = ?');
+        my $ibx_ck = $dbh->prepare('SELECT ibx_id,eidx_key FROM inboxes');
+
+        # SQLite's LIKE has no default escape character; the backslash
+        # escaping of $pat below only takes effect with an ESCAPE clause
+        my $lc_i = $dbh->prepare(<<'');
+SELECT key FROM eidx_meta WHERE key LIKE ? ESCAPE ?
+
+        $ibx_ck->execute;
+        while (my ($ibx_id, $eidx_key) = $ibx_ck->fetchrow_array) {
+                next if $self->{ibx_map}->{$eidx_key}; # still configured
+                $self->{midx}->remove_eidx_key($eidx_key);
+                warn "I: deleting messages for $eidx_key...\n";
+                $x3_doc->execute($ibx_id);
+                while (defined(my $docid = $x3_doc->fetchrow_array)) {
+                        unref_doc($self, $ibx_id, $eidx_key, $docid);
+                }
+                $dbh->prepare_cached(<<'')->execute($ibx_id);
+DELETE FROM inboxes WHERE ibx_id = ?
+
+                # drop last_commit info; `_', `%' and the escape
+                # character itself must be escaped to match literally
+                my $pat = $eidx_key;
+                $pat =~ s/([_%\\])/\\$1/g;
+                $lc_i->execute("lc-%:$pat//%", '\\');
+                while (my ($key) = $lc_i->fetchrow_array) {
+                        # LIKE is only a coarse filter (the `%' after
+                        # `lc-' matches anything), verify the exact layout
+                        next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!;
+                        warn "I: removing $key\n";
+                        $dbh->prepare_cached(<<'')->execute($key);
+DELETE FROM eidx_meta WHERE key = ?
+
+                }
+
+                warn "I: $eidx_key removed\n";
+        }
+
+        # it's not real unless it's in `over'; the shards read directly
+        # from over, so commit before asking them to check against it
+        $self->{oidx}->commit_lazy;
+        $self->{oidx}->begin_lazy;
+
+        for my $idx (@{$self->{idx_shards}}) {
+                warn "I: cleaning up shard #$idx->{shard}\n";
+                $idx->shard_over_check($self->{oidx});
+        }
+        my $nr = $dbh->do(<<'');
+DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over)
+
+        # DBI's do() yields "0E0" for zero rows, which is numerically 0
+        warn "I: eliminated $nr stale xref3 entries\n" if $nr != 0;
+
+        done($self);
+}
+
 sub eidx_sync { # main entry point
         my ($self, $opt) = @_;
         $self->idx_init($opt); # acquire lock via V2Writable::_idx_init
@@ -413,6 +499,7 @@ sub idx_init { # similar to V2Writable
                                 next if $seen{"$st[0]\0$st[1]"}++;
                         } else {
                                 warn "W: stat($d) failed (from $alt): $!\n";
+                                next if $opt->{-idx_gc};
                         }
                         push @old, $line;
                 }
@@ -424,6 +511,7 @@ sub idx_init { # similar to V2Writable
                         next if $seen{"$st[0]\0$st[1]"}++;
                 } else {
                         warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n";
+                        next if $opt->{-idx_gc};
                 }
                 push @new, $line;
         }