diff options
Diffstat (limited to 'lib/PublicInbox/ExtSearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/ExtSearchIdx.pm | 90 |
1 files changed, 89 insertions, 1 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index d780776f..4de47b58 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -87,6 +87,7 @@ sub _ibx_attach { # each_inbox callback sub attach_config { my ($self, $cfg) = @_; + $self->{cfg} = $cfg; $cfg->each_inbox(\&_ibx_attach, $self); } @@ -141,7 +142,8 @@ sub do_xpost ($$) { if ($nr == 0) { $idx->shard_remove($oid, $docid); } elsif ($rm_eidx_info) { - $idx->shard_remove_eidx_info($docid, $oid, $xibx, $eml); + $idx->shard_remove_eidx_info($docid, $oid, $eidx_key, + $eml); } } } @@ -324,6 +326,90 @@ sub _sync_inbox ($$$) { $ibx->git->cleanup; # done with this inbox, now } +sub unref_doc ($$$$) { + my ($self, $ibx_id, $eidx_key, $docid) = @_; + my $dbh = $self->{oidx}->dbh; + + # for debug/info purposes, oids may no longer be accessible + my $sth = $dbh->prepare_cached(<<'', undef, 1); +SELECT oidbin FROM xref3 WHERE docid = ? AND ibx_id = ? + + $sth->execute($docid, $ibx_id); + my @oid = map { unpack('H*', $_->[0]) } @{$sth->fetchall_arrayref}; + + $dbh->prepare_cached(<<'')->execute($docid, $ibx_id); +DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? + + my $remain = $self->{oidx}->get_xref3($docid); + my $idx = $self->idx_shard($docid); + if (@$remain) { + for my $oid (@oid) { + warn "I: unref #$docid $eidx_key $oid\n"; + $idx->shard_remove_eidx_info($docid, $oid, $eidx_key); + } + } else { + for my $oid (@oid) { + warn "I: remove #$docid $eidx_key $oid\n"; + $idx->shard_remove($oid, $docid); + } + } +} + +sub eidx_gc { + my ($self, $opt) = @_; + $self->{cfg} or die "E: GC requires ->attach_config\n"; + $opt->{-idx_gc} = 1; + $self->idx_init($opt); # acquire lock via V2Writable::_idx_init + + my $dbh = $self->{oidx}->dbh; + my $x3_doc = $dbh->prepare('SELECT docid FROM xref3 WHERE ibx_id = ?'); + my $ibx_ck = $dbh->prepare('SELECT ibx_id,eidx_key FROM inboxes'); + my $lc_i = $dbh->prepare('SELECT key FROM eidx_meta WHERE key LIKE ?'); + + $ibx_ck->execute; + while (my ($ibx_id, $eidx_key) = $ibx_ck->fetchrow_array) { + next if $self->{ibx_map}->{$eidx_key}; + $self->{midx}->remove_eidx_key($eidx_key); + warn "I: deleting messages for $eidx_key...\n"; + $x3_doc->execute($ibx_id); + while (defined(my $docid = $x3_doc->fetchrow_array)) { + unref_doc($self, $ibx_id, $eidx_key, $docid); + } + $dbh->prepare_cached(<<'')->execute($ibx_id); +DELETE FROM inboxes WHERE ibx_id = ? + + # drop last_commit info + my $pat = $eidx_key; + $pat =~ s/([_%])/\\$1/g; + $lc_i->execute("lc-%:$pat//%"); + while (my ($key) = $lc_i->fetchrow_array) { + next if $key !~ m!\Alc-v[1-9]+:\Q$eidx_key\E//!; + warn "I: removing $key\n"; + $dbh->prepare_cached(<<'')->execute($key); +DELETE FROM eidx_meta WHERE key = ? + + } + + warn "I: $eidx_key removed\n"; + } + + # it's not real unless it's in `over', we use parallelism here, + # shards will be reading directly from over, so commit + $self->{oidx}->commit_lazy; + $self->{oidx}->begin_lazy; + + for my $idx (@{$self->{idx_shards}}) { + warn "I: cleaning up shard #$idx->{shard}\n"; + $idx->shard_over_check($self->{oidx}); + } + my $nr = $dbh->do(<<''); +DELETE FROM xref3 WHERE docid NOT IN (SELECT num FROM over) + + warn "I: eliminated $nr stale xref3 entries\n" if $nr != 0; + + done($self); +} + sub eidx_sync { # main entry point my ($self, $opt) = @_; $self->idx_init($opt); # acquire lock via V2Writable::_idx_init @@ -413,6 +499,7 @@ sub idx_init { # similar to V2Writable next if $seen{"$st[0]\0$st[1]"}++; } else { warn "W: stat($d) failed (from $alt): $!\n"; + next if $opt->{-idx_gc}; } push @old, $line; } @@ -424,6 +511,7 @@ sub idx_init { # similar to V2Writable next if $seen{"$st[0]\0$st[1]"}++; } else { warn "W: stat($d) failed (from $ibx->{inboxdir}): $!\n"; + next if $opt->{-idx_gc}; } push @new, $line; } |