From a9d492511697f64e495dc48d0eb29c20f9fe590c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 7 Dec 2020 07:40:52 +0000 Subject: searchidx: remove $oid parameter from most calls Xapian docids have been tied to the over {num} column for nearly 3 years now, and OIDs are no longer stored in Xapian document data. There's no need to increase code and IPC complexity by passing the OID around. --- lib/PublicInbox/ExtSearchIdx.pm | 15 ++++++--------- lib/PublicInbox/SearchIdx.pm | 38 ++++++++++++++++---------------------- lib/PublicInbox/SearchIdxShard.pm | 37 +++++++++++++++++-------------------- lib/PublicInbox/V2Writable.pm | 2 +- 4 files changed, 40 insertions(+), 52 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 819c7903..c06b25a9 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -125,17 +125,16 @@ sub do_xpost ($$) { if (my $new_smsg = $req->{new_smsg}) { # 'm' on cross-posted message my $xnum = $req->{xnum}; $self->{oidx}->add_xref3($docid, $xnum, $oid, $eidx_key); - $idx->shard_add_eidx_info($docid, $oid, $xibx, $eml); + $idx->shard_add_eidx_info($docid, $xibx, $eml); check_batch_limit($req); } else { # 'd' my $rm_eidx_info; my $nr = $self->{oidx}->remove_xref3($docid, $oid, $eidx_key, \$rm_eidx_info); if ($nr == 0) { - $idx->shard_remove($oid, $docid); + $idx->shard_remove($docid); } elsif ($rm_eidx_info) { - $idx->shard_remove_eidx_info($docid, $oid, $eidx_key, - $eml); + $idx->shard_remove_eidx_info($docid, $eidx_key, $eml); } } } @@ -333,13 +332,11 @@ DELETE FROM xref3 WHERE docid = ? AND ibx_id = ? 
if (@$remain) { for my $oid (@oid) { warn "I: unref #$docid $eidx_key $oid\n"; - $idx->shard_remove_eidx_info($docid, $oid, $eidx_key); + $idx->shard_remove_eidx_info($docid, $eidx_key); } } else { - for my $oid (@oid) { - warn "I: remove #$docid $eidx_key $oid\n"; - $idx->shard_remove($oid, $docid); - } + warn "I: remove #$docid $eidx_key @oid\n"; + $idx->shard_remove($docid); } } diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index c18c7c36..0124dd11 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -445,20 +445,20 @@ sub add_message { $smsg->{num}; } -sub _get_doc ($$$) { - my ($self, $docid, $oid) = @_; +sub _get_doc ($$) { + my ($self, $docid) = @_; my $doc = eval { $self->{xdb}->get_document($docid) }; $doc // do { warn "E: $@\n" if $@; - warn "E: #$docid $oid missing in Xapian\n"; + warn "E: #$docid missing in Xapian\n"; undef; } } sub add_eidx_info { - my ($self, $docid, $oid, $eidx_key, $eml) = @_; + my ($self, $docid, $eidx_key, $eml) = @_; begin_txn_lazy($self); - my $doc = _get_doc($self, $docid, $oid) or return; + my $doc = _get_doc($self, $docid) or return; term_generator($self)->set_document($doc); $doc->add_boolean_term('O'.$eidx_key); index_list_id($self, $doc, $eml); @@ -466,9 +466,9 @@ sub add_eidx_info { } sub remove_eidx_info { - my ($self, $docid, $oid, $eidx_key, $eml) = @_; + my ($self, $docid, $eidx_key, $eml) = @_; begin_txn_lazy($self); - my $doc = _get_doc($self, $docid, $oid) or return; + my $doc = _get_doc($self, $docid) or return; eval { $doc->remove_term('O'.$eidx_key) }; warn "W: ->remove_term O$eidx_key: $@\n" if $@; for my $l ($eml ? 
$eml->header_raw('List-Id') : ()) { @@ -512,25 +512,19 @@ sub smsg_from_doc ($) { } sub xdb_remove { - my ($self, $oid, @removed) = @_; + my ($self, @docids) = @_; my $xdb = $self->{xdb} or return; - for my $num (@removed) { - my $doc = _get_doc($self, $num, $oid) or next; - my $smsg = smsg_from_doc($doc); - my $blob = $smsg->{blob}; # may be undef if --skip-docdata - if (!defined($blob) || $blob eq $oid) { - $xdb->delete_document($num); - } else { - warn "E: #$num $oid != $blob in Xapian\n"; - } + for my $docid (@docids) { + eval { $xdb->delete_document($docid) }; + warn "E: #$docid not in in Xapian? $@\n" if $@; } } -sub remove_by_oid { - my ($self, $oid, $num) = @_; - die "BUG: remove_by_oid is v2-only\n" if $self->{oidx}; +sub remove_by_docid { + my ($self, $num) = @_; + die "BUG: remove_by_docid is v2-only\n" if $self->{oidx}; $self->begin_txn_lazy; - xdb_remove($self, $oid, $num) if need_xapian($self); + xdb_remove($self, $num) if need_xapian($self); } sub index_git_blob_id { @@ -566,7 +560,7 @@ sub unindex_eml { } else { # just in case msgmap and over.sqlite3 become desynched: $self->{mm}->mid_delete($mids->[0]); } - xdb_remove($self, $oid, keys %tmp) if need_xapian($self); + xdb_remove($self, keys %tmp) if need_xapian($self); } sub index_mm { diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 53fac9b6..182bbde2 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -79,19 +79,16 @@ sub shard_worker_loop ($$$$$) { # no need to lock < 512 bytes is atomic under POSIX print $bnote "barrier $shard\n" or die "write failed for barrier $!\n"; - } elsif ($line =~ /\AD ([a-f0-9]{40,}) ([0-9]+)\n\z/s) { - $self->remove_by_oid($1, $2 + 0); + } elsif ($line =~ /\AD ([0-9]+)\n\z/s) { + $self->remove_by_docid($1 + 0); } elsif ($line =~ s/\A\+X //) { - my ($len, $docid, $oid, $eidx_key) = - split(/ /, $line, 4); + my ($len, $docid, $eidx_key) = split(/ /, $line, 3); chomp $eidx_key; - 
$self->add_eidx_info($docid, $oid, $eidx_key, - eml($r, $len)); + $self->add_eidx_info($docid, $eidx_key, eml($r, $len)); } elsif ($line =~ s/\A-X //) { - my ($len, $docid, $oid, $eidx_key) = - split(/ /, $line, 4); + my ($len, $docid, $eidx_key) = split(/ /, $line, 3); chomp $eidx_key; - $self->remove_eidx_info($docid, $oid, $eidx_key, + $self->remove_eidx_info($docid, $eidx_key, eml($r, $len)); } elsif ($line =~ s/\AO ([^\n]+)\n//) { my $over_fn = $1; @@ -147,27 +144,27 @@ sub index_raw { } sub shard_add_eidx_info { - my ($self, $docid, $oid, $xibx, $eml) = @_; + my ($self, $docid, $xibx, $eml) = @_; my $eidx_key = $xibx->eidx_key; if (my $w = $self->{w}) { my $hdr = $eml->header_obj->as_string; my $len = length($hdr); - print $w "+X $len $docid $oid $eidx_key\n", $hdr or + print $w "+X $len $docid $eidx_key\n", $hdr or die "failed to write shard: $!"; } else { - $self->add_eidx_info($docid, $oid, $eidx_key, $eml); + $self->add_eidx_info($docid, $eidx_key, $eml); } } sub shard_remove_eidx_info { - my ($self, $docid, $oid, $eidx_key, $eml) = @_; + my ($self, $docid, $eidx_key, $eml) = @_; if (my $w = $self->{w}) { my $hdr = $eml ? 
$eml->header_obj->as_string : ''; my $len = length($hdr); - print $w "-X $len $docid $oid $eidx_key\n", $hdr or + print $w "-X $len $docid $eidx_key\n", $hdr or die "failed to write shard: $!"; } else { - $self->remove_eidx_info($docid, $oid, $eidx_key, $eml); + $self->remove_eidx_info($docid, $eidx_key, $eml); } } @@ -208,17 +205,17 @@ sub shard_close { } sub shard_remove { - my ($self, $oid, $num) = @_; - if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child - print $w "D $oid $num\n" or die "failed to write remove $!"; + my ($self, $num) = @_; + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child + print $w "D $num\n" or die "failed to write remove $!"; } else { # same process - $self->remove_by_oid($oid, $num); + $self->remove_by_docid($num); } } sub shard_over_check { my ($self, $over) = @_; - if (my $w = $self->{w}) { # triggers remove_by_oid in a shard child + if (my $w = $self->{w}) { # triggers remove_by_docid in a shard child my ($over_fn) = $over->{dbh}->sqlite_db_filename; $over_fn =~ tr/\n/\0/; print $w "O $over_fn\n" or die "failed to write over $!"; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index e9a43000..5aec7561 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -1141,7 +1141,7 @@ sub unindex_oid_aux ($$$) { my @removed = $self->{oidx}->remove_oid($oid, $mid); for my $num (@removed) { my $idx = idx_shard($self, $num); - $idx->shard_remove($oid, $num); + $idx->shard_remove($num); } } -- cgit v1.2.3-24-ge0c7