From ea11b7b17d525d20a07d7f62c0334501c5a721b4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 15 Oct 2021 09:52:53 +0000 Subject: lei q: avoid kw lookup failure on remote mboxrd When importing several sources in parallel via http(s) mboxrd, we need to be able to get keywords of uncommitted documents directly from shard workers. Otherwise, Xapian DocNotFound errors happen because the read-only LeiSearch won't see documents from uncommitted transactions. Keep in mind that it's possible the keywords can be changed on-the-fly even for uncommitted documents because of inotify watches from LeiNoteEvent. --- lib/PublicInbox/LeiStore.pm | 28 +++++++++++++++++++++++----- lib/PublicInbox/LeiXSearch.pm | 8 +++----- lib/PublicInbox/SearchIdx.pm | 6 ++++++ 3 files changed, 32 insertions(+), 10 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index bf41dcf5..c45380d1 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -328,6 +328,20 @@ sub _add_vmd ($$$$) { sto_export_kw($self, $docid, $vmd); } +sub _docids_and_maybe_kw ($$) { + my ($self, $docids) = @_; + return $docids unless wantarray; + my $kw = {}; + for my $num (@$docids) { # likely only 1, unless ContentHash changes + # can't use ->search->msg_keywords on uncommitted docs + my $idx = $self->{priv_eidx}->idx_shard($num); + my $tmp = eval { $idx->ipc_do('get_terms', 'K', $num) }; + if ($@) { warn "#$num get_terms: $@" } + else { @$kw{keys %$tmp} = values(%$tmp) }; + } + ($docids, [ sort keys %$kw ]); +} + sub add_eml { my ($self, $eml, $vmd, $xoids) = @_; my $im = $self->{-fake_im} // $self->importer; # may create new epoch @@ -339,7 +353,11 @@ sub add_eml { if ($vmd && $vmd->{sync_info}) { set_sync_info($self, $smsg->{blob}, @{$vmd->{sync_info}}); } - $im_mark or return; # duplicate blob returns undef + unless ($im_mark) { # duplicate blob returns undef + return unless wantarray; + my @docids = $oidx->blob_exists($smsg->{blob}); 
+ return _docids_and_maybe_kw $self, \@docids; + } local $self->{current_info} = $smsg->{blob}; my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches @@ -373,7 +391,7 @@ sub add_eml { } _add_vmd($self, $idx, $docid, $vmd) if $vmd; } - $vivify_xvmd; + _docids_and_maybe_kw $self, $vivify_xvmd; } elsif (my @docids = _docids_for($self, $eml)) { # fuzzy match from within lei/store for my $docid (@docids) { @@ -383,8 +401,8 @@ sub add_eml { $idx->ipc_do('add_eidx_info', $docid, '.', $eml); _add_vmd($self, $idx, $docid, $vmd) if $vmd; } - \@docids; - } else { # totally new message + _docids_and_maybe_kw $self, \@docids; + } else { # totally new message, no keywords delete $smsg->{-oidx}; # for IPC-friendliness $smsg->{num} = $oidx->adj_counter('eidx_docid', '+'); $oidx->add_overview($eml, $smsg); @@ -392,7 +410,7 @@ sub add_eml { my $idx = $eidx->idx_shard($smsg->{num}); $idx->index_eml($eml, $smsg); _add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd; - $smsg; + wantarray ? ($smsg, []) : $smsg; } } diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm index fba16861..3ec75528 100644 --- a/lib/PublicInbox/LeiXSearch.pm +++ b/lib/PublicInbox/LeiXSearch.pm @@ -282,11 +282,9 @@ sub each_remote_eml { # callback for MboxReader->mboxrd my $xoids = $lei->{ale}->xoids_for($eml, 1); my $smsg = bless {}, 'PublicInbox::Smsg'; if ($self->{import_sto} && !$xoids) { - my $res = $self->{import_sto}->wq_do('add_eml', $eml); - if (ref($res) eq ref($smsg)) { # totally new message - $smsg = $res; - $smsg->{kw} = []; # short-circuit xsmsg_vmd - } + my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml); + $smsg = $res if ref($res) eq ref($smsg); # totally new message + $smsg->{kw} = $kw; # short-circuit xsmsg_vmd } $smsg->{blob} //= $xoids ? 
(keys(%$xoids))[0] : $lei->git_oid($eml)->hexdigest; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 928152ec..585f28f5 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -517,6 +517,12 @@ sub add_eidx_info { $self->{xdb}->replace_document($docid, $doc); } +sub get_terms { + my ($self, $pfx, $docid) = @_; + begin_txn_lazy($self); + xap_terms($pfx, $self->{xdb}, $docid); +} + sub remove_eidx_info { my ($self, $docid, $eidx_key, $eml) = @_; begin_txn_lazy($self); -- cgit v1.2.3-24-ge0c7