about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- lib/PublicInbox/LeiStore.pm 28
-rw-r--r-- lib/PublicInbox/LeiXSearch.pm 8
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 6
-rw-r--r-- t/lei_store.t 3
4 files changed, 34 insertions, 11 deletions
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index bf41dcf5..c45380d1 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -328,6 +328,20 @@ sub _add_vmd ($$$$) {
         sto_export_kw($self, $docid, $vmd);
 }
 
+sub _docids_and_maybe_kw ($$) { # scalar ctx: $docids; list ctx: ($docids, \@sorted_term_keys)
+        my ($self, $docids) = @_; # $docids: array ref of docids (dereferenced in the loop below)
+        return $docids unless wantarray; # scalar/void callers skip the per-doc term lookups
+        my $kw = {}; # merged get_terms results across all docids; only its keys are returned
+        for my $num (@$docids) { # likely only 1, unless ContentHash changes
+                # can't use ->search->msg_keywords on uncommitted docs, so call get_terms via the shard's ipc_do; a failure only warns
+                my $idx = $self->{priv_eidx}->idx_shard($num); # shard object responsible for this docid
+                my $tmp = eval { $idx->ipc_do('get_terms', 'K', $num) }; # 'K'-prefixed terms (presumably keywords - confirm)
+                if ($@) { warn "#$num get_terms: $@" }
+                else { @$kw{keys %$tmp} = values(%$tmp) }; # hash slice: merge this doc's terms into $kw
+        }
+        ($docids, [ sort keys %$kw ]); # hash keys are already unique; sort gives a stable order
+}
+
 sub add_eml {
         my ($self, $eml, $vmd, $xoids) = @_;
         my $im = $self->{-fake_im} // $self->importer; # may create new epoch
@@ -339,7 +353,11 @@ sub add_eml {
         if ($vmd && $vmd->{sync_info}) {
                 set_sync_info($self, $smsg->{blob}, @{$vmd->{sync_info}});
         }
-        $im_mark or return; # duplicate blob returns undef
+        unless ($im_mark) { # duplicate blob returns undef
+                return unless wantarray;
+                my @docids = $oidx->blob_exists($smsg->{blob});
+                return _docids_and_maybe_kw $self, \@docids;
+        }
 
         local $self->{current_info} = $smsg->{blob};
         my $vivify_xvmd = delete($smsg->{-vivify_xvmd}) // []; # exact matches
@@ -373,7 +391,7 @@ sub add_eml {
                         }
                         _add_vmd($self, $idx, $docid, $vmd) if $vmd;
                 }
-                $vivify_xvmd;
+                _docids_and_maybe_kw $self, $vivify_xvmd;
         } elsif (my @docids = _docids_for($self, $eml)) {
                 # fuzzy match from within lei/store
                 for my $docid (@docids) {
@@ -383,8 +401,8 @@ sub add_eml {
                         $idx->ipc_do('add_eidx_info', $docid, '.', $eml);
                         _add_vmd($self, $idx, $docid, $vmd) if $vmd;
                 }
-                \@docids;
-        } else { # totally new message
+                _docids_and_maybe_kw $self, \@docids;
+        } else { # totally new message, no keywords
                 delete $smsg->{-oidx}; # for IPC-friendliness
                 $smsg->{num} = $oidx->adj_counter('eidx_docid', '+');
                 $oidx->add_overview($eml, $smsg);
@@ -392,7 +410,7 @@ sub add_eml {
                 my $idx = $eidx->idx_shard($smsg->{num});
                 $idx->index_eml($eml, $smsg);
                 _add_vmd($self, $idx, $smsg->{num}, $vmd) if $vmd;
-                $smsg;
+                wantarray ? ($smsg, []) : $smsg;
         }
 }
 
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index fba16861..3ec75528 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -282,11 +282,9 @@ sub each_remote_eml { # callback for MboxReader->mboxrd
         my $xoids = $lei->{ale}->xoids_for($eml, 1);
         my $smsg = bless {}, 'PublicInbox::Smsg';
         if ($self->{import_sto} && !$xoids) {
-                my $res = $self->{import_sto}->wq_do('add_eml', $eml);
-                if (ref($res) eq ref($smsg)) { # totally new message
-                        $smsg = $res;
-                        $smsg->{kw} = []; # short-circuit xsmsg_vmd
-                }
+                my ($res, $kw) = $self->{import_sto}->wq_do('add_eml', $eml);
+                $smsg = $res if ref($res) eq ref($smsg); # totally new message
+                $smsg->{kw} = $kw; # short-circuit xsmsg_vmd
         }
         $smsg->{blob} //= $xoids ? (keys(%$xoids))[0]
                                 : $lei->git_oid($eml)->hexdigest;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 928152ec..585f28f5 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -517,6 +517,12 @@ sub add_eidx_info {
         $self->{xdb}->replace_document($docid, $doc);
 }
 
+sub get_terms { # returns whatever xap_terms yields for $docid's terms under prefix $pfx
+        my ($self, $pfx, $docid) = @_; # $pfx: term prefix (the caller in LeiStore.pm passes 'K')
+        begin_txn_lazy($self); # NOTE(review): presumably opens the transaction so $self->{xdb} is usable here - confirm
+        xap_terms($pfx, $self->{xdb}, $docid); # last expression is the return value; the LeiStore.pm caller dereferences it as a hash ref
+}
+
 sub remove_eidx_info {
         my ($self, $docid, $eidx_key, $eml) = @_;
         begin_txn_lazy($self);
diff --git a/t/lei_store.t b/t/lei_store.t
index c31e27a2..40ad7800 100644
--- a/t/lei_store.t
+++ b/t/lei_store.t
@@ -138,7 +138,8 @@ Subject: timezone-dependent test
 WHAT IS TIME ANYMORE?
 EOM
 
-        ok($sto->add_eml($eml), 'recently received message');
+        my $smsg = $sto->add_eml($eml);
+        ok($smsg && $smsg->{blob}, 'recently received message');
         $sto->done;
         local $ENV{TZ} = 'GMT+5';
         my $lse = $sto->search;