about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2021-01-03 02:06:14 +0000
committer: Eric Wong <e@80x24.org> 2021-01-03 18:30:31 +0000
commit: 323d8bac125e89a76c904a54a7ae0b2e36f05cc6 (patch)
tree: 0220aee8cc0b691fe8ca0a54abd2a528288f3e06
parent: 82b805db3ad908f1c3ea114f329835c6f881144a (diff)
download: public-inbox-323d8bac125e89a76c904a54a7ae0b2e36f05cc6.tar.gz
Since Storable and Sereal are designed for lossless
serialization, we'll just pass $eml objects to whatever process
is running SearchIdx.
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm | 4
-rw-r--r-- lib/PublicInbox/LeiStore.pm | 3
-rw-r--r-- lib/PublicInbox/SearchIdxShard.pm | 9
-rw-r--r-- lib/PublicInbox/V2Writable.pm | 11
4 files changed, 11 insertions, 16 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 064d9939..d55d3db9 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -135,7 +135,7 @@ sub index_unseen ($) {
         my $oid = $new_smsg->{blob};
         my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
         $self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
-        $idx->index_raw(undef, $eml, $new_smsg, $ibx->eidx_key);
+        $idx->index_eml($eml, $new_smsg, $ibx->eidx_key);
         check_batch_limit($req);
 }
 
@@ -437,7 +437,7 @@ sub _reindex_finalize ($$$) {
         my $top_smsg = pop @$stable;
         $top_smsg == $smsg or die 'BUG: top_smsg != smsg';
         my $ibx = _ibx_for($self, $sync, $smsg);
-        $idx->index_raw(undef, $eml, $smsg, $ibx->eidx_key);
+        $idx->index_eml($eml, $smsg, $ibx->eidx_key);
         for my $x (reverse @$stable) {
                 $ibx = _ibx_for($self, $sync, $x);
                 my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index d686e95a..4f77e8fa 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -199,6 +199,7 @@ sub add_eml {
         $im->add($eml, undef, $smsg) or return; # duplicate returns undef
         my $msgref = delete $smsg->{-raw_email};
         $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
+        undef $msgref;
 
         local $self->{current_info} = $smsg->{blob};
         if (my @docids = _docids_for($self, $eml)) {
@@ -215,7 +216,7 @@ sub add_eml {
                 $oidx->add_overview($eml, $smsg);
                 $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
                 my $idx = $eidx->idx_shard($smsg->{num});
-                $idx->index_raw($msgref, $eml, $smsg);
+                $idx->index_eml($eml, $smsg);
                 $idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
                 $smsg;
         }
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 43dad959..83cbbb25 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -43,13 +43,8 @@ sub ipc_atfork_child { # called automatically before ipc_worker_loop
         PublicInbox::OnDestroy->new($$, \&_worker_done, $self);
 }
 
-sub index_raw {
-        my ($self, $msgref, $eml, $smsg, $eidx_key) = @_;
-        if ($eml) {
-                undef($$msgref) if $msgref;
-        } else { # --xapian-only + --sequential-shard:
-                $eml = PublicInbox::Eml->new($msgref);
-        }
+sub index_eml {
+        my ($self, $eml, $smsg, $eidx_key) = @_;
         $smsg->{eidx_key} = $eidx_key if defined $eidx_key;
         $self->ipc_do('add_message', $eml, $smsg);
 }
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 885edbe9..7b6b93a0 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -140,11 +140,11 @@ sub idx_shard ($$) {
 
 # indexes a message, returns true if checkpointing is needed
 sub do_idx ($$$$) {
-        my ($self, $msgref, $mime, $smsg) = @_;
+        my ($self, $msgref, $eml, $smsg) = @_;
         $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
-        $self->{oidx}->add_overview($mime, $smsg);
+        $self->{oidx}->add_overview($eml, $smsg);
         my $idx = idx_shard($self, $smsg->{num});
-        $idx->index_raw($msgref, $mime, $smsg);
+        $idx->index_eml($eml, $smsg);
         my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
         $n >= $self->{batch_bytes};
 }
@@ -173,8 +173,7 @@ sub _add {
         $cmt = $im->get_mark($cmt);
         $self->{last_commit}->[$self->{epoch_max}] = $cmt;
 
-        my $msgref = delete $smsg->{-raw_email};
-        if (do_idx($self, $msgref, $mime, $smsg)) {
+        if (do_idx($self, delete $smsg->{-raw_email}, $mime, $smsg)) {
                 $self->checkpoint;
         }
 
@@ -1219,7 +1218,7 @@ sub index_xap_only { # git->cat_async callback
         my $self = $smsg->{self};
         my $idx = idx_shard($self, $smsg->{num});
         $smsg->{raw_bytes} = $size;
-        $idx->index_raw($bref, undef, $smsg);
+        $idx->index_eml(PublicInbox::Eml->new($bref), $smsg);
         $self->{transact_bytes} += $size;
 }