From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 2ECF41FA17 for ; Sun, 3 Jan 2021 02:06:18 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/7] searchidxshard: replace index_raw with index_eml Date: Sun, 3 Jan 2021 02:06:14 +0000 Message-Id: <20210103020617.15719-5-e@80x24.org> In-Reply-To: <20210103020617.15719-1-e@80x24.org> References: <20210103020617.15719-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since Storable and Sereal are designed for lossless serialization, we'll just pass $eml objects to whatever process is running SearchIdx. --- lib/PublicInbox/ExtSearchIdx.pm | 4 ++-- lib/PublicInbox/LeiStore.pm | 3 ++- lib/PublicInbox/SearchIdxShard.pm | 9 ++------- lib/PublicInbox/V2Writable.pm | 11 +++++------ 4 files changed, 11 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 064d9939..d55d3db9 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -135,7 +135,7 @@ sub index_unseen ($) { my $oid = $new_smsg->{blob}; my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset'; $self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key); - $idx->index_raw(undef, $eml, $new_smsg, $ibx->eidx_key); + $idx->index_eml($eml, $new_smsg, $ibx->eidx_key); check_batch_limit($req); } @@ -437,7 +437,7 @@ sub _reindex_finalize ($$$) { my $top_smsg = pop @$stable; $top_smsg == $smsg or die 'BUG: top_smsg != smsg'; my $ibx = _ibx_for($self, $sync, $smsg); - $idx->index_raw(undef, $eml, $smsg, $ibx->eidx_key); + $idx->index_eml($eml, $smsg, $ibx->eidx_key); for my $x (reverse @$stable) { $ibx = _ibx_for($self, $sync, $x); my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}'; diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index d686e95a..4f77e8fa 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -199,6 +199,7 @@ sub add_eml { $im->add($eml, undef, $smsg) or return; # duplicate returns undef my $msgref = delete $smsg->{-raw_email}; $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref); + undef $msgref; local $self->{current_info} = $smsg->{blob}; if (my @docids = _docids_for($self, $eml)) { @@ -215,7 +216,7 @@ sub add_eml { $oidx->add_overview($eml, $smsg); $oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.'); my $idx = $eidx->idx_shard($smsg->{num}); - $idx->index_raw($msgref, $eml, $smsg); + $idx->index_eml($eml, $smsg); $idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw; $smsg; } diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm index 43dad959..83cbbb25 100644 --- a/lib/PublicInbox/SearchIdxShard.pm +++ b/lib/PublicInbox/SearchIdxShard.pm @@ -43,13 +43,8 @@ sub ipc_atfork_child { # called automatically before ipc_worker_loop PublicInbox::OnDestroy->new($$, \&_worker_done, $self); } -sub index_raw { - my ($self, $msgref, $eml, $smsg, $eidx_key) = @_; - if ($eml) { - undef($$msgref) if $msgref; - } else { # --xapian-only + --sequential-shard: - $eml = PublicInbox::Eml->new($msgref); - } +sub index_eml { + my ($self, $eml, $smsg, $eidx_key) = @_; $smsg->{eidx_key} = $eidx_key if defined $eidx_key; $self->ipc_do('add_message', $eml, $smsg); } diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 885edbe9..7b6b93a0 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -140,11 +140,11 @@ sub idx_shard ($$) { # indexes a message, returns true if checkpointing is needed sub do_idx ($$$$) { - my ($self, $msgref, $mime, $smsg) = @_; + my ($self, $msgref, $eml, $smsg) = @_; $smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref); - $self->{oidx}->add_overview($mime, $smsg); + $self->{oidx}->add_overview($eml, $smsg); my $idx = idx_shard($self, $smsg->{num}); - $idx->index_raw($msgref, $mime, $smsg); + $idx->index_eml($eml, $smsg); my $n = $self->{transact_bytes} += $smsg->{raw_bytes}; $n >= $self->{batch_bytes}; } @@ -173,8 +173,7 @@ sub _add { $cmt = $im->get_mark($cmt); $self->{last_commit}->[$self->{epoch_max}] = $cmt; - my $msgref = delete $smsg->{-raw_email}; - if (do_idx($self, $msgref, $mime, $smsg)) { + if (do_idx($self, delete $smsg->{-raw_email}, $mime, $smsg)) { $self->checkpoint; } @@ -1219,7 +1218,7 @@ sub index_xap_only { # git->cat_async callback my $self = $smsg->{self}; my $idx = idx_shard($self, $smsg->{num}); $smsg->{raw_bytes} = $size; - $idx->index_raw($bref, undef, $smsg); + $idx->index_eml(PublicInbox::Eml->new($bref), $smsg); $self->{transact_bytes} += $size; }