user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 4/7] searchidxshard: replace index_raw with index_eml
Date: Sun,  3 Jan 2021 02:06:14 +0000	[thread overview]
Message-ID: <20210103020617.15719-5-e@80x24.org> (raw)
In-Reply-To: <20210103020617.15719-1-e@80x24.org>

Since Storable and Sereal are designed for lossless
serialization, we'll just pass $eml objects to whatever process
is running SearchIdx.
---
 lib/PublicInbox/ExtSearchIdx.pm   |  4 ++--
 lib/PublicInbox/LeiStore.pm       |  3 ++-
 lib/PublicInbox/SearchIdxShard.pm |  9 ++-------
 lib/PublicInbox/V2Writable.pm     | 11 +++++------
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 064d9939..d55d3db9 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -135,7 +135,7 @@ sub index_unseen ($) {
 	my $oid = $new_smsg->{blob};
 	my $ibx = delete $req->{ibx} or die 'BUG: {ibx} unset';
 	$self->{oidx}->add_xref3($docid, $req->{xnum}, $oid, $ibx->eidx_key);
-	$idx->index_raw(undef, $eml, $new_smsg, $ibx->eidx_key);
+	$idx->index_eml($eml, $new_smsg, $ibx->eidx_key);
 	check_batch_limit($req);
 }
 
@@ -437,7 +437,7 @@ sub _reindex_finalize ($$$) {
 	my $top_smsg = pop @$stable;
 	$top_smsg == $smsg or die 'BUG: top_smsg != smsg';
 	my $ibx = _ibx_for($self, $sync, $smsg);
-	$idx->index_raw(undef, $eml, $smsg, $ibx->eidx_key);
+	$idx->index_eml($eml, $smsg, $ibx->eidx_key);
 	for my $x (reverse @$stable) {
 		$ibx = _ibx_for($self, $sync, $x);
 		my $hdr = delete $x->{hdr} // die 'BUG: no {hdr}';
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index d686e95a..4f77e8fa 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -199,6 +199,7 @@ sub add_eml {
 	$im->add($eml, undef, $smsg) or return; # duplicate returns undef
 	my $msgref = delete $smsg->{-raw_email};
 	$smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
+	undef $msgref;
 
 	local $self->{current_info} = $smsg->{blob};
 	if (my @docids = _docids_for($self, $eml)) {
@@ -215,7 +216,7 @@ sub add_eml {
 		$oidx->add_overview($eml, $smsg);
 		$oidx->add_xref3($smsg->{num}, -1, $smsg->{blob}, '.');
 		my $idx = $eidx->idx_shard($smsg->{num});
-		$idx->index_raw($msgref, $eml, $smsg);
+		$idx->index_eml($eml, $smsg);
 		$idx->ipc_do('add_keywords', $smsg->{num}, @kw) if @kw;
 		$smsg;
 	}
diff --git a/lib/PublicInbox/SearchIdxShard.pm b/lib/PublicInbox/SearchIdxShard.pm
index 43dad959..83cbbb25 100644
--- a/lib/PublicInbox/SearchIdxShard.pm
+++ b/lib/PublicInbox/SearchIdxShard.pm
@@ -43,13 +43,8 @@ sub ipc_atfork_child { # called automatically before ipc_worker_loop
 	PublicInbox::OnDestroy->new($$, \&_worker_done, $self);
 }
 
-sub index_raw {
-	my ($self, $msgref, $eml, $smsg, $eidx_key) = @_;
-	if ($eml) {
-		undef($$msgref) if $msgref;
-	} else { # --xapian-only + --sequential-shard:
-		$eml = PublicInbox::Eml->new($msgref);
-	}
+sub index_eml {
+	my ($self, $eml, $smsg, $eidx_key) = @_;
 	$smsg->{eidx_key} = $eidx_key if defined $eidx_key;
 	$self->ipc_do('add_message', $eml, $smsg);
 }
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 885edbe9..7b6b93a0 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -140,11 +140,11 @@ sub idx_shard ($$) {
 
 # indexes a message, returns true if checkpointing is needed
 sub do_idx ($$$$) {
-	my ($self, $msgref, $mime, $smsg) = @_;
+	my ($self, $msgref, $eml, $smsg) = @_;
 	$smsg->{bytes} = $smsg->{raw_bytes} + crlf_adjust($$msgref);
-	$self->{oidx}->add_overview($mime, $smsg);
+	$self->{oidx}->add_overview($eml, $smsg);
 	my $idx = idx_shard($self, $smsg->{num});
-	$idx->index_raw($msgref, $mime, $smsg);
+	$idx->index_eml($eml, $smsg);
 	my $n = $self->{transact_bytes} += $smsg->{raw_bytes};
 	$n >= $self->{batch_bytes};
 }
@@ -173,8 +173,7 @@ sub _add {
 	$cmt = $im->get_mark($cmt);
 	$self->{last_commit}->[$self->{epoch_max}] = $cmt;
 
-	my $msgref = delete $smsg->{-raw_email};
-	if (do_idx($self, $msgref, $mime, $smsg)) {
+	if (do_idx($self, delete $smsg->{-raw_email}, $mime, $smsg)) {
 		$self->checkpoint;
 	}
 
@@ -1219,7 +1218,7 @@ sub index_xap_only { # git->cat_async callback
 	my $self = $smsg->{self};
 	my $idx = idx_shard($self, $smsg->{num});
 	$smsg->{raw_bytes} = $size;
-	$idx->index_raw($bref, undef, $smsg);
+	$idx->index_eml(PublicInbox::Eml->new($bref), $smsg);
 	$self->{transact_bytes} += $size;
 }
 

  parent reply	other threads:[~2021-01-03  2:06 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-03  2:06 [PATCH 0/7] v2: swap in new IPC package Eric Wong
2021-01-03  2:06 ` [PATCH 1/7] ipc: some documentation comments Eric Wong
2021-01-03  2:06 ` [PATCH 2/7] searchidxshard: use PublicInbox::IPC to kill lots of code Eric Wong
2021-01-03  2:06 ` [PATCH 3/7] searchidxshard: IPC conversion, part 2 Eric Wong
2021-01-03  2:06 ` Eric Wong [this message]
2021-01-03  2:06 ` [PATCH 5/7] use Eml (or MIME) objects for all indexing paths Eric Wong
2021-01-03  2:06 ` [PATCH 6/7] ipc: switch to one-way pipes Eric Wong
2021-01-03  2:06 ` [PATCH 7/7] searchidxshard: use add_xapian directly for v2 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210103020617.15719-5-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).