user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/9] lei q: fix augment of compressed mailboxes
Date: Tue, 19 Jan 2021 09:34:31 +0000	[thread overview]
Message-ID: <20210119093435.17955-6-e@80x24.org> (raw)
In-Reply-To: <20210119093435.17955-1-e@80x24.org>

We need to delay writing out the mailbox until the compressor
process is up and running, so have startq wait a bit.  This
means we must create the pipe early and hand it off to the
workers before augmenting, despite spawning the
gzip/pigz/xz/bzip2 process after augment is complete.
---
 lib/PublicInbox/LEI.pm        |  1 +
 lib/PublicInbox/LeiToMail.pm  | 19 +++++++++-------
 lib/PublicInbox/LeiXSearch.pm | 40 +++++++++++++++++++++------------
 t/lei.t                       | 42 ++++++++++++++++++++++-------------
 t/lei_to_mail.t               |  4 ++--
 5 files changed, 66 insertions(+), 40 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e4f8bedb..f3edfe82 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -758,6 +758,7 @@ sub accept_dispatch { # Listener {post_accept} callback
 sub dclose {
 	my ($self) = @_;
 	delete $self->{lxs}; # stops LeiXSearch queries
+	close(delete $self->{1}) if $self->{1}; # may reap_compress
 	$self->close if $self->{sock}; # PublicInbox::DS::close
 }
 
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 99388b5b..a6e517ea 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -200,18 +200,19 @@ sub zsfx2cmd ($$$) {
 }
 
 sub _post_augment_mbox { # open a compressor process
-	my ($self, $lei) = @_;
+	my ($self, $lei, $zpipe) = @_;
 	my $zsfx = $self->{zsfx} or return;
 	my $cmd = zsfx2cmd($zsfx, undef, $lei);
-	pipe(my ($r, $w)) or die "pipe: $!";
+	my ($r, $w) = splice(@$zpipe, 0, 2);
 	my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} };
 	my $pid = spawn($cmd, $lei->{env}, $rdr);
-	$lei->{"pid.$pid"} = $cmd;
 	my $pp = gensym;
-	tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w, \&reap_compress, $lei;
+	my $dup = bless { "pid.$pid" => $cmd }, ref($lei);
+	$dup->{$_} = $lei->{$_} for qw(2 sock);
+	tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w, \&reap_compress, $dup;
 	$lei->{1} = $pp;
 	die 'BUG: unexpected {ovv}->{lock_path}' if $lei->{ovv}->{lock_path};
-	$lei->{ovv}->ovv_out_lk_init if ($lei->{opt}->{jobs} // 2) > 1;
+	$lei->{ovv}->ovv_out_lk_init;
 }
 
 sub decompress_src ($$$) {
@@ -395,7 +396,9 @@ sub _pre_augment_mbox {
 		die "seek($dst): $!\n";
 	}
 	state $zsfx_allow = join('|', keys %zsfx2cmd);
-	($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/);
+	($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/) or return;
+	pipe(my ($r, $w)) or die "pipe: $!";
+	[ $r, $w ];
 }
 
 sub _do_augment_mbox {
@@ -433,10 +436,10 @@ sub do_augment { # slow, runs in wq worker
 }
 
 sub post_augment { # fast (spawn compressor or mkdir), runs in main daemon
-	my ($self, $lei) = @_;
+	my ($self, $lei, @args) = @_;
 	# _post_augment_maildir, _post_augment_mbox
 	my $m = "_post_augment_$self->{base_type}";
-	$self->$m($lei);
+	$self->$m($lei, @args);
 }
 
 sub write_mail { # via ->wq_do
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 120857b8..002791c2 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -191,17 +191,22 @@ sub query_done { # EOF callback
 		dwaitpid($_, $ipc_worker_reap, $l2m) for @$pids;
 	}
 	$lei->{ovv}->ovv_end($lei);
-	$lei->start_mua if $l2m;
+	if ($l2m) { # calls LeiToMail reap_compress
+		close(delete($lei->{1})) if $lei->{1};
+		$lei->start_mua;
+	}
 	$lei->dclose;
 }
 
+sub do_post_augment {
+	my ($lei, $zpipe, $au_done) = @_;
+	my $l2m = $lei->{l2m} or die 'BUG: no {l2m}';
+	$l2m->post_augment($lei, $zpipe);
+	close $au_done; # triggers wait_startq
+}
+
 sub start_query { # always runs in main (lei-daemon) process
 	my ($self, $io, $lei, $srcs) = @_;
-	if (my $l2m = $lei->{l2m}) {
-		$lei->{1} = $io->[1];
-		$l2m->post_augment($lei);
-		$io->[1] = delete $lei->{1};
-	}
 	my $remotes = $self->{remotes} // [];
 	if ($lei->{opt}->{thread}) {
 		for my $ibxish (@$srcs) {
@@ -221,9 +226,11 @@ sub start_query { # always runs in main (lei-daemon) process
 sub query_prepare { # called by wq_do
 	my ($self, $lei) = @_;
 	my %sig = $lei->atfork_child_wq($self);
+	-p $lei->{0} or die "BUG: \$done pipe expected";
 	local @SIG{keys %sig} = values %sig;
 	eval { $lei->{l2m}->do_augment($lei) };
 	$lei->fail($@) if $@;
+	syswrite($lei->{0}, '.') == 1 or die "do_post_augment trigger: $!";
 }
 
 sub sigpipe_handler {
@@ -253,26 +260,31 @@ sub do_query {
 	$done = PublicInbox::OpPipe->new($done, $done_op, $in_loop);
 	my $l2m = $lei->{l2m};
 	if ($l2m) {
-		$l2m->pre_augment($lei_orig); # may redirect $lei->{1} for mbox
+		# may redirect $lei->{1} for mbox
+		my $zpipe = $l2m->pre_augment($lei_orig);
 		$io[1] = $lei_orig->{1};
-		my @l2m_io = (undef, @io[1..$#io]);
-		pipe(my $startq, $l2m_io[0]) or die "pipe: $!";
-		$self->wq_do('query_prepare', \@l2m_io, $lei);
+		pipe(my ($startq, $au_done)) or die "pipe: $!";
+		$done_op->{'.'} = [ \&do_post_augment, $lei_orig,
+					$zpipe, $au_done ];
 		$io[4] = *STDERR{GLOB}; # don't send l2m->{-wq_s1}
+		$self->wq_do('query_prepare', \@io, $lei);
 		die "BUG: unexpected \$io[5]: $io[5]" if $io[5];
 		fcntl($startq, 1031, 4096) if $^O eq 'linux'; # F_SETPIPE_SZ
 		$io[5] = $startq;
+		$io[1] = $zpipe->[1] if $zpipe;
 	}
 	start_query($self, \@io, $lei, $srcs);
 	unless ($in_loop) {
 		my @pids = $self->wq_close;
 		# for the $lei->atfork_child_wq PIPE handler:
 		$done_op->{'!'}->[3] = \@pids;
-		$done->event_step;
+		# $done->event_step;
+		# my $ipc_worker_reap = $self->can('ipc_worker_reap');
+		# if (my $l2m_pids = delete $self->{l2m_pids}) {
+			# dwaitpid($_, $ipc_worker_reap, $l2m) for @$l2m_pids;
+		# }
+		while ($done->{sock}) { $done->event_step }
 		my $ipc_worker_reap = $self->can('ipc_worker_reap');
-		if (my $l2m_pids = delete $self->{l2m_pids}) {
-			dwaitpid($_, $ipc_worker_reap, $l2m) for @$l2m_pids;
-		}
 		dwaitpid($_, $ipc_worker_reap, $self) for @pids;
 	}
 }
diff --git a/t/lei.t b/t/lei.t
index c4692217..8eede13e 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -189,25 +189,35 @@ my $test_external = sub {
 	# No double-quoting should be imposed on users on the CLI
 	$lei->('q', 's:use boolean prefix');
 	like($out, qr/search: use boolean prefix/, 'phrase search got result');
+	require IO::Uncompress::Gunzip;
+	for my $sfx ('', '.gz') {
+		my $f = "$home/mbox$sfx";
+		$lei->('q', '-o', "mboxcl2:$f", 's:use boolean prefix');
+		my $cat = $sfx eq '' ? sub {
+			open my $mb, '<', $f or fail "no mbox: $!";
+			<$mb>
+		} : sub {
+			my $z = IO::Uncompress::Gunzip->new($f, MultiStream=>1);
+			<$z>;
+		};
+		my @s = grep(/^Subject:/, $cat->());
+		is(scalar(@s), 1, "1 result in mbox$sfx");
+		$lei->('q', '-a', '-o', "mboxcl2:$f", 's:see attachment');
+		is($err, '', 'no errors from augment');
+		@s = grep(/^Subject:/, my @wtf = $cat->());
+		is(scalar(@s), 2, "2 results in mbox$sfx");
 
-	$lei->('q', '-o', "mboxcl2:$home/mbox", 's:use boolean prefix');
-	open my $mb, '<', "$home/mbox" or fail "no mbox: $!";
-	my @s = grep(/^Subject:/, <$mb>);
-	is(scalar(@s), 1, '1 result in mbox');
-	$lei->('q', '-a', '-o', "mboxcl2:$home/mbox", 's:see attachment');
-	is($err, '', 'no errors from augment');
-	seek($mb, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-	@s = grep(/^Subject:/, <$mb>);
-	is(scalar(@s), 2, '2 results in mbox');
+		$lei->('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent');
+		is($err, '', "no errors on no results ($sfx)");
 
-	$lei->('q', '-a', '-o', "mboxcl2:$home/mbox", 's:nonexistent');
-	is($err, '', 'no errors on no results');
-	seek($mb, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-	my @s2 = grep(/^Subject:/, <$mb>);
-	is_deeply(\@s2, \@s, 'same 2 old results w/ --augment and bad search');
+		my @s2 = grep(/^Subject:/, $cat->());
+		is_deeply(\@s2, \@s,
+			"same 2 old results w/ --augment and bad search $sfx");
 
-	$lei->('q', '-o', "mboxcl2:$home/mbox", 's:nonexistent');
-	is(-s "$home/mbox", 0, 'clobber w/o --augment');
+		$lei->('q', '-o', "mboxcl2:$f", 's:nonexistent');
+		my @res = $cat->();
+		is_deeply(\@res, [], "clobber w/o --augment $sfx");
+	}
 };
 
 my $test_lei_common = sub {
diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index e5ac8eac..6673d9a6 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -94,9 +94,9 @@ my $wcb_get = sub {
 		my $dup = Storable::thaw(Storable::freeze($l2m));
 		is_deeply($dup, $l2m, "$fmt round-trips through storable");
 	}
-	$l2m->pre_augment($lei);
+	my $zpipe = $l2m->pre_augment($lei);
 	$l2m->do_augment($lei);
-	$l2m->post_augment($lei);
+	$l2m->post_augment($lei, $zpipe);
 	my $cb = $l2m->write_cb($lei);
 	delete $lei->{1};
 	$cb;

  parent reply	other threads:[~2021-01-19  9:34 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-19  9:34 [PATCH 0/9] lei bugfixes and error handling Eric Wong
2021-01-19  9:34 ` [PATCH 1/9] lei q: start ->mset while query_prepare runs Eric Wong
2021-01-19  9:34 ` [PATCH 2/9] lei q: fix SIGPIPE handling from lei2mail workers Eric Wong
2021-01-19  9:34 ` [PATCH 3/9] lei q: do not spawn MUA early Eric Wong
2021-01-19  9:34 ` [PATCH 4/9] lei: write daemon errors to the sock directory Eric Wong
2021-01-19  9:34 ` Eric Wong [this message]
2021-01-19  9:34 ` [PATCH 6/9] lei_overview: do not write if $lei->{1} is gone Eric Wong
2021-01-19  9:34 ` [PATCH 7/9] t/lei: fix double-running of socket test with oneshot Eric Wong
2021-01-19  9:34 ` [PATCH 8/9] lei: test some likely errors due to misuse Eric Wong
2021-01-19  9:34 ` [PATCH 9/9] lei_overview: start implementing format detection Eric Wong
2021-01-20  5:04 ` [PATCH 0/7] lei: fixes piled higher and deeper Eric Wong
2021-01-20  5:16   ` misnumbered, should be [PATCH 10/9]..[PATCH 16/9] :x Eric Wong
2021-01-20  5:04 ` [PATCH 1/7] lei: allow more mbox inode types Eric Wong
2021-01-20  5:04 ` [PATCH 2/7] lei: exit code in oneshot mode Eric Wong
2021-01-20  5:04 ` [PATCH 3/7] overidx: eidx_prep: fix leftover dbh reference Eric Wong
2021-01-20  5:04 ` [PATCH 4/7] lei q: cleanup store initialization Eric Wong
2021-01-20  5:04 ` [PATCH 5/7] lei: dump and clear errors.log in daemon mode Eric Wong
2021-01-20  5:04 ` [PATCH 6/7] lei_xsearch: keep l2m->{-wq_s1} while preparing query Eric Wong
2021-01-20  5:04 ` [PATCH 7/7] lei_to_mail: call PublicInbox::IPC::DESTROY Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210119093435.17955-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).