From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 5/9] lei q: fix augment of compressed mailboxes
Date: Tue, 19 Jan 2021 09:34:31 +0000 [thread overview]
Message-ID: <20210119093435.17955-6-e@80x24.org> (raw)
In-Reply-To: <20210119093435.17955-1-e@80x24.org>
We need to delay writing out the mailbox until the compressor
process is up and running, so have startq wait until post-augment
has spawned it. This means we must create the compressor's input
pipe early and hand it off to the workers before augmenting, even
though the gzip/pigz/xz/bzip2 process itself is only spawned after
augment is complete.
---
lib/PublicInbox/LEI.pm | 1 +
lib/PublicInbox/LeiToMail.pm | 19 +++++++++-------
lib/PublicInbox/LeiXSearch.pm | 40 +++++++++++++++++++++------------
t/lei.t | 42 ++++++++++++++++++++++-------------
t/lei_to_mail.t | 4 ++--
5 files changed, 66 insertions(+), 40 deletions(-)
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index e4f8bedb..f3edfe82 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -758,6 +758,7 @@ sub accept_dispatch { # Listener {post_accept} callback
sub dclose {
my ($self) = @_;
delete $self->{lxs}; # stops LeiXSearch queries
+ close(delete $self->{1}) if $self->{1}; # may reap_compress
$self->close if $self->{sock}; # PublicInbox::DS::close
}
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 99388b5b..a6e517ea 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -200,18 +200,19 @@ sub zsfx2cmd ($$$) {
}
sub _post_augment_mbox { # open a compressor process
- my ($self, $lei) = @_;
+ my ($self, $lei, $zpipe) = @_;
my $zsfx = $self->{zsfx} or return;
my $cmd = zsfx2cmd($zsfx, undef, $lei);
- pipe(my ($r, $w)) or die "pipe: $!";
+ my ($r, $w) = splice(@$zpipe, 0, 2);
my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2} };
my $pid = spawn($cmd, $lei->{env}, $rdr);
- $lei->{"pid.$pid"} = $cmd;
my $pp = gensym;
- tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w, \&reap_compress, $lei;
+ my $dup = bless { "pid.$pid" => $cmd }, ref($lei);
+ $dup->{$_} = $lei->{$_} for qw(2 sock);
+ tie *$pp, 'PublicInbox::ProcessPipe', $pid, $w, \&reap_compress, $dup;
$lei->{1} = $pp;
die 'BUG: unexpected {ovv}->{lock_path}' if $lei->{ovv}->{lock_path};
- $lei->{ovv}->ovv_out_lk_init if ($lei->{opt}->{jobs} // 2) > 1;
+ $lei->{ovv}->ovv_out_lk_init;
}
sub decompress_src ($$$) {
@@ -395,7 +396,9 @@ sub _pre_augment_mbox {
die "seek($dst): $!\n";
}
state $zsfx_allow = join('|', keys %zsfx2cmd);
- ($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/);
+ ($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/) or return;
+ pipe(my ($r, $w)) or die "pipe: $!";
+ [ $r, $w ];
}
sub _do_augment_mbox {
@@ -433,10 +436,10 @@ sub do_augment { # slow, runs in wq worker
}
sub post_augment { # fast (spawn compressor or mkdir), runs in main daemon
- my ($self, $lei) = @_;
+ my ($self, $lei, @args) = @_;
# _post_augment_maildir, _post_augment_mbox
my $m = "_post_augment_$self->{base_type}";
- $self->$m($lei);
+ $self->$m($lei, @args);
}
sub write_mail { # via ->wq_do
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 120857b8..002791c2 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -191,17 +191,22 @@ sub query_done { # EOF callback
dwaitpid($_, $ipc_worker_reap, $l2m) for @$pids;
}
$lei->{ovv}->ovv_end($lei);
- $lei->start_mua if $l2m;
+ if ($l2m) { # calls LeiToMail reap_compress
+ close(delete($lei->{1})) if $lei->{1};
+ $lei->start_mua;
+ }
$lei->dclose;
}
+sub do_post_augment {
+ my ($lei, $zpipe, $au_done) = @_;
+ my $l2m = $lei->{l2m} or die 'BUG: no {l2m}';
+ $l2m->post_augment($lei, $zpipe);
+ close $au_done; # triggers wait_startq
+}
+
sub start_query { # always runs in main (lei-daemon) process
my ($self, $io, $lei, $srcs) = @_;
- if (my $l2m = $lei->{l2m}) {
- $lei->{1} = $io->[1];
- $l2m->post_augment($lei);
- $io->[1] = delete $lei->{1};
- }
my $remotes = $self->{remotes} // [];
if ($lei->{opt}->{thread}) {
for my $ibxish (@$srcs) {
@@ -221,9 +226,11 @@ sub start_query { # always runs in main (lei-daemon) process
sub query_prepare { # called by wq_do
my ($self, $lei) = @_;
my %sig = $lei->atfork_child_wq($self);
+ -p $lei->{0} or die "BUG: \$done pipe expected";
local @SIG{keys %sig} = values %sig;
eval { $lei->{l2m}->do_augment($lei) };
$lei->fail($@) if $@;
+ syswrite($lei->{0}, '.') == 1 or die "do_post_augment trigger: $!";
}
sub sigpipe_handler {
@@ -253,26 +260,31 @@ sub do_query {
$done = PublicInbox::OpPipe->new($done, $done_op, $in_loop);
my $l2m = $lei->{l2m};
if ($l2m) {
- $l2m->pre_augment($lei_orig); # may redirect $lei->{1} for mbox
+ # may redirect $lei->{1} for mbox
+ my $zpipe = $l2m->pre_augment($lei_orig);
$io[1] = $lei_orig->{1};
- my @l2m_io = (undef, @io[1..$#io]);
- pipe(my $startq, $l2m_io[0]) or die "pipe: $!";
- $self->wq_do('query_prepare', \@l2m_io, $lei);
+ pipe(my ($startq, $au_done)) or die "pipe: $!";
+ $done_op->{'.'} = [ \&do_post_augment, $lei_orig,
+ $zpipe, $au_done ];
$io[4] = *STDERR{GLOB}; # don't send l2m->{-wq_s1}
+ $self->wq_do('query_prepare', \@io, $lei);
die "BUG: unexpected \$io[5]: $io[5]" if $io[5];
fcntl($startq, 1031, 4096) if $^O eq 'linux'; # F_SETPIPE_SZ
$io[5] = $startq;
+ $io[1] = $zpipe->[1] if $zpipe;
}
start_query($self, \@io, $lei, $srcs);
unless ($in_loop) {
my @pids = $self->wq_close;
# for the $lei->atfork_child_wq PIPE handler:
$done_op->{'!'}->[3] = \@pids;
- $done->event_step;
+ # $done->event_step;
+ # my $ipc_worker_reap = $self->can('ipc_worker_reap');
+ # if (my $l2m_pids = delete $self->{l2m_pids}) {
+ # dwaitpid($_, $ipc_worker_reap, $l2m) for @$l2m_pids;
+ # }
+ while ($done->{sock}) { $done->event_step }
my $ipc_worker_reap = $self->can('ipc_worker_reap');
- if (my $l2m_pids = delete $self->{l2m_pids}) {
- dwaitpid($_, $ipc_worker_reap, $l2m) for @$l2m_pids;
- }
dwaitpid($_, $ipc_worker_reap, $self) for @pids;
}
}
diff --git a/t/lei.t b/t/lei.t
index c4692217..8eede13e 100644
--- a/t/lei.t
+++ b/t/lei.t
@@ -189,25 +189,35 @@ my $test_external = sub {
# No double-quoting should be imposed on users on the CLI
$lei->('q', 's:use boolean prefix');
like($out, qr/search: use boolean prefix/, 'phrase search got result');
+ require IO::Uncompress::Gunzip;
+ for my $sfx ('', '.gz') {
+ my $f = "$home/mbox$sfx";
+ $lei->('q', '-o', "mboxcl2:$f", 's:use boolean prefix');
+ my $cat = $sfx eq '' ? sub {
+ open my $mb, '<', $f or fail "no mbox: $!";
+ <$mb>
+ } : sub {
+ my $z = IO::Uncompress::Gunzip->new($f, MultiStream=>1);
+ <$z>;
+ };
+ my @s = grep(/^Subject:/, $cat->());
+ is(scalar(@s), 1, "1 result in mbox$sfx");
+ $lei->('q', '-a', '-o', "mboxcl2:$f", 's:see attachment');
+ is($err, '', 'no errors from augment');
+ @s = grep(/^Subject:/, my @wtf = $cat->());
+ is(scalar(@s), 2, "2 results in mbox$sfx");
- $lei->('q', '-o', "mboxcl2:$home/mbox", 's:use boolean prefix');
- open my $mb, '<', "$home/mbox" or fail "no mbox: $!";
- my @s = grep(/^Subject:/, <$mb>);
- is(scalar(@s), 1, '1 result in mbox');
- $lei->('q', '-a', '-o', "mboxcl2:$home/mbox", 's:see attachment');
- is($err, '', 'no errors from augment');
- seek($mb, 0, SEEK_SET) or BAIL_OUT "seek: $!";
- @s = grep(/^Subject:/, <$mb>);
- is(scalar(@s), 2, '2 results in mbox');
+ $lei->('q', '-a', '-o', "mboxcl2:$f", 's:nonexistent');
+ is($err, '', "no errors on no results ($sfx)");
- $lei->('q', '-a', '-o', "mboxcl2:$home/mbox", 's:nonexistent');
- is($err, '', 'no errors on no results');
- seek($mb, 0, SEEK_SET) or BAIL_OUT "seek: $!";
- my @s2 = grep(/^Subject:/, <$mb>);
- is_deeply(\@s2, \@s, 'same 2 old results w/ --augment and bad search');
+ my @s2 = grep(/^Subject:/, $cat->());
+ is_deeply(\@s2, \@s,
+ "same 2 old results w/ --augment and bad search $sfx");
- $lei->('q', '-o', "mboxcl2:$home/mbox", 's:nonexistent');
- is(-s "$home/mbox", 0, 'clobber w/o --augment');
+ $lei->('q', '-o', "mboxcl2:$f", 's:nonexistent');
+ my @res = $cat->();
+ is_deeply(\@res, [], "clobber w/o --augment $sfx");
+ }
};
my $test_lei_common = sub {
diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index e5ac8eac..6673d9a6 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -94,9 +94,9 @@ my $wcb_get = sub {
my $dup = Storable::thaw(Storable::freeze($l2m));
is_deeply($dup, $l2m, "$fmt round-trips through storable");
}
- $l2m->pre_augment($lei);
+ my $zpipe = $l2m->pre_augment($lei);
$l2m->do_augment($lei);
- $l2m->post_augment($lei);
+ $l2m->post_augment($lei, $zpipe);
my $cb = $l2m->write_cb($lei);
delete $lei->{1};
$cb;
next prev parent reply other threads:[~2021-01-19 9:34 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-19 9:34 [PATCH 0/9] lei bugfixes and error handling Eric Wong
2021-01-19 9:34 ` [PATCH 1/9] lei q: start ->mset while query_prepare runs Eric Wong
2021-01-19 9:34 ` [PATCH 2/9] lei q: fix SIGPIPE handling from lei2mail workers Eric Wong
2021-01-19 9:34 ` [PATCH 3/9] lei q: do not spawn MUA early Eric Wong
2021-01-19 9:34 ` [PATCH 4/9] lei: write daemon errors to the sock directory Eric Wong
2021-01-19 9:34 ` Eric Wong [this message]
2021-01-19 9:34 ` [PATCH 6/9] lei_overview: do not write if $lei->{1} is gone Eric Wong
2021-01-19 9:34 ` [PATCH 7/9] t/lei: fix double-running of socket test with oneshot Eric Wong
2021-01-19 9:34 ` [PATCH 8/9] lei: test some likely errors due to misuse Eric Wong
2021-01-19 9:34 ` [PATCH 9/9] lei_overview: start implementing format detection Eric Wong
2021-01-20 5:04 ` [PATCH 0/7] lei: fixes piled higher and deeper Eric Wong
2021-01-20 5:16 ` misnumbered, should be [PATCH 10/9]..[PATCH 16/9] :x Eric Wong
2021-01-20 5:04 ` [PATCH 1/7] lei: allow more mbox inode types Eric Wong
2021-01-20 5:04 ` [PATCH 2/7] lei: exit code in oneshot mode Eric Wong
2021-01-20 5:04 ` [PATCH 3/7] overidx: eidx_prep: fix leftover dbh reference Eric Wong
2021-01-20 5:04 ` [PATCH 4/7] lei q: cleanup store initialization Eric Wong
2021-01-20 5:04 ` [PATCH 5/7] lei: dump and clear errors.log in daemon mode Eric Wong
2021-01-20 5:04 ` [PATCH 6/7] lei_xsearch: keep l2m->{-wq_s1} while preparing query Eric Wong
2021-01-20 5:04 ` [PATCH 7/7] lei_to_mail: call PublicInbox::IPC::DESTROY Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210119093435.17955-6-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).