From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 302E72141A for ; Wed, 30 Jan 2019 04:44:31 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 02/11] t/check-www-inbox: replace IPC::Run with PublicInbox::Spawn Date: Wed, 30 Jan 2019 04:44:21 +0000 Message-Id: <20190130044430.28189-3-e@80x24.org> In-Reply-To: <20190130044430.28189-1-e@80x24.org> References: <20190130044430.28189-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Because WWW::Mechanize uses truckload of memory, fork needs to prepare all that memory for CoW, which ends up bailing with ENOMEM. --- t/check-www-inbox.perl | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/t/check-www-inbox.perl b/t/check-www-inbox.perl index 1e88e95..933362a 100644 --- a/t/check-www-inbox.perl +++ b/t/check-www-inbox.perl @@ -14,6 +14,12 @@ use POSIX qw(:sys_wait_h); use Time::HiRes qw(gettimeofday tv_interval); use WWW::Mechanize; use Data::Dumper; + +# we want to use vfork+exec with spawn, WWW::Mechanize can use too much +# memory and fork(2) fails +use PublicInbox::Spawn qw(spawn which); +$ENV{PERL_INLINE_DIRECTORY} or warn "PERL_INLINE_DIRECTORY unset, may OOM\n"; + our $tmp_owner = $$; my $nproc = 4; my $slow = 0.5; @@ -24,13 +30,35 @@ my %opts = ( GetOptions(%opts) or die "bad command-line args\n$usage"; my $root_url = shift or die $usage; -chomp(my $xmlstarlet = `which xmlstarlet 2>/dev/null`); +chomp(my $xmlstarlet = which('xmlstarlet')); my $atom_check = eval { - require IPC::Run; my $cmd = [ qw(xmlstarlet val -e -) ]; sub { my ($in, $out, $err) = @_; - IPC::Run::run($cmd, $in, $out, $err); + use autodie; + open my $in_fh, '+>', undef; + open my $out_fh, '+>', undef; + open my $err_fh, '+>', undef; + print $in_fh $$in; + $in_fh->flush; + sysseek($in_fh, 0, 0); + my $rdr = { + 0 => fileno($in_fh), + 1 => fileno($out_fh), + 2 => fileno($err_fh), + }; + my $pid = spawn($cmd, undef, $rdr); + defined $pid or die "spawn failure: $!"; + while (waitpid($pid, 0) != $pid) { + next if $!{EINTR}; + warn "waitpid(xmlstarlet, $pid) $!"; + return $!; + } + sysseek($out_fh, 0, 0); + sysread($out_fh, $$out, -s $out_fh); + sysseek($err_fh, 0, 0); + sysread($err_fh, $$err, -s $err_fh); + $? } } if $xmlstarlet; @@ -120,6 +148,7 @@ while (keys %workers) { # reacts to SIGCHLD sub worker_loop { my ($todo_rd, $done_wr) = @_; + $SIG{CHLD} = 'DEFAULT'; my $m = WWW::Mechanize->new(autocheck => 0); my $cc = LWP::ConnCache->new; $m->conn_cache($cc); @@ -164,8 +193,8 @@ sub worker_loop { if ($atom_check && $ct =~ m!\bapplication/atom\+xml\b!) { my $raw = $r->decoded_content; my ($out, $err) = ('', ''); - $atom_check->(\$raw, \$out, \$err) and - warn "Atom ($?) - $u - <1:$out> <2:$err>\n"; + my $fail = $atom_check->(\$raw, \$out, \$err); + warn "Atom ($fail) - $u - <1:$out> <2:$err>\n" if $fail; } next if $ct !~ m!\btext/html\b!; -- EW