From ca885bd5905b7faa9ecb7b0eb02476de1d3a7f88 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 27 Feb 2016 02:14:23 +0000 Subject: initial spawn implementation using vfork Under Linux, vfork maintains constant performance as parent process size increases. fork needs to prepare pages for copy-on-write, requiring a linear scan of the address space. --- lib/PublicInbox/Spawn.pm | 152 +++++++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/SpawnPP.pm | 33 ++++++++++ t/spawn.t | 53 ++++++++++++++++ 3 files changed, 238 insertions(+) create mode 100644 lib/PublicInbox/Spawn.pm create mode 100644 lib/PublicInbox/SpawnPP.pm create mode 100644 t/spawn.t diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm new file mode 100644 index 00000000..aa8d81b3 --- /dev/null +++ b/lib/PublicInbox/Spawn.pm @@ -0,0 +1,152 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +package PublicInbox::Spawn; +use strict; +use warnings; +use base qw(Exporter); +our @EXPORT_OK = qw/which spawn/; + +my $vfork_spawn = <<'VFORK_SPAWN'; +#include +#include +#include +#include + +#define AV_ALLOCA(av, max) alloca((max = (av_len((av)) + 1)) * sizeof(char *)) + +static void av2c_copy(char **dst, AV *src, I32 max) +{ + I32 i; + + for (i = 0; i < max; i++) { + SV **sv = av_fetch(src, i, 0); + dst[i] = sv ? SvPV_nolen(*sv) : 0; + } + dst[max] = 0; +} + +static void *deconst(const char *s) +{ + union { const char *in; void *out; } u; + u.in = s; + return u.out; +} + +/* needs to be safe inside a vfork'ed process */ +static void xerr(const char *msg) +{ + struct iovec iov[3]; + const char *err = strerror(errno); /* should be safe in practice */ + + iov[0].iov_base = deconst(msg); + iov[0].iov_len = strlen(msg); + iov[1].iov_base = deconst(err); + iov[1].iov_len = strlen(err); + iov[2].iov_base = deconst("\n"); + iov[2].iov_len = 1; + writev(2, iov, 3); + _exit(1); +} + +#define REDIR(var,fd) do { \ + if (var != fd && dup2(var, fd) < 0) \ + xerr("error redirecting std"#var ": "); \ +} while (0) + +/* + * unstable internal API. This was easy to implement but does not + * support arbitrary redirects. It'll be updated depending on + * whatever we'll need in the future. + * Be sure to update PublicInbox::SpawnPP if this changes + */ +int public_inbox_fork_exec(int in, int out, int err, + SV *file, SV *cmdref, SV *envref) +{ + AV *cmd = (AV *)SvRV(cmdref); + AV *env = (AV *)SvRV(envref); + const char *filename = SvPV_nolen(file); + pid_t pid; + char **argv, **envp; + I32 max; + + argv = AV_ALLOCA(cmd, max); + av2c_copy(argv, cmd, max); + + envp = AV_ALLOCA(env, max); + av2c_copy(envp, env, max); + + pid = vfork(); + if (pid == 0) { + REDIR(in, 0); + REDIR(out, 1); + REDIR(err, 2); + execve(filename, argv, envp); + xerr("execve failed"); + } + + return (int)pid; +} +VFORK_SPAWN + +my $inline_dir = $ENV{PERL_INLINE_DIRECTORY}; +$vfork_spawn = undef unless defined $inline_dir && -d $inline_dir && -w _; +if (defined $vfork_spawn) { + # Inline 0.64 or later has locking in multi-process env, + # but we support 0.5 on Debian wheezy + use Fcntl qw(:flock); + eval { + my $f = "$inline_dir/.public-inbox.lock"; + open my $fh, '>', $f or die "failed to open $f: $!\n"; + flock($fh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n"; + eval 'use Inline C => $vfork_spawn'; + flock($fh, LOCK_UN) or die "LOCK_UN failed on $f: $!\n"; + }; + if ($@) { + warn "Inline::C failed for vfork: $@\n"; + $vfork_spawn = undef; + } +} + +unless (defined $vfork_spawn) { + require PublicInbox::SpawnPP; + no warnings 'once'; + *public_inbox_fork_exec = *PublicInbox::SpawnPP::public_inbox_fork_exec +} + +sub which ($) { + my ($file) = @_; + foreach my $p (split(':', $ENV{PATH})) { + $p .= "/$file"; + return $p if -x $p; + } + undef; +} + +sub spawn ($;$$) { + my ($cmd, $env, $opts) = @_; + my $f = which($cmd->[0]); + defined $f or die "$cmd->[0]: command not found\n"; + my @env; + $opts ||= {}; + + my %env = $opts->{-env} ? () : %ENV; + if ($env) { + foreach my $k (keys %$env) { + my $v = $env->{$k}; + if (defined $v) { + $env{$k} = $v; + } else { + delete $env{$k}; + } + } + } + while (my ($k, $v) = each %env) { + push @env, "$k=$v"; + } + my $in = $opts->{0} || 0; + my $out = $opts->{1} || 1; + my $err = $opts->{2} || 2; + public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env); +} + +1; diff --git a/lib/PublicInbox/SpawnPP.pm b/lib/PublicInbox/SpawnPP.pm new file mode 100644 index 00000000..ae552dd8 --- /dev/null +++ b/lib/PublicInbox/SpawnPP.pm @@ -0,0 +1,33 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +package PublicInbox::SpawnPP; +use strict; +use warnings; +use POSIX qw(dup2); + +# Pure Perl implementation for folks that do not use Inline::C +sub public_inbox_fork_exec ($$$$$$) { + my ($in, $out, $err, $f, $cmd, $env) = @_; + my $pid = fork; + if ($pid == 0) { + if ($in != 0) { + dup2($in, 0) or die "dup2 failed for stdin: $!"; + } + if ($out != 1) { + dup2($out, 1) or die "dup2 failed for stdout: $!"; + } + if ($err != 2) { + dup2($err, 2) or die "dup2 failed for stderr$!"; + } + %ENV = (); + foreach my $e (@$env) { + my ($k, $v) = split('=', $e, 2); + $ENV{$k} = $v; + } + exec @$cmd; + exit 1; + } + $pid; +} + +1; diff --git a/t/spawn.t b/t/spawn.t new file mode 100644 index 00000000..ed9b5b08 --- /dev/null +++ b/t/spawn.t @@ -0,0 +1,53 @@ +# Copyright (C) 2015 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use PublicInbox::Spawn qw(which spawn); + +{ + my $true = which('true'); + ok($true, "'true' command found with which()"); +} + +{ + my $pid = spawn(['true']); + ok($pid, 'spawned process'); + is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); + is($?, 0, 'true exited successfully'); +} + +{ + my ($r, $w); + pipe $r, $w or die "pipe failed: $!"; + my $pid = spawn(['echo', 'hello world'], undef, { 1 => fileno($w) }); + close $w or die "close pipe[1] failed: $!"; + is(<$r>, "hello world\n", 'read stdout of spawned from pipe'); + is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); + is($?, 0, 'true exited successfully'); +} + +{ + my ($r, $w); + pipe $r, $w or die "pipe failed: $!"; + my $pid = spawn(['sh', '-c', 'echo $HELLO'], + { 'HELLO' => 'world' }, { 1 => fileno($w) }); + close $w or die "close pipe[1] failed: $!"; + is(<$r>, "world\n", 'read stdout of spawned from pipe'); + is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); + is($?, 0, 'sh exited successfully'); +} + +{ + my ($r, $w); + pipe $r, $w or die "pipe failed: $!"; + my $pid = spawn(['env'], {}, { -env => 1, 1 => fileno($w) }); + close $w or die "close pipe[1] failed: $!"; + ok(!defined(<$r>), 'read stdout of spawned from pipe'); + is(waitpid($pid, 0), $pid, 'waitpid succeeds on spawned process'); + is($?, 0, 'env(1) exited successfully'); +} + +done_testing(); + +1; -- cgit v1.2.3-24-ge0c7