* [PATCH] git: use built-in spawn implementation for vfork
@ 2016-02-27 21:55 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2016-02-27 21:55 UTC (permalink / raw)
To: meta
This should reduce overhead of spawning git processes
from our long-running httpd and nntpd servers.
---
lib/PublicInbox/Git.pm | 16 ++++-----------
lib/PublicInbox/GitHTTPBackend.pm | 43 +++++++++++++++++----------------------
lib/PublicInbox/ProcessPipe.pm | 30 +++++++++++++++++++++++++++
lib/PublicInbox/Spawn.pm | 34 ++++++++++++++++++++++++++++++-
t/spawn.t | 36 +++++++++++++++++++++++++++++++-
5 files changed, 121 insertions(+), 38 deletions(-)
create mode 100644 lib/PublicInbox/ProcessPipe.pm
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 5135862..57d17d3 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -11,6 +11,7 @@ use strict;
use warnings;
use POSIX qw(dup2);
require IO::Handle;
+use PublicInbox::Spawn qw(spawn popen_rd);
sub new {
my ($class, $git_dir) = @_;
@@ -26,13 +27,8 @@ sub _bidi_pipe {
pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
- $self->{$pid} = fork;
- defined $self->{$pid} or fail($self, "fork failed: $!");
- if ($self->{$pid} == 0) {
- dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n";
- dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n";
- exec(@cmd) or die 'exec `' . join(' '). "' failed: $!\n";
- }
+ my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+ $self->{$pid} = spawn(\@cmd, undef, $redir);
close $out_r or fail($self, "close failed: $!");
close $in_w or fail($self, "close failed: $!");
$out_w->autoflush(1);
@@ -123,12 +119,8 @@ sub fail {
sub popen {
my ($self, @cmd) = @_;
- my $mode = '-|';
- $mode = shift @cmd if ($cmd[0] eq '|-');
@cmd = ('git', "--git-dir=$self->{git_dir}", @cmd);
- my $pid = open my $fh, $mode, @cmd or
- die('open `'.join(' ', @cmd) . " pipe failed: $!\n");
- $fh;
+ popen_rd(\@cmd);
}
sub cleanup {
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index f8446aa..6e8ad95 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -7,7 +7,7 @@ package PublicInbox::GitHTTPBackend;
use strict;
use warnings;
use Fcntl qw(:seek);
-use POSIX qw(dup2);
+use PublicInbox::Spawn qw(spawn);
# n.b. serving "description" and "cloneurl" should be innocuous enough to
# not cause problems. serving "config" might...
@@ -142,31 +142,26 @@ sub serve_smart {
$err->print("error creating pipe: $!\n");
return r(500);
}
- my $pid = fork; # TODO: vfork under Linux...
- unless (defined $pid) {
- $err->print("error forking: $!\n");
- return r(500);
+ my %env = %ENV;
+ # GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
+ # may be set in the server-process and are passed as-is
+ foreach my $name (qw(QUERY_STRING
+ REMOTE_USER REMOTE_ADDR
+ HTTP_CONTENT_ENCODING
+ CONTENT_TYPE
+ SERVER_PROTOCOL
+ REQUEST_METHOD)) {
+ my $val = $env->{$name};
+ $env{$name} = $val if defined $val;
}
my $git_dir = $git->{git_dir};
- if ($pid == 0) {
- # GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
- # may be set in the server-process and are passed as-is
- foreach my $name (qw(QUERY_STRING
- REMOTE_USER REMOTE_ADDR
- HTTP_CONTENT_ENCODING
- CONTENT_TYPE
- SERVER_PROTOCOL
- REQUEST_METHOD)) {
- my $val = $env->{$name};
- $ENV{$name} = $val if defined $val;
- }
- # $ENV{GIT_PROJECT_ROOT} = $git->{git_dir};
- $ENV{GIT_HTTP_EXPORT_ALL} = '1';
- $ENV{PATH_TRANSLATED} = "$git_dir/$path";
- dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
- dup2(fileno($wpipe), 1) or die "redirect stdout failed: $!\n";
- my @cmd = qw(git http-backend);
- exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
+ $env{GIT_HTTP_EXPORT_ALL} = '1';
+ $env{PATH_TRANSLATED} = "$git_dir/$path";
+ my %rdr = ( 0 => fileno($in), 1 => fileno($wpipe) );
+ my $pid = spawn([qw(git http-backend)], \%env, \%rdr);
+ unless (defined $pid) {
+ $err->print("error spawning: $!\n");
+ return r(500);
}
$wpipe = $in = undef;
$buf = '';
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
new file mode 100644
index 0000000..eade524
--- /dev/null
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -0,0 +1,30 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# a tied handle for auto reaping of children tied to a pipe, see perltie(1)
+package PublicInbox::ProcessPipe;
+use strict;
+use warnings;
+
+sub TIEHANDLE {
+ my ($class, $pid, $fh) = @_;
+ bless { pid => $pid, fh => $fh }, $class;
+}
+
+sub READ { sysread($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
+
+sub READLINE { readline($_[0]->{fh}) }
+
+sub CLOSE { close($_[0]->{fh}) }
+
+sub FILENO { fileno($_[0]->{fh}) }
+
+sub DESTROY {
+ my $fh = delete($_[0]->{fh});
+ close $fh if $fh;
+ waitpid($_[0]->{pid}, 0);
+}
+
+sub pid { $_[0]->{pid} }
+
+1;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index aa8d81b..394a0b4 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -1,10 +1,22 @@
# Copyright (C) 2016 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# This allows vfork to be used for spawning subprocesses if
+# PERL_INLINE_DIRECTORY is explicitly defined in the environment.
+# Under Linux, vfork can make a big difference in spawning performance
+# as process size increases (fork still needs to mark pages for CoW use).
+# Currently, we only use this for code intended for long running
+# daemons (inside the PSGI code (-httpd) and -nntpd). The short-lived
+# scripts (-mda, -index, -learn, -init) either use IPC::run or standard
+# Perl routines.
+
package PublicInbox::Spawn;
use strict;
use warnings;
use base qw(Exporter);
-our @EXPORT_OK = qw/which spawn/;
+use Symbol qw(gensym);
+use PublicInbox::ProcessPipe;
+our @EXPORT_OK = qw/which spawn popen_rd/;
my $vfork_spawn = <<'VFORK_SPAWN';
#include <sys/types.h>
@@ -149,4 +161,24 @@ sub spawn ($;$$) {
public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env);
}
+sub popen_rd {
+ my ($cmd, $env, $opts) = @_;
+ unless (wantarray || defined $vfork_spawn || defined $env) {
+ open my $fh, '-|', @$cmd or
+ die('open `'.join(' ', @$cmd) . " pipe failed: $!\n");
+ return $fh
+ }
+ pipe(my ($r, $w)) or die "pipe: $!\n";
+ $opts ||= {};
+ my $blocking = $opts->{Blocking};
+ $r->blocking($blocking) if defined $blocking;
+ $opts->{1} = fileno($w);
+ my $pid = spawn($cmd, $env, $opts);
+ close $w;
+ return ($r, $pid) if wantarray;
+ my $ret = gensym;
+ tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r;
+ $ret;
+}
+
1;
diff --git a/t/spawn.t b/t/spawn.t
index ed9b5b0..d52b646 100644
--- a/t/spawn.t
+++ b/t/spawn.t
@@ -3,7 +3,7 @@
use strict;
use warnings;
use Test::More;
-use PublicInbox::Spawn qw(which spawn);
+use PublicInbox::Spawn qw(which spawn popen_rd);
{
my $true = which('true');
@@ -48,6 +48,40 @@ use PublicInbox::Spawn qw(which spawn);
is($?, 0, 'env(1) exited successfully');
}
+{
+ my $fh = popen_rd([qw(echo hello)]);
+ ok(fileno($fh) >= 0, 'tied fileno works');
+ my $l = <$fh>;
+ is($l, "hello\n", 'tied readline works');
+ $l = <$fh>;
+ ok(!$l, 'tied readline works for EOF');
+}
+
+{
+ my $fh = popen_rd([qw(printf foo\nbar)]);
+ ok(fileno($fh) >= 0, 'tied fileno works');
+ my @line = <$fh>;
+ is_deeply(\@line, [ "foo\n", 'bar' ], 'wantarray works on readline');
+}
+
+{
+ my $fh = popen_rd([qw(echo hello)]);
+ my $buf;
+ is(sysread($fh, $buf, 6), 6, 'sysread got 6 bytes');
+ is($buf, "hello\n", 'tied gets works');
+ is(sysread($fh, $buf, 6), 0, 'sysread got EOF');
+}
+
+{
+ my ($fh, $pid) = popen_rd([qw(sleep 60)], undef, { Blocking => 0 });
+ ok(defined $pid && $pid > 0, 'returned pid when array requested');
+ is(kill(0, $pid), 1, 'child process is running');
+ ok(!defined(sysread($fh, my $buf, 1)) && $!{EAGAIN},
+ 'sysread returned quickly with EAGAIN');
+ is(kill(15, $pid), 1, 'child process killed early');
+ is(waitpid($pid, 0), $pid, 'child process reapable');
+}
+
done_testing();
1;
--
EW
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2016-02-27 21:55 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-27 21:55 [PATCH] git: use built-in spawn implementation for vfork Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).