user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] git: use built-in spawn implementation for vfork
@ 2016-02-27 21:55 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2016-02-27 21:55 UTC (permalink / raw)
  To: meta

This should reduce overhead of spawning git processes
from our long-running httpd and nntpd servers.
---
 lib/PublicInbox/Git.pm            | 16 ++++-----------
 lib/PublicInbox/GitHTTPBackend.pm | 43 +++++++++++++++++----------------------
 lib/PublicInbox/ProcessPipe.pm    | 30 +++++++++++++++++++++++++++
 lib/PublicInbox/Spawn.pm          | 34 ++++++++++++++++++++++++++++++-
 t/spawn.t                         | 36 +++++++++++++++++++++++++++++++-
 5 files changed, 121 insertions(+), 38 deletions(-)
 create mode 100644 lib/PublicInbox/ProcessPipe.pm

diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 5135862..57d17d3 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -11,6 +11,7 @@ use strict;
 use warnings;
 use POSIX qw(dup2);
 require IO::Handle;
+use PublicInbox::Spawn qw(spawn popen_rd);
 
 sub new {
 	my ($class, $git_dir) = @_;
@@ -26,13 +27,8 @@ sub _bidi_pipe {
 	pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
 
 	my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
-	$self->{$pid} = fork;
-	defined $self->{$pid} or fail($self, "fork failed: $!");
-	if ($self->{$pid} == 0) {
-		dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n";
-		dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n";
-		exec(@cmd) or die 'exec `' . join(' '). "' failed: $!\n";
-	}
+	my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+	$self->{$pid} = spawn(\@cmd, undef, $redir);
 	close $out_r or fail($self, "close failed: $!");
 	close $in_w or fail($self, "close failed: $!");
 	$out_w->autoflush(1);
@@ -123,12 +119,8 @@ sub fail {
 
 sub popen {
 	my ($self, @cmd) = @_;
-	my $mode = '-|';
-	$mode = shift @cmd if ($cmd[0] eq '|-');
 	@cmd = ('git', "--git-dir=$self->{git_dir}", @cmd);
-	my $pid = open my $fh, $mode, @cmd or
-		die('open `'.join(' ', @cmd) . " pipe failed: $!\n");
-	$fh;
+	popen_rd(\@cmd);
 }
 
 sub cleanup {
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index f8446aa..6e8ad95 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -7,7 +7,7 @@ package PublicInbox::GitHTTPBackend;
 use strict;
 use warnings;
 use Fcntl qw(:seek);
-use POSIX qw(dup2);
+use PublicInbox::Spawn qw(spawn);
 
 # n.b. serving "description" and "cloneurl" should be innocuous enough to
 # not cause problems.  serving "config" might...
@@ -142,31 +142,26 @@ sub serve_smart {
 		$err->print("error creating pipe: $!\n");
 		return r(500);
 	}
-	my $pid = fork; # TODO: vfork under Linux...
-	unless (defined $pid) {
-		$err->print("error forking: $!\n");
-		return r(500);
+	my %env = %ENV;
+	# GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
+	# may be set in the server-process and are passed as-is
+	foreach my $name (qw(QUERY_STRING
+				REMOTE_USER REMOTE_ADDR
+				HTTP_CONTENT_ENCODING
+				CONTENT_TYPE
+				SERVER_PROTOCOL
+				REQUEST_METHOD)) {
+		my $val = $env->{$name};
+		$env{$name} = $val if defined $val;
 	}
 	my $git_dir = $git->{git_dir};
-	if ($pid == 0) {
-		# GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
-		# may be set in the server-process and are passed as-is
-		foreach my $name (qw(QUERY_STRING
-					REMOTE_USER REMOTE_ADDR
-					HTTP_CONTENT_ENCODING
-					CONTENT_TYPE
-					SERVER_PROTOCOL
-					REQUEST_METHOD)) {
-			my $val = $env->{$name};
-			$ENV{$name} = $val if defined $val;
-		}
-		# $ENV{GIT_PROJECT_ROOT} = $git->{git_dir};
-		$ENV{GIT_HTTP_EXPORT_ALL} = '1';
-		$ENV{PATH_TRANSLATED} = "$git_dir/$path";
-		dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
-		dup2(fileno($wpipe), 1) or die "redirect stdout failed: $!\n";
-		my @cmd = qw(git http-backend);
-		exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
+	$env{GIT_HTTP_EXPORT_ALL} = '1';
+	$env{PATH_TRANSLATED} = "$git_dir/$path";
+	my %rdr = ( 0 => fileno($in), 1 => fileno($wpipe) );
+	my $pid = spawn([qw(git http-backend)], \%env, \%rdr);
+	unless (defined $pid) {
+		$err->print("error spawning: $!\n");
+		return r(500);
 	}
 	$wpipe = $in = undef;
 	$buf = '';
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
new file mode 100644
index 0000000..eade524
--- /dev/null
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -0,0 +1,30 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# a tied handle for auto reaping of children tied to a pipe, see perltie(1)
+package PublicInbox::ProcessPipe;
+use strict;
+use warnings;
+
+sub TIEHANDLE {
+	my ($class, $pid, $fh) = @_;
+	bless { pid => $pid, fh => $fh }, $class;
+}
+
+sub READ { sysread($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
+
+sub READLINE { readline($_[0]->{fh}) }
+
+sub CLOSE { close($_[0]->{fh}) }
+
+sub FILENO { fileno($_[0]->{fh}) }
+
+sub DESTROY {
+	my $fh = delete($_[0]->{fh});
+	close $fh if $fh;
+	waitpid($_[0]->{pid}, 0);
+}
+
+sub pid { $_[0]->{pid} }
+
+1;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index aa8d81b..394a0b4 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -1,10 +1,22 @@
 # Copyright (C) 2016 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# This allows vfork to be used for spawning subprocesses if
+# PERL_INLINE_DIRECTORY is explicitly defined in the environment.
+# Under Linux, vfork can make a big difference in spawning performance
+# as process size increases (fork still needs to mark pages for CoW use).
+# Currently, we only use this for code intended for long running
+# daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
+# scripts (-mda, -index, -learn, -init) either use IPC::run or standard
+# Perl routines.
+
 package PublicInbox::Spawn;
 use strict;
 use warnings;
 use base qw(Exporter);
-our @EXPORT_OK = qw/which spawn/;
+use Symbol qw(gensym);
+use PublicInbox::ProcessPipe;
+our @EXPORT_OK = qw/which spawn popen_rd/;
 
 my $vfork_spawn = <<'VFORK_SPAWN';
 #include <sys/types.h>
@@ -149,4 +161,24 @@ sub spawn ($;$$) {
 	public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env);
 }
 
+sub popen_rd {
+	my ($cmd, $env, $opts) = @_;
+	unless (wantarray || defined $vfork_spawn || defined $env) {
+		open my $fh, '-|', @$cmd or
+			die('open `'.join(' ', @$cmd) . " pipe failed: $!\n");
+		return $fh
+	}
+	pipe(my ($r, $w)) or die "pipe: $!\n";
+	$opts ||= {};
+	my $blocking = $opts->{Blocking};
+	$r->blocking($blocking) if defined $blocking;
+	$opts->{1} = fileno($w);
+	my $pid = spawn($cmd, $env, $opts);
+	close $w;
+	return ($r, $pid) if wantarray;
+	my $ret = gensym;
+	tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r;
+	$ret;
+}
+
 1;
diff --git a/t/spawn.t b/t/spawn.t
index ed9b5b0..d52b646 100644
--- a/t/spawn.t
+++ b/t/spawn.t
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 use Test::More;
-use PublicInbox::Spawn qw(which spawn);
+use PublicInbox::Spawn qw(which spawn popen_rd);
 
 {
 	my $true = which('true');
@@ -48,6 +48,40 @@ use PublicInbox::Spawn qw(which spawn);
 	is($?, 0, 'env(1) exited successfully');
 }
 
+{
+	my $fh = popen_rd([qw(echo hello)]);
+	ok(fileno($fh) >= 0, 'tied fileno works');
+	my $l = <$fh>;
+	is($l, "hello\n", 'tied readline works');
+	$l = <$fh>;
+	ok(!$l, 'tied readline works for EOF');
+}
+
+{
+	my $fh = popen_rd([qw(printf foo\nbar)]);
+	ok(fileno($fh) >= 0, 'tied fileno works');
+	my @line = <$fh>;
+	is_deeply(\@line, [ "foo\n", 'bar' ], 'wantarray works on readline');
+}
+
+{
+	my $fh = popen_rd([qw(echo hello)]);
+	my $buf;
+	is(sysread($fh, $buf, 6), 6, 'sysread got 6 bytes');
+	is($buf, "hello\n", 'tied gets works');
+	is(sysread($fh, $buf, 6), 0, 'sysread got EOF');
+}
+
+{
+	my ($fh, $pid) = popen_rd([qw(sleep 60)], undef, { Blocking => 0 });
+	ok(defined $pid && $pid > 0, 'returned pid when array requested');
+	is(kill(0, $pid), 1, 'child process is running');
+	ok(!defined(sysread($fh, my $buf, 1)) && $!{EAGAIN},
+	   'sysread returned quickly with EAGAIN');
+	is(kill(15, $pid), 1, 'child process killed early');
+	is(waitpid($pid, 0), $pid, 'child process reapable');
+}
+
 done_testing();
 
 1;
-- 
EW


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2016-02-27 21:55 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-27 21:55 [PATCH] git: use built-in spawn implementation for vfork Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).