From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 3F4DC20310 for ; Sat, 27 Feb 2016 21:55:07 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] git: use built-in spawn implementation for vfork Date: Sat, 27 Feb 2016 21:55:07 +0000 Message-Id: <20160227215507.13946-1-e@80x24.org> List-Id: This should reduce overhead of spawning git processes from our long-running httpd and nntpd servers. --- lib/PublicInbox/Git.pm | 16 ++++----------- lib/PublicInbox/GitHTTPBackend.pm | 43 +++++++++++++++++---------------------- lib/PublicInbox/ProcessPipe.pm | 30 +++++++++++++++++++++++++++ lib/PublicInbox/Spawn.pm | 34 ++++++++++++++++++++++++++++++- t/spawn.t | 36 +++++++++++++++++++++++++++++++- 5 files changed, 121 insertions(+), 38 deletions(-) create mode 100644 lib/PublicInbox/ProcessPipe.pm diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 5135862..57d17d3 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -11,6 +11,7 @@ use strict; use warnings; use POSIX qw(dup2); require IO::Handle; +use PublicInbox::Spawn qw(spawn popen_rd); sub new { my ($class, $git_dir) = @_; @@ -26,13 +27,8 @@ sub _bidi_pipe { pipe($out_r, $out_w) or fail($self, "pipe failed: $!"); my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch); - $self->{$pid} = fork; - defined $self->{$pid} or fail($self, "fork failed: $!"); - if ($self->{$pid} == 0) { - dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n"; - dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n"; - exec(@cmd) or die 'exec `' . join(' '). "' failed: $!\n"; - } + my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) }; + $self->{$pid} = spawn(\@cmd, undef, $redir); close $out_r or fail($self, "close failed: $!"); close $in_w or fail($self, "close failed: $!"); $out_w->autoflush(1); @@ -123,12 +119,8 @@ sub fail { sub popen { my ($self, @cmd) = @_; - my $mode = '-|'; - $mode = shift @cmd if ($cmd[0] eq '|-'); @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd); - my $pid = open my $fh, $mode, @cmd or - die('open `'.join(' ', @cmd) . " pipe failed: $!\n"); - $fh; + popen_rd(\@cmd); } sub cleanup { diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm index f8446aa..6e8ad95 100644 --- a/lib/PublicInbox/GitHTTPBackend.pm +++ b/lib/PublicInbox/GitHTTPBackend.pm @@ -7,7 +7,7 @@ package PublicInbox::GitHTTPBackend; use strict; use warnings; use Fcntl qw(:seek); -use POSIX qw(dup2); +use PublicInbox::Spawn qw(spawn); # n.b. serving "description" and "cloneurl" should be innocuous enough to # not cause problems. serving "config" might... @@ -142,31 +142,26 @@ sub serve_smart { $err->print("error creating pipe: $!\n"); return r(500); } - my $pid = fork; # TODO: vfork under Linux... - unless (defined $pid) { - $err->print("error forking: $!\n"); - return r(500); + my %env = %ENV; + # GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL + # may be set in the server-process and are passed as-is + foreach my $name (qw(QUERY_STRING + REMOTE_USER REMOTE_ADDR + HTTP_CONTENT_ENCODING + CONTENT_TYPE + SERVER_PROTOCOL + REQUEST_METHOD)) { + my $val = $env->{$name}; + $env{$name} = $val if defined $val; } my $git_dir = $git->{git_dir}; - if ($pid == 0) { - # GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL - # may be set in the server-process and are passed as-is - foreach my $name (qw(QUERY_STRING - REMOTE_USER REMOTE_ADDR - HTTP_CONTENT_ENCODING - CONTENT_TYPE - SERVER_PROTOCOL - REQUEST_METHOD)) { - my $val = $env->{$name}; - $ENV{$name} = $val if defined $val; - } - # $ENV{GIT_PROJECT_ROOT} = $git->{git_dir}; - $ENV{GIT_HTTP_EXPORT_ALL} = '1'; - $ENV{PATH_TRANSLATED} = "$git_dir/$path"; - dup2(fileno($in), 0) or die "redirect stdin failed: $!\n"; - dup2(fileno($wpipe), 1) or die "redirect stdout failed: $!\n"; - my @cmd = qw(git http-backend); - exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n"; + $env{GIT_HTTP_EXPORT_ALL} = '1'; + $env{PATH_TRANSLATED} = "$git_dir/$path"; + my %rdr = ( 0 => fileno($in), 1 => fileno($wpipe) ); + my $pid = spawn([qw(git http-backend)], \%env, \%rdr); + unless (defined $pid) { + $err->print("error spawning: $!\n"); + return r(500); } $wpipe = $in = undef; $buf = ''; diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm new file mode 100644 index 0000000..eade524 --- /dev/null +++ b/lib/PublicInbox/ProcessPipe.pm @@ -0,0 +1,30 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ + +# a tied handle for auto reaping of children tied to a pipe, see perltie(1) +package PublicInbox::ProcessPipe; +use strict; +use warnings; + +sub TIEHANDLE { + my ($class, $pid, $fh) = @_; + bless { pid => $pid, fh => $fh }, $class; +} + +sub READ { sysread($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) } + +sub READLINE { readline($_[0]->{fh}) } + +sub CLOSE { close($_[0]->{fh}) } + +sub FILENO { fileno($_[0]->{fh}) } + +sub DESTROY { + my $fh = delete($_[0]->{fh}); + close $fh if $fh; + waitpid($_[0]->{pid}, 0); +} + +sub pid { $_[0]->{pid} } + +1; diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm index aa8d81b..394a0b4 100644 --- a/lib/PublicInbox/Spawn.pm +++ b/lib/PublicInbox/Spawn.pm @@ -1,10 +1,22 @@ # Copyright (C) 2016 all contributors # License: AGPL-3.0+ +# +# This allows vfork to be used for spawning subprocesses if +# PERL_INLINE_DIRECTORY is explicitly defined in the environment. +# Under Linux, vfork can make a big difference in spawning performance +# as process size increases (fork still needs to mark pages for CoW use). +# Currently, we only use this for code intended for long running +# daemons (inside the PSGI code (-httpd) and -nntpd). The short-lived +# scripts (-mda, -index, -learn, -init) either use IPC::run or standard +# Perl routines. + package PublicInbox::Spawn; use strict; use warnings; use base qw(Exporter); -our @EXPORT_OK = qw/which spawn/; +use Symbol qw(gensym); +use PublicInbox::ProcessPipe; +our @EXPORT_OK = qw/which spawn popen_rd/; my $vfork_spawn = <<'VFORK_SPAWN'; #include @@ -149,4 +161,24 @@ sub spawn ($;$$) { public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env); } +sub popen_rd { + my ($cmd, $env, $opts) = @_; + unless (wantarray || defined $vfork_spawn || defined $env) { + open my $fh, '-|', @$cmd or + die('open `'.join(' ', @$cmd) . " pipe failed: $!\n"); + return $fh + } + pipe(my ($r, $w)) or die "pipe: $!\n"; + $opts ||= {}; + my $blocking = $opts->{Blocking}; + $r->blocking($blocking) if defined $blocking; + $opts->{1} = fileno($w); + my $pid = spawn($cmd, $env, $opts); + close $w; + return ($r, $pid) if wantarray; + my $ret = gensym; + tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r; + $ret; +} + 1; diff --git a/t/spawn.t b/t/spawn.t index ed9b5b0..d52b646 100644 --- a/t/spawn.t +++ b/t/spawn.t @@ -3,7 +3,7 @@ use strict; use warnings; use Test::More; -use PublicInbox::Spawn qw(which spawn); +use PublicInbox::Spawn qw(which spawn popen_rd); { my $true = which('true'); @@ -48,6 +48,40 @@ use PublicInbox::Spawn qw(which spawn); is($?, 0, 'env(1) exited successfully'); } +{ + my $fh = popen_rd([qw(echo hello)]); + ok(fileno($fh) >= 0, 'tied fileno works'); + my $l = <$fh>; + is($l, "hello\n", 'tied readline works'); + $l = <$fh>; + ok(!$l, 'tied readline works for EOF'); +} + +{ + my $fh = popen_rd([qw(printf foo\nbar)]); + ok(fileno($fh) >= 0, 'tied fileno works'); + my @line = <$fh>; + is_deeply(\@line, [ "foo\n", 'bar' ], 'wantarray works on readline'); +} + +{ + my $fh = popen_rd([qw(echo hello)]); + my $buf; + is(sysread($fh, $buf, 6), 6, 'sysread got 6 bytes'); + is($buf, "hello\n", 'tied gets works'); + is(sysread($fh, $buf, 6), 0, 'sysread got EOF'); +} + +{ + my ($fh, $pid) = popen_rd([qw(sleep 60)], undef, { Blocking => 0 }); + ok(defined $pid && $pid > 0, 'returned pid when array requested'); + is(kill(0, $pid), 1, 'child process is running'); + ok(!defined(sysread($fh, my $buf, 1)) && $!{EAGAIN}, + 'sysread returned quickly with EAGAIN'); + is(kill(15, $pid), 1, 'child process killed early'); + is(waitpid($pid, 0), $pid, 'child process reapable'); +} + done_testing(); 1; -- EW