about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-02-27 21:31:24 +0000
committerEric Wong <e@80x24.org>2016-02-27 21:51:39 +0000
commit617f35dacbd4e5972bf2d82411b45009bbc79a42 (patch)
tree0a763db89c81941f16dbd16761a35602f3c723c9 /lib
parentca885bd5905b7faa9ecb7b0eb02476de1d3a7f88 (diff)
downloadpublic-inbox-617f35dacbd4e5972bf2d82411b45009bbc79a42.tar.gz
This should reduce overhead of spawning git processes
from our long-running httpd and nntpd servers.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/Git.pm16
-rw-r--r--lib/PublicInbox/GitHTTPBackend.pm43
-rw-r--r--lib/PublicInbox/ProcessPipe.pm30
-rw-r--r--lib/PublicInbox/Spawn.pm34
4 files changed, 86 insertions, 37 deletions
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 5135862e..57d17d33 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -11,6 +11,7 @@ use strict;
 use warnings;
 use POSIX qw(dup2);
 require IO::Handle;
+use PublicInbox::Spawn qw(spawn popen_rd);
 
 sub new {
         my ($class, $git_dir) = @_;
@@ -26,13 +27,8 @@ sub _bidi_pipe {
         pipe($out_r, $out_w) or fail($self, "pipe failed: $!");
 
         my @cmd = ('git', "--git-dir=$self->{git_dir}", qw(cat-file), $batch);
-        $self->{$pid} = fork;
-        defined $self->{$pid} or fail($self, "fork failed: $!");
-        if ($self->{$pid} == 0) {
-                dup2(fileno($out_r), 0) or die "redirect stdin failed: $!\n";
-                dup2(fileno($in_w), 1) or die "redirect stdout failed: $!\n";
-                exec(@cmd) or die 'exec `' . join(' '). "' failed: $!\n";
-        }
+        my $redir = { 0 => fileno($out_r), 1 => fileno($in_w) };
+        $self->{$pid} = spawn(\@cmd, undef, $redir);
         close $out_r or fail($self, "close failed: $!");
         close $in_w or fail($self, "close failed: $!");
         $out_w->autoflush(1);
@@ -123,12 +119,8 @@ sub fail {
 
 sub popen {
         my ($self, @cmd) = @_;
-        my $mode = '-|';
-        $mode = shift @cmd if ($cmd[0] eq '|-');
         @cmd = ('git', "--git-dir=$self->{git_dir}", @cmd);
-        my $pid = open my $fh, $mode, @cmd or
-                die('open `'.join(' ', @cmd) . " pipe failed: $!\n");
-        $fh;
+        popen_rd(\@cmd);
 }
 
 sub cleanup {
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index f8446aa0..6e8ad955 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -7,7 +7,7 @@ package PublicInbox::GitHTTPBackend;
 use strict;
 use warnings;
 use Fcntl qw(:seek);
-use POSIX qw(dup2);
+use PublicInbox::Spawn qw(spawn);
 
 # n.b. serving "description" and "cloneurl" should be innocuous enough to
 # not cause problems.  serving "config" might...
@@ -142,31 +142,26 @@ sub serve_smart {
                 $err->print("error creating pipe: $!\n");
                 return r(500);
         }
-        my $pid = fork; # TODO: vfork under Linux...
-        unless (defined $pid) {
-                $err->print("error forking: $!\n");
-                return r(500);
+        my %env = %ENV;
+        # GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
+        # may be set in the server-process and are passed as-is
+        foreach my $name (qw(QUERY_STRING
+                                REMOTE_USER REMOTE_ADDR
+                                HTTP_CONTENT_ENCODING
+                                CONTENT_TYPE
+                                SERVER_PROTOCOL
+                                REQUEST_METHOD)) {
+                my $val = $env->{$name};
+                $env{$name} = $val if defined $val;
         }
         my $git_dir = $git->{git_dir};
-        if ($pid == 0) {
-                # GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
-                # may be set in the server-process and are passed as-is
-                foreach my $name (qw(QUERY_STRING
-                                        REMOTE_USER REMOTE_ADDR
-                                        HTTP_CONTENT_ENCODING
-                                        CONTENT_TYPE
-                                        SERVER_PROTOCOL
-                                        REQUEST_METHOD)) {
-                        my $val = $env->{$name};
-                        $ENV{$name} = $val if defined $val;
-                }
-                # $ENV{GIT_PROJECT_ROOT} = $git->{git_dir};
-                $ENV{GIT_HTTP_EXPORT_ALL} = '1';
-                $ENV{PATH_TRANSLATED} = "$git_dir/$path";
-                dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
-                dup2(fileno($wpipe), 1) or die "redirect stdout failed: $!\n";
-                my @cmd = qw(git http-backend);
-                exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
+        $env{GIT_HTTP_EXPORT_ALL} = '1';
+        $env{PATH_TRANSLATED} = "$git_dir/$path";
+        my %rdr = ( 0 => fileno($in), 1 => fileno($wpipe) );
+        my $pid = spawn([qw(git http-backend)], \%env, \%rdr);
+        unless (defined $pid) {
+                $err->print("error spawning: $!\n");
+                return r(500);
         }
         $wpipe = $in = undef;
         $buf = '';
diff --git a/lib/PublicInbox/ProcessPipe.pm b/lib/PublicInbox/ProcessPipe.pm
new file mode 100644
index 00000000..eade524c
--- /dev/null
+++ b/lib/PublicInbox/ProcessPipe.pm
@@ -0,0 +1,30 @@
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# a tied handle for auto reaping of children tied to a pipe, see perltie(1)
+package PublicInbox::ProcessPipe;
+use strict;
+use warnings;
+
+sub TIEHANDLE {
+        my ($class, $pid, $fh) = @_;
+        bless { pid => $pid, fh => $fh }, $class;
+}
+
+sub READ { sysread($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
+
+sub READLINE { readline($_[0]->{fh}) }
+
+sub CLOSE { close($_[0]->{fh}) }
+
+sub FILENO { fileno($_[0]->{fh}) }
+
+sub DESTROY {
+        my $fh = delete($_[0]->{fh});
+        close $fh if $fh;
+        waitpid($_[0]->{pid}, 0);
+}
+
+sub pid { $_[0]->{pid} }
+
+1;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index aa8d81b3..394a0b45 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -1,10 +1,22 @@
 # Copyright (C) 2016 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# This allows vfork to be used for spawning subprocesses if
+# PERL_INLINE_DIRECTORY is explicitly defined in the environment.
+# Under Linux, vfork can make a big difference in spawning performance
+# as process size increases (fork still needs to mark pages for CoW use).
+# Currently, we only use this for code intended for long running
+# daemons (inside the PSGI code (-httpd) and -nntpd).  The short-lived
+# scripts (-mda, -index, -learn, -init) either use IPC::run or standard
+# Perl routines.
+
 package PublicInbox::Spawn;
 use strict;
 use warnings;
 use base qw(Exporter);
-our @EXPORT_OK = qw/which spawn/;
+use Symbol qw(gensym);
+use PublicInbox::ProcessPipe;
+our @EXPORT_OK = qw/which spawn popen_rd/;
 
 my $vfork_spawn = <<'VFORK_SPAWN';
 #include <sys/types.h>
@@ -149,4 +161,24 @@ sub spawn ($;$$) {
         public_inbox_fork_exec($in, $out, $err, $f, $cmd, \@env);
 }
 
+sub popen_rd {
+        my ($cmd, $env, $opts) = @_;
+        unless (wantarray || defined $vfork_spawn || defined $env) {
+                open my $fh, '-|', @$cmd or
+                        die('open `'.join(' ', @$cmd) . " pipe failed: $!\n");
+                return $fh
+        }
+        pipe(my ($r, $w)) or die "pipe: $!\n";
+        $opts ||= {};
+        my $blocking = $opts->{Blocking};
+        $r->blocking($blocking) if defined $blocking;
+        $opts->{1} = fileno($w);
+        my $pid = spawn($cmd, $env, $opts);
+        close $w;
+        return ($r, $pid) if wantarray;
+        my $ret = gensym;
+        tie *$ret, 'PublicInbox::ProcessPipe', $pid, $r;
+        $ret;
+}
+
 1;