user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/3] migrate git-http-backend to async use
@ 2016-02-25  4:02 Eric Wong
  2016-02-25  4:02 ` [PATCH 1/3] use pipe for git-http-backend output Eric Wong
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Eric Wong @ 2016-02-25  4:02 UTC (permalink / raw)
  To: meta

git-http-backend (and likely other operations) can take a while
before it returns data.  Do not block synchrously on it.

Eric Wong (3):
      use pipe for git-http-backend output
      git-http-backend: start refactoring to use callback
      git-http-backend: start async API for streaming

 lib/PublicInbox/GitHTTPBackend.pm | 90 +++++++++++++++++++++++++--------------
 public-inbox-httpd                | 31 ++++++++++++++
 2 files changed, 88 insertions(+), 33 deletions(-)


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/3] use pipe for git-http-backend output
  2016-02-25  4:02 [PATCH 0/3] migrate git-http-backend to async use Eric Wong
@ 2016-02-25  4:02 ` Eric Wong
  2016-02-25  4:02 ` [PATCH 2/3] git-http-backend: start refactoring to use callback Eric Wong
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-02-25  4:02 UTC (permalink / raw)
  To: meta

This allows us to stream the output to the client without buffering
everything up-front.  Next, we'll let Danga::Socket (or AE in the
future) wait for readability.
---
 lib/PublicInbox/GitHTTPBackend.pm | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 562c290..cba025e 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -132,9 +132,9 @@ sub serve_smart {
 	my $buf;
 	my $in;
 	my $err = $env->{'psgi.errors'};
-	if (fileno($input) >= 0) { # FIXME untested
+	if (fileno($input) >= 0) {
 		$in = $input;
-	} else {
+	} else { # FIXME untested
 		$in = IO::File->new_tmpfile;
 		while (1) {
 			my $r = $input->read($buf, 8192);
@@ -148,7 +148,11 @@ sub serve_smart {
 		$in->flush;
 		$in->sysseek(0, SEEK_SET);
 	}
-	my $out = IO::File->new_tmpfile;
+	my ($rpipe, $wpipe);
+	unless (pipe($rpipe, $wpipe)) {
+		$err->print('error creating pipe', $!, "\n");
+		return r(500);
+	}
 	my $pid = fork; # TODO: vfork under Linux...
 	unless (defined $pid) {
 		$err->print('error forking: ', $!, "\n");
@@ -170,22 +174,17 @@ sub serve_smart {
 		$ENV{GIT_HTTP_EXPORT_ALL} = '1';
 		$ENV{PATH_TRANSLATED} = "$git->{git_dir}/$path";
 		dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
-		dup2(fileno($out), 1) or die "redirect stdout failed: $!\n";
+		dup2(fileno($wpipe), 1) or die "redirect stdout failed: $!\n";
 		my @cmd = qw(git http-backend);
 		exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
 	}
-
-	if (waitpid($pid, 0) != $pid) {
-		$err->print("git http-backend ($git->{git_dir}): ", $?, "\n");
-		return r(500);
-	}
+	$wpipe = undef;
 	$in = undef;
-	$out->seek(0, SEEK_SET);
 	my @h;
 	my $code = 200;
 	{
 		local $/ = "\r\n";
-		while (defined(my $line = <$out>)) {
+		while (defined(my $line = <$rpipe>)) {
 			if ($line =~ /\AStatus:\s*(\d+)/) {
 				$code = $1;
 			} else {
@@ -200,7 +199,7 @@ sub serve_smart {
 		my ($cb) = @_;
 		my $fh = $cb->([ $code, \@h ]);
 		while (1) {
-			my $r = $out->read($buf, 8192);
+			my $r = sysread($rpipe, $buf, 8192);
 			die "$!\n" unless defined $r;
 			last if ($r == 0);
 			$fh->write($buf);
-- 
EW


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] git-http-backend: start refactoring to use callback
  2016-02-25  4:02 [PATCH 0/3] migrate git-http-backend to async use Eric Wong
  2016-02-25  4:02 ` [PATCH 1/3] use pipe for git-http-backend output Eric Wong
@ 2016-02-25  4:02 ` Eric Wong
  2016-02-25  4:02 ` [PATCH 3/3] git-http-backend: start async API for streaming Eric Wong
  2016-02-25  4:39 ` [PATCH 4/3] git-http-backend: avoid multi-arg print statemtents Eric Wong
  3 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-02-25  4:02 UTC (permalink / raw)
  To: meta

Designing for asynchronous, non-blocking operations makes
adapting for synchronous, blocking operation easy.

Going the other way around is not easy, so do it now and
allow us to be more easily adapted for non-blocking use
in the next commit...
---
 lib/PublicInbox/GitHTTPBackend.pm | 73 +++++++++++++++++++++++++--------------
 1 file changed, 47 insertions(+), 26 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index cba025e..3cf7857 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -178,34 +178,55 @@ sub serve_smart {
 		my @cmd = qw(git http-backend);
 		exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
 	}
-	$wpipe = undef;
-	$in = undef;
-	my @h;
-	my $code = 200;
-	{
-		local $/ = "\r\n";
-		while (defined(my $line = <$rpipe>)) {
-			if ($line =~ /\AStatus:\s*(\d+)/) {
-				$code = $1;
-			} else {
-				chomp $line;
-				last if $line eq '';
-				push @h, split(/:\s*/, $line, 2);
-			}
+	$wpipe = $in = undef;
+	$rpipe->blocking(0);
+	$buf = '';
+	my $vin;
+	vec($vin, fileno($rpipe), 1) = 1;
+	my ($fh, $res);
+	my $fail = sub {
+		my ($e) = @_;
+		if ($e eq 'EAGAIN') {
+			select($vin, undef, undef, undef);
+		} else {
+			$rpipe = undef;
+			$fh->close if $fh;
+			$err->print('git http-backend error: ', $e, "\n");
 		}
-	}
-	return if $code == 403;
-	sub {
-		my ($cb) = @_;
-		my $fh = $cb->([ $code, \@h ]);
-		while (1) {
-			my $r = sysread($rpipe, $buf, 8192);
-			die "$!\n" unless defined $r;
-			last if ($r == 0);
-			$fh->write($buf);
+	};
+	my $cb = sub {
+		my $r = sysread($rpipe, $buf, 8192, length($buf));
+		return $fail->($!{EAGAIN} ? 'EAGAIN' : $!) unless defined $r;
+		if ($r == 0) { # EOF
+			$rpipe = undef;
+			$fh->close if $fh;
+			return;
 		}
-		$fh->close;
-	}
+		if ($fh) { # stream body from git-http-backend to HTTP client
+			$fh->write($buf);
+			$buf = '';
+		} elsif ($buf =~ s/\A(.*?)\r?\n\r?\n//s) { # parse headers
+			my $h = $1;
+			my $code = 200;
+			my @h;
+			foreach my $l (split(/\r?\n/, $h)) {
+				my ($k, $v) = split(/:\s*/, $l, 2);
+				if ($k =~ /\AStatus\z/i) {
+					$code = int($v);
+				} else {
+					push @h, $k, $v;
+				}
+			}
+			# write response header:
+			$fh = $res->([ $code, \@h ]);
+			$fh->write($buf);
+			$buf = '';
+		} # else { keep reading ... }
+	};
+	sub {
+		($res) = @_;
+		while ($rpipe) { $cb->() }
+	};
 }
 
 1;
-- 
EW


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] git-http-backend: start async API for streaming
  2016-02-25  4:02 [PATCH 0/3] migrate git-http-backend to async use Eric Wong
  2016-02-25  4:02 ` [PATCH 1/3] use pipe for git-http-backend output Eric Wong
  2016-02-25  4:02 ` [PATCH 2/3] git-http-backend: start refactoring to use callback Eric Wong
@ 2016-02-25  4:02 ` Eric Wong
  2016-02-25  4:30   ` [PATCH v2] " Eric Wong
  2016-02-25  4:39 ` [PATCH 4/3] git-http-backend: avoid multi-arg print statemtents Eric Wong
  3 siblings, 1 reply; 6+ messages in thread
From: Eric Wong @ 2016-02-25  4:02 UTC (permalink / raw)
  To: meta

git-http-backend may take a while, ensure we can process other
requests while waiting on it.  We currently do this via
Danga::Socket in public-inbox-httpd; but avoid exposing this
internal implementation detail to the PSGI interface and
instead only expose a callback via: $env->{'pi-httpd.async'}
---
 lib/PublicInbox/GitHTTPBackend.pm | 26 +++++++++++++++-----------
 public-inbox-httpd                | 31 +++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 3cf7857..31b9cd8 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -179,23 +179,21 @@ sub serve_smart {
 		exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
 	}
 	$wpipe = $in = undef;
-	$rpipe->blocking(0);
 	$buf = '';
-	my $vin;
-	vec($vin, fileno($rpipe), 1) = 1;
-	my ($fh, $res);
+	my ($vin, $fh, $res);
 	my $fail = sub {
 		my ($e) = @_;
 		if ($e eq 'EAGAIN') {
-			select($vin, undef, undef, undef);
+			select($vin, undef, undef, undef) if defined $vin;
+			# $vin is undef on async, so this is a noop on EAGAIN
 		} else {
 			$rpipe = undef;
 			$fh->close if $fh;
 			$err->print('git http-backend error: ', $e, "\n");
 		}
 	};
-	my $cb = sub {
-		my $r = sysread($rpipe, $buf, 8192, length($buf));
+	my $cb = sub { # read git-http-backend output and stream to client
+		my $r = $rpipe ? sysread($rpipe, $buf, 8192, length($buf)) : 0;
 		return $fail->($!{EAGAIN} ? 'EAGAIN' : $!) unless defined $r;
 		if ($r == 0) { # EOF
 			$rpipe = undef;
@@ -223,10 +221,16 @@ sub serve_smart {
 			$buf = '';
 		} # else { keep reading ... }
 	};
-	sub {
-		($res) = @_;
-		while ($rpipe) { $cb->() }
-	};
+	if (my $async = $env->{'pi-httpd.async'}) {
+		$async->($rpipe, $cb);
+		sub { ($res) = @_ } # let Danga::Socket handle the rest.
+	} else { # synchronous loop
+		vec($vin, fileno($rpipe), 1) = 1;
+		sub {
+			($res) = @_;
+			while ($rpipe) { $cb->() }
+		}
+	}
 }
 
 1;
diff --git a/public-inbox-httpd b/public-inbox-httpd
index 3635c9a..cf07628 100644
--- a/public-inbox-httpd
+++ b/public-inbox-httpd
@@ -53,11 +53,38 @@ daemon_run('0.0.0.0:8080', $refresh,
 	});
 
 1;
+
+package PublicInbox::HTTPD::Async;
+use strict;
+use warnings;
+use base qw(Danga::Socket);
+use fields qw(cb);
+
+sub new {
+	my ($class, $io, $cb) = @_;
+	my $self = fields::new($class);
+	$io->blocking(0);
+	$self->SUPER::new($io);
+	$self->{cb} = $cb;
+	$self->watch_read(1);
+	$self;
+}
+
+sub event_read { $_[0]->{cb}->() }
+sub event_hup { $_[0]->{cb}->() }
+
+1;
+
 package PublicInbox::HTTPD;
 use strict;
 use warnings;
 use Plack::Util;
 
+sub pi_httpd_async {
+	my ($io, $cb) = @_;
+	PublicInbox::HTTPD::Async->new($io, $cb);
+}
+
 sub new {
 	my ($class, $sock, $app) = @_;
 	my $n = getsockname($sock) or die "not a socket: $sock $!\n";
@@ -85,6 +112,10 @@ sub new {
 		'psgi.multiprocess' => Plack::Util::TRUE,
 		'psgix.harakiri'=> Plack::Util::FALSE,
 		'psgix.input.buffered' => Plack::Util::TRUE,
+		'pi-httpd.async' => do {
+			no warnings 'once';
+			*pi_httpd_async
+		},
 	);
 	bless {
 		err => \*STDERR,
-- 
EW


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2] git-http-backend: start async API for streaming
  2016-02-25  4:02 ` [PATCH 3/3] git-http-backend: start async API for streaming Eric Wong
@ 2016-02-25  4:30   ` Eric Wong
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-02-25  4:30 UTC (permalink / raw)
  To: meta

git-http-backend may take a while, ensure we can process other
requests while waiting on it.  We currently do this via
Danga::Socket in public-inbox-httpd; but avoid exposing this
internal implementation detail to the PSGI interface and
instead only expose a callback via: $env->{'pi-httpd.async'}
---
 v2: reap dead children, oops.

 lib/PublicInbox/GitHTTPBackend.pm | 62 ++++++++++++++++++++++++++-------------
 public-inbox-httpd                | 32 ++++++++++++++++++++
 2 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 3cf7857..9c32535 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -158,6 +158,7 @@ sub serve_smart {
 		$err->print('error forking: ', $!, "\n");
 		return r(500);
 	}
+	my $git_dir = $git->{git_dir};
 	if ($pid == 0) {
 		# GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
 		# may be set in the server-process and are passed as-is
@@ -172,36 +173,47 @@ sub serve_smart {
 		}
 		# $ENV{GIT_PROJECT_ROOT} = $git->{git_dir};
 		$ENV{GIT_HTTP_EXPORT_ALL} = '1';
-		$ENV{PATH_TRANSLATED} = "$git->{git_dir}/$path";
+		$ENV{PATH_TRANSLATED} = "$git_dir/$path";
 		dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
 		dup2(fileno($wpipe), 1) or die "redirect stdout failed: $!\n";
 		my @cmd = qw(git http-backend);
 		exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
 	}
 	$wpipe = $in = undef;
-	$rpipe->blocking(0);
 	$buf = '';
-	my $vin;
-	vec($vin, fileno($rpipe), 1) = 1;
-	my ($fh, $res);
-	my $fail = sub {
-		my ($e) = @_;
-		if ($e eq 'EAGAIN') {
-			select($vin, undef, undef, undef);
+	my ($vin, $fh, $res);
+	my $end = sub {
+		if ($fh) {
+			$fh->close;
+			$fh = undef;
 		} else {
+			$res->(r(500)) if $res;
+		}
+		if ($rpipe) {
+			$rpipe->close; # _may_ be Danga::Socket::close
 			$rpipe = undef;
-			$fh->close if $fh;
-			$err->print('git http-backend error: ', $e, "\n");
+		}
+		if (defined $pid) {
+			my $wpid = $pid;
+			$pid = undef;
+			return if $wpid == waitpid($wpid, 0);
+			$err->print("git http-backend ($git_dir): $?\n");
 		}
 	};
-	my $cb = sub {
-		my $r = sysread($rpipe, $buf, 8192, length($buf));
-		return $fail->($!{EAGAIN} ? 'EAGAIN' : $!) unless defined $r;
-		if ($r == 0) { # EOF
-			$rpipe = undef;
-			$fh->close if $fh;
+	my $fail = sub {
+		my ($e) = @_;
+		if ($e eq 'EAGAIN') {
+			select($vin, undef, undef, undef) if defined $vin;
+			# $vin is undef on async, so this is a noop on EAGAIN
 			return;
 		}
+		$end->();
+		$err->print("git http-backend ($git_dir): $e\n");
+	};
+	my $cb = sub { # read git-http-backend output and stream to client
+		my $r = $rpipe ? $rpipe->sysread($buf, 8192, length($buf)) : 0;
+		return $fail->($!{EAGAIN} ? 'EAGAIN' : $!) unless defined $r;
+		return $end->() if $r == 0; # EOF
 		if ($fh) { # stream body from git-http-backend to HTTP client
 			$fh->write($buf);
 			$buf = '';
@@ -219,14 +231,22 @@ sub serve_smart {
 			}
 			# write response header:
 			$fh = $res->([ $code, \@h ]);
+			$res = undef;
 			$fh->write($buf);
 			$buf = '';
 		} # else { keep reading ... }
 	};
-	sub {
-		($res) = @_;
-		while ($rpipe) { $cb->() }
-	};
+	if (my $async = $env->{'pi-httpd.async'}) {
+		$rpipe = $async->($rpipe, $cb);
+		sub { ($res) = @_ } # let Danga::Socket handle the rest.
+	} else { # synchronous loop
+		$vin = '';
+		vec($vin, fileno($rpipe), 1) = 1;
+		sub {
+			($res) = @_;
+			while ($rpipe) { $cb->() }
+		}
+	}
 }
 
 1;
diff --git a/public-inbox-httpd b/public-inbox-httpd
index 3635c9a..0c1e24c 100644
--- a/public-inbox-httpd
+++ b/public-inbox-httpd
@@ -53,11 +53,39 @@ daemon_run('0.0.0.0:8080', $refresh,
 	});
 
 1;
+
+package PublicInbox::HTTPD::Async;
+use strict;
+use warnings;
+use base qw(Danga::Socket);
+use fields qw(cb);
+
+sub new {
+	my ($class, $io, $cb) = @_;
+	my $self = fields::new($class);
+	$io->blocking(0);
+	$self->SUPER::new($io);
+	$self->{cb} = $cb;
+	$self->watch_read(1);
+	$self;
+}
+
+sub event_read { $_[0]->{cb}->() }
+sub event_hup { $_[0]->{cb}->() }
+sub sysread { shift->{sock}->sysread(@_) }
+
+1;
+
 package PublicInbox::HTTPD;
 use strict;
 use warnings;
 use Plack::Util;
 
+sub pi_httpd_async {
+	my ($io, $cb) = @_;
+	PublicInbox::HTTPD::Async->new($io, $cb);
+}
+
 sub new {
 	my ($class, $sock, $app) = @_;
 	my $n = getsockname($sock) or die "not a socket: $sock $!\n";
@@ -85,6 +113,10 @@ sub new {
 		'psgi.multiprocess' => Plack::Util::TRUE,
 		'psgix.harakiri'=> Plack::Util::FALSE,
 		'psgix.input.buffered' => Plack::Util::TRUE,
+		'pi-httpd.async' => do {
+			no warnings 'once';
+			*pi_httpd_async
+		},
 	);
 	bless {
 		err => \*STDERR,
-- 
EW


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/3] git-http-backend: avoid multi-arg print statemtents
  2016-02-25  4:02 [PATCH 0/3] migrate git-http-backend to async use Eric Wong
                   ` (2 preceding siblings ...)
  2016-02-25  4:02 ` [PATCH 3/3] git-http-backend: start async API for streaming Eric Wong
@ 2016-02-25  4:39 ` Eric Wong
  3 siblings, 0 replies; 6+ messages in thread
From: Eric Wong @ 2016-02-25  4:39 UTC (permalink / raw)
  To: meta

Even with output buffering disabled via IO::Handle::autoflush,
writes are not atomic unless it is a single argument passed to
"print".  Multiple arguments to "print" will show up as multiple
calls to write(2) instead of a single, atomic writev(2).
---
 lib/PublicInbox/GitHTTPBackend.pm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index 9c32535..5879970 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -139,7 +139,7 @@ sub serve_smart {
 		while (1) {
 			my $r = $input->read($buf, 8192);
 			unless (defined $r) {
-				$err->print('error reading input: ', $!, "\n");
+				$err->print("error reading input: $!\n");
 				return r(500);
 			}
 			last if ($r == 0);
@@ -150,12 +150,12 @@ sub serve_smart {
 	}
 	my ($rpipe, $wpipe);
 	unless (pipe($rpipe, $wpipe)) {
-		$err->print('error creating pipe', $!, "\n");
+		$err->print("error creating pipe: $!\n");
 		return r(500);
 	}
 	my $pid = fork; # TODO: vfork under Linux...
 	unless (defined $pid) {
-		$err->print('error forking: ', $!, "\n");
+		$err->print("error forking: $!\n");
 		return r(500);
 	}
 	my $git_dir = $git->{git_dir};
-- 
EW


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2016-02-25  4:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-25  4:02 [PATCH 0/3] migrate git-http-backend to async use Eric Wong
2016-02-25  4:02 ` [PATCH 1/3] use pipe for git-http-backend output Eric Wong
2016-02-25  4:02 ` [PATCH 2/3] git-http-backend: start refactoring to use callback Eric Wong
2016-02-25  4:02 ` [PATCH 3/3] git-http-backend: start async API for streaming Eric Wong
2016-02-25  4:30   ` [PATCH v2] " Eric Wong
2016-02-25  4:39 ` [PATCH 4/3] git-http-backend: avoid multi-arg print statemtents Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).