user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] support smart HTTP cloning
Date: Sun,  7 Feb 2016 09:00:47 +0000	[thread overview]
Message-ID: <20160207090047.11796-1-e@80x24.org> (raw)

This requires POST and (small file) upload support from the
PSGI/Plack web server.  CGI.pm is currently not supported with
this feature.

We'll serve everything git can handle by default for performance
in the general case.

To avoid introducing cognitive overhead for sysadmins managing
existing HTTP backends, we do not introduce new configuration
directives.

Thus, sysadmins of CPU/memory-constrained systems can disable
smart HTTP by setting http.uploadpack=false in the relevant git
config file for each public-inbox (ssoma) git repo.

Technically we could support http.receivepack to allow posting
messages to a public-inbox over HTTP(S), but that breaks
the public-inbox model of encouraging users to Cc: everyone.
Again, we encourage users to Cc: everyone to reduce the chance
of a public-inbox becoming a centralized point of
failure/censorship.
---
 Documentation/design_www.txt                       |  2 +
 .../{GitHTTPDumb.pm => GitHTTPBackend.pm}          | 97 +++++++++++++++++++++-
 lib/PublicInbox/WWW.pm                             | 17 ++--
 3 files changed, 109 insertions(+), 7 deletions(-)
 rename lib/PublicInbox/{GitHTTPDumb.pm => GitHTTPBackend.pm} (53%)

diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index 7601523..39b1241 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -25,6 +25,8 @@ URL naming
 
 /$LISTNAME/atom.xml [2]                   -> identical to /$LISTNAME/new.atom
 
+Additionally, we support "git clone" pointed to http://$HOST/$LISTNAME
+
 FIXME: we must refactor/cleanup/add tests for most of our CGI before
 adding more endpoints and features.
 
diff --git a/lib/PublicInbox/GitHTTPDumb.pm b/lib/PublicInbox/GitHTTPBackend.pm
similarity index 53%
rename from lib/PublicInbox/GitHTTPDumb.pm
rename to lib/PublicInbox/GitHTTPBackend.pm
index c088d8c..71b7a8f 100644
--- a/lib/PublicInbox/GitHTTPDumb.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -2,11 +2,12 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # when no endpoints match, fallback to this and serve a static file
-# This can serve Smart HTTP in the future.
-package PublicInbox::GitHTTPDumb;
+# or smart HTTP
+package PublicInbox::GitHTTPBackend;
 use strict;
 use warnings;
 use Fcntl qw(:seek);
+use POSIX qw(dup2);
 
 # n.b. serving "description" and "cloneurl" should be innocuous enough to
 # not cause problems.  serving "config" might...
@@ -29,6 +30,12 @@ sub r {
 
 sub serve {
 	my ($cgi, $git, $path) = @_;
+	my $service = $cgi->param('service') || '';
+	if ($service =~ /\Agit-\w+-pack\z/ || $path =~ /\Agit-\w+-pack\z/) {
+		my $ok = serve_smart($cgi, $git, $path);
+		return $ok if $ok;
+	}
+
 	my $type;
 	if ($path =~ /\A(?:$BIN)\z/o) {
 		$type = 'application/octet-stream';
@@ -118,4 +125,90 @@ sub prepare_range {
 	($code, $len);
 }
 
+# returns undef if git-http-backend answers 403, so serve() falls back to dumb HTTP
+sub serve_smart {
+	my ($cgi, $git, $path) = @_;
+	my $env = $cgi->{env};
+
+	my $input = $env->{'psgi.input'};
+	my $buf;
+	my $in;
+	my $err = $env->{'psgi.errors'};
+	if (fileno($input) >= 0) { # FIXME untested
+		$in = $input;
+	} else {
+		$in = IO::File->new_tmpfile;
+		while (1) {
+			my $r = $input->read($buf, 8192);
+			unless (defined $r) {
+				$err->print('error reading input: ', $!, "\n");
+				return r(500);
+			}
+			last if ($r == 0);
+			$in->write($buf);
+		}
+		$in->flush;
+		$in->sysseek(0, SEEK_SET);
+	}
+	my $out = IO::File->new_tmpfile;
+	my $pid = fork; # TODO: vfork under Linux...
+	unless (defined $pid) {
+		$err->print('error forking: ', $!, "\n");
+		return r(500);
+	}
+	if ($pid == 0) {
+		# GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
+		# may be set in the server-process and are passed as-is
+		foreach my $name (qw(QUERY_STRING
+					REMOTE_USER REMOTE_ADDR
+					HTTP_CONTENT_ENCODING
+					CONTENT_TYPE
+					SERVER_PROTOCOL
+					REQUEST_METHOD)) {
+			my $val = $env->{$name};
+			$ENV{$name} = $val if defined $val;
+		}
+		# $ENV{GIT_PROJECT_ROOT} = $git->{git_dir};
+		$ENV{GIT_HTTP_EXPORT_ALL} = '1';
+		$ENV{PATH_TRANSLATED} = "$git->{git_dir}/$path";
+		dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
+		dup2(fileno($out), 1) or die "redirect stdout failed: $!\n";
+		my @cmd = qw(git http-backend);
+		exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
+	}
+
+	if (waitpid($pid, 0) != $pid) {
+		$err->print("git http-backend ($git->{git_dir}): ", $?, "\n");
+		return r(500);
+	}
+	$in = undef;
+	$out->seek(0, SEEK_SET);
+	my @h;
+	my $code = 200;
+	{
+		local $/ = "\r\n";
+		while (defined(my $line = <$out>)) {
+			if ($line =~ /\AStatus:\s*(\d+)/) {
+				$code = $1;
+			} else {
+				chomp $line;
+				last if $line eq '';
+				push @h, split(/:\s*/, $line, 2);
+			}
+		}
+	}
+	return if $code == 403;
+	sub {
+		my ($cb) = @_;
+		my $fh = $cb->([ $code, \@h ]);
+		while (1) {
+			my $r = $out->read($buf, 8192);
+			die "$!\n" unless defined $r;
+			last if ($r == 0);
+			$fh->write($buf);
+		}
+		$fh->close;
+	}
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 1c6936f..b4b012f 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -18,7 +18,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape);
 use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 require PublicInbox::Git;
-use PublicInbox::GitHTTPDumb;
+use PublicInbox::GitHTTPBackend;
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
@@ -28,10 +28,17 @@ sub run {
 	my ($cgi, $method) = @_;
 	$pi_config ||= PublicInbox::Config->new;
 	my $ctx = { cgi => $cgi, pi_config => $pi_config };
-	if ($method !~ /\AGET|HEAD\z/) {
+	my $path_info = $cgi->path_info;
+
+	if ($method eq 'POST' &&
+		 $path_info =~ m!$LISTNAME_RE/(git-upload-pack)\z!) {
+		my $path = $2;
+		return (invalid_list($ctx, $1) ||
+			serve_git($cgi, $ctx->{git}, $path));
+	}
+	elsif ($method !~ /\AGET|HEAD\z/) {
 		return r(405, 'Method Not Allowed');
 	}
-	my $path_info = $cgi->path_info;
 
 	# top-level indices and feeds
 	if ($path_info eq '/') {
@@ -44,7 +51,7 @@ sub run {
 		invalid_list($ctx, $1) || get_atom($ctx);
 
 	} elsif ($path_info =~ m!$LISTNAME_RE/
-				($PublicInbox::GitHTTPDumb::ANY)\z!ox) {
+				($PublicInbox::GitHTTPBackend::ANY)\z!ox) {
 		my $path = $2;
 		invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path);
 	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
@@ -402,7 +409,7 @@ sub msg_page {
 
 sub serve_git {
 	my ($cgi, $git, $path) = @_;
-	PublicInbox::GitHTTPDumb::serve($cgi, $git, $path);
+	PublicInbox::GitHTTPBackend::serve($cgi, $git, $path);
 }
 
 1;
-- 
EW


                 reply	other threads:[~2016-02-07  9:00 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160207090047.11796-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).