From 0af0bd903b9b5aede71155ce8756e01a229b40bb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 7 Feb 2016 08:35:29 +0000 Subject: support smart HTTP cloning This requires POST and (small file) upload support from the PSGI/Plack web server. CGI.pm is currently not supported with this feature. We'll serve everything git can handle by default for performance in the general case. To avoid introducing cognitive overhead for sysadmins managing existing HTTP backends, we do not introduce new configuration directives. Thus, setting http.uploadpack=false in the relevant git config file for each public-inbox (ssoma) git repo will disable smart HTTP for CPU/memory-constrained systems. Technically we could support http.receivepack to allow posting messages to a public-inbox over HTTP(S), but that breaks the public-inbox model of encouraging users to Cc: everyone. Again, we encourage users to Cc: everyone to reduce the chance of a public-inbox becoming a centralized point of failure/censorship. --- Documentation/design_www.txt | 2 + lib/PublicInbox/GitHTTPBackend.pm | 214 ++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/GitHTTPDumb.pm | 121 --------------------- lib/PublicInbox/WWW.pm | 17 ++- 4 files changed, 228 insertions(+), 126 deletions(-) create mode 100644 lib/PublicInbox/GitHTTPBackend.pm delete mode 100644 lib/PublicInbox/GitHTTPDumb.pm diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt index 76015233..39b12414 100644 --- a/Documentation/design_www.txt +++ b/Documentation/design_www.txt @@ -25,6 +25,8 @@ URL naming /$LISTNAME/atom.xml [2] -> identical to /$LISTNAME/new.atom +Additionally, we support "git clone" pointed to http://$HOST/$LISTNAME + FIXME: we must refactor/cleanup/add tests for most of our CGI before adding more endpoints and features. 
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
new file mode 100644
index 00000000..71b7a8f1
--- /dev/null
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -0,0 +1,249 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+
+# when no endpoints match, fallback to this and serve a static file
+# or smart HTTP
+package PublicInbox::GitHTTPBackend;
+use strict;
+use warnings;
+use Fcntl qw(:seek);
+use IO::File; # IO::File->new_tmpfile for buffering smart HTTP I/O
+use POSIX qw(dup2);
+
+# n.b. serving "description" and "cloneurl" should be innocuous enough to
+# not cause problems. serving "config" might...
+my @text = qw[HEAD info/refs
+	objects/info/(?:http-alternates|alternates|packs)
+	cloneurl description];
+
+my @binary = qw!
+	objects/[a-f0-9]{2}/[a-f0-9]{38}
+	objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
+	!;
+
+our $ANY = join('|', @binary, @text);
+my $BIN = join('|', @binary);
+my $TEXT = join('|', @text);
+
+# returns a PSGI response with status $_[0] and an empty text/plain body
+sub r {
+	[ $_[0] , [qw(Content-Type text/plain Content-Length 0) ], [] ]
+}
+
+# Serves $path from the repository at $git->{git_dir}.
+# Smart HTTP requests are handed to serve_smart first; when that declines
+# (returns undef) or the request is not a smart one, $path is served as a
+# static file (dumb HTTP), honoring a single "bytes=" Range if given.
+# Returns a PSGI response: an arrayref or a streaming callback.
+sub serve {
+	my ($cgi, $git, $path) = @_;
+	my $service = $cgi->param('service') || '';
+	if ($service =~ /\Agit-\w+-pack\z/ || $path =~ /\Agit-\w+-pack\z/) {
+		my $ok = serve_smart($cgi, $git, $path);
+		return $ok if $ok;
+	}
+
+	my $type;
+	if ($path =~ /\A(?:$BIN)\z/o) {
+		$type = 'application/octet-stream';
+	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
+		$type = 'text/plain';
+	} else {
+		return r(404);
+	}
+	my $f = "$git->{git_dir}/$path";
+	return r(404) unless -f $f && -r _;
+	my @st = stat(_);
+	my $size = $st[7];
+
+	# TODO: If-Modified-Since and Last-Modified
+	open my $in, '<', $f or return r(404);
+	my $code = 200;
+	my $len = $size;
+	my @h;
+
+	my $env = $cgi->{env} || \%ENV;
+	my $range = $env->{HTTP_RANGE};
+	if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) {
+		($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size);
+		return r(500) if $code == 500; # could not seek in $f
+		if ($code == 416) {
+			push @h, 'Content-Range', "bytes */$size";
+			return [ 416, \@h, [] ];
+		}
+	}
+
+	push @h, 'Content-Type', $type, 'Content-Length', $len;
+	sub {
+		my ($res) = @_; # Plack callback
+		my $fh = $res->([ $code, \@h ]);
+		my $buf;
+		my $n = 8192;
+		while ($len > 0) {
+			$n = $len if $len < $n;
+			my $r = read($in, $buf, $n);
+			last if (!defined($r) || $r <= 0);
+			$len -= $r;
+			$fh->write($buf);
+		}
+		$fh->close;
+	}
+}
+
+# Applies the Range "bytes=$beg-$end" (either bound may be '') to the
+# $size-byte file open on $in.  For a satisfiable range, $in is seeked to
+# its first byte and Accept-Ranges/Content-Range are pushed onto @$h.
+# Returns ($code, $len): (206, bytes to send), 416 when the range cannot
+# be satisfied, or 500 when seeking failed.
+sub prepare_range {
+	my ($cgi, $in, $h, $beg, $end, $size) = @_;
+	my $code = 200;
+	my $len = $size;
+	if ($beg eq '') {
+		if ($end ne '') { # "bytes=-$end" => last N bytes
+			$beg = $size - $end;
+			$beg = 0 if $beg < 0;
+			$end = $size - 1;
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	} else {
+		if ($beg > $size) {
+			$code = 416;
+		} elsif ($end eq '' || $end >= $size) {
+			$end = $size - 1;
+			$code = 206;
+		} else { # $end < $size
+			$code = 206;
+		}
+	}
+	if ($code == 206) {
+		$len = $end - $beg + 1;
+		if ($len <= 0) {
+			$code = 416;
+		} else {
+			# the caller unpacks a ($code, $len) list, so a
+			# failed seek is reported in that same shape
+			seek($in, $beg, SEEK_SET) or return (500, 0);
+			push @$h, qw(Accept-Ranges bytes Content-Range);
+			push @$h, "bytes $beg-$end/$size";
+
+			# FIXME: Plack::Middleware::Deflater bug?
+			if (my $env = $cgi->{env}) {
+				$env->{'psgix.no-compress'} = 1;
+			}
+		}
+	}
+	($code, $len);
+}
+
+# Runs in the forked child only: exports the CGI environment taken from
+# the PSGI $env, points stdin/stdout at $in/$out and execs
+# "git http-backend".  Dies on failure, never returns on success.
+sub _exec_http_backend {
+	my ($env, $git, $path, $in, $out) = @_;
+	# GIT_HTTP_EXPORT_ALL, GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
+	# may be set in the server-process and are passed as-is
+	foreach my $name (qw(QUERY_STRING
+				REMOTE_USER REMOTE_ADDR
+				HTTP_CONTENT_ENCODING
+				CONTENT_TYPE
+				SERVER_PROTOCOL
+				REQUEST_METHOD)) {
+		my $val = $env->{$name};
+		$ENV{$name} = $val if defined $val;
+	}
+	# $ENV{GIT_PROJECT_ROOT} = $git->{git_dir};
+	$ENV{GIT_HTTP_EXPORT_ALL} = '1';
+	$ENV{PATH_TRANSLATED} = "$git->{git_dir}/$path";
+	dup2(fileno($in), 0) or die "redirect stdin failed: $!\n";
+	dup2(fileno($out), 1) or die "redirect stdout failed: $!\n";
+	my @cmd = qw(git http-backend);
+	exec(@cmd) or die 'exec `' . join(' ', @cmd). "' failed: $!\n";
+}
+
+# Answers a smart HTTP request by running "git http-backend" with the
+# request body on its stdin and its CGI response buffered in a tempfile.
+# Returns a PSGI response, which is r(500) when a local step failed;
+# returns undef if 403 so it falls back to dumb HTTP
+sub serve_smart {
+	my ($cgi, $git, $path) = @_;
+	my $env = $cgi->{env};
+
+	my $input = $env->{'psgi.input'};
+	my $buf;
+	my $in;
+	my $err = $env->{'psgi.errors'};
+	# psgi.input need not be backed by a file descriptor: fileno may
+	# return undef or -1, or die for handle-like objects
+	my $fd = eval { fileno($input) };
+	if (defined $fd && $fd >= 0) { # FIXME untested
+		$in = $input;
+	} else {
+		$in = IO::File->new_tmpfile;
+		while (1) {
+			my $r = $input->read($buf, 8192);
+			unless (defined $r) {
+				$err->print('error reading input: ', $!, "\n");
+				return r(500);
+			}
+			last if ($r == 0);
+			$in->write($buf);
+		}
+		$in->flush;
+		$in->sysseek(0, SEEK_SET);
+	}
+	my $out = IO::File->new_tmpfile;
+	my $pid = fork; # TODO: vfork under Linux...
+	unless (defined $pid) {
+		$err->print('error forking: ', $!, "\n");
+		return r(500);
+	}
+	if ($pid == 0) {
+		# a die here must not unwind into the server's own exception
+		# handling inside the child: report it and leave right away
+		eval { _exec_http_backend($env, $git, $path, $in, $out) };
+		print STDERR $@;
+		POSIX::_exit(1);
+	}
+
+	# a non-zero $? means git http-backend never ran or crashed
+	if (waitpid($pid, 0) != $pid || $? != 0) {
+		$err->print("git http-backend ($git->{git_dir}): ", $?, "\n");
+		return r(500);
+	}
+	$in = undef;
+	$out->seek(0, SEEK_SET);
+	my @h;
+	my $code = 200;
+	{
+		local $/ = "\r\n";
+		while (defined(my $line = <$out>)) {
+			if ($line =~ /\AStatus:\s*(\d+)/) {
+				$code = $1;
+			} else {
+				chomp $line;
+				last if $line eq '';
+				# a line without ':' would leave @h odd-sized
+				# and shift every following header pair
+				my ($k, $v) = split(/:\s*/, $line, 2);
+				push @h, $k, $v if defined $v;
+			}
+		}
+	}
+	return if $code == 403;
+	sub {
+		my ($cb) = @_;
+		my $fh = $cb->([ $code, \@h ]);
+		while (1) {
+			my $r = $out->read($buf, 8192);
+			die "$!\n" unless defined $r;
+			last if ($r == 0);
+			$fh->write($buf);
+		}
+		$fh->close;
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/GitHTTPDumb.pm b/lib/PublicInbox/GitHTTPDumb.pm
deleted file mode 100644
index c088d8c4..00000000
--- a/lib/PublicInbox/GitHTTPDumb.pm
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (C) 2016 all contributors
-# License: AGPL-3.0+
-
-# when no endpoints match, fallback to this and serve a static file
-# This can serve Smart HTTP in the future.
-package PublicInbox::GitHTTPDumb;
-use strict;
-use warnings;
-use Fcntl qw(:seek);
-
-# n.b. serving "description" and "cloneurl" should be innocuous enough to
-# not cause problems.
serving "config" might... -my @text = qw[HEAD info/refs - objects/info/(?:http-alternates|alternates|packs) - cloneurl description]; - -my @binary = qw! - objects/[a-f0-9]{2}/[a-f0-9]{38} - objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx) - !; - -our $ANY = join('|', @binary, @text); -my $BIN = join('|', @binary); -my $TEXT = join('|', @text); - -sub r { - [ $_[0] , [qw(Content-Type text/plain Content-Length 0) ], [] ] -} - -sub serve { - my ($cgi, $git, $path) = @_; - my $type; - if ($path =~ /\A(?:$BIN)\z/o) { - $type = 'application/octet-stream'; - } elsif ($path =~ /\A(?:$TEXT)\z/o) { - $type = 'text/plain'; - } else { - return r(404); - } - my $f = "$git->{git_dir}/$path"; - return r(404) unless -f $f && -r _; - my @st = stat(_); - my $size = $st[7]; - - # TODO: If-Modified-Since and Last-Modified - open my $in, '<', $f or return r(404); - my $code = 200; - my $len = $size; - my @h; - - my $env = $cgi->{env} || \%ENV; - my $range = $env->{HTTP_RANGE}; - if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) { - ($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size); - if ($code == 416) { - push @h, 'Content-Range', "bytes */$size"; - return [ 416, \@h, [] ]; - } - } - - push @h, 'Content-Type', $type, 'Content-Length', $len; - sub { - my ($res) = @_; # Plack callback - my $fh = $res->([ $code, \@h ]); - my $buf; - my $n = 8192; - while ($len > 0) { - $n = $len if $len < $n; - my $r = read($in, $buf, $n); - last if (!defined($r) || $r <= 0); - $len -= $r; - $fh->write($buf); - } - $fh->close; - } -} - -sub prepare_range { - my ($cgi, $in, $h, $beg, $end, $size) = @_; - my $code = 200; - my $len = $size; - if ($beg eq '') { - if ($end ne '') { # "bytes=-$end" => last N bytes - $beg = $size - $end; - $beg = 0 if $beg < 0; - $end = $size - 1; - $code = 206; - } else { - $code = 416; - } - } else { - if ($beg > $size) { - $code = 416; - } elsif ($end eq '' || $end >= $size) { - $end = $size - 1; - $code = 206; - } elsif ($end < $size) { - $code = 206; - } else 
{ - $code = 416; - } - } - if ($code == 206) { - $len = $end - $beg + 1; - if ($len <= 0) { - $code = 416; - } else { - seek($in, $beg, SEEK_SET) or return [ 500, [], [] ]; - push @$h, qw(Accept-Ranges bytes Content-Range); - push @$h, "bytes $beg-$end/$size"; - - # FIXME: Plack::Middleware::Deflater bug? - if (my $env = $cgi->{env}) { - $env->{'psgix.no-compress'} = 1; - } - } - } - ($code, $len); -} - -1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 1c6936f7..b4b012f9 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -18,7 +18,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape); use constant SSOMA_URL => 'http://ssoma.public-inbox.org/'; use constant PI_URL => 'http://public-inbox.org/'; require PublicInbox::Git; -use PublicInbox::GitHTTPDumb; +use PublicInbox::GitHTTPBackend; our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; @@ -28,10 +28,17 @@ sub run { my ($cgi, $method) = @_; $pi_config ||= PublicInbox::Config->new; my $ctx = { cgi => $cgi, pi_config => $pi_config }; - if ($method !~ /\AGET|HEAD\z/) { + my $path_info = $cgi->path_info; + + if ($method eq 'POST' && + $path_info =~ m!$LISTNAME_RE/(git-upload-pack)\z!) 
{ + my $path = $2; + return (invalid_list($ctx, $1) || + serve_git($cgi, $ctx->{git}, $path)); + } + elsif ($method !~ /\AGET|HEAD\z/) { return r(405, 'Method Not Allowed'); } - my $path_info = $cgi->path_info; # top-level indices and feeds if ($path_info eq '/') { @@ -44,7 +51,7 @@ sub run { invalid_list($ctx, $1) || get_atom($ctx); } elsif ($path_info =~ m!$LISTNAME_RE/ - ($PublicInbox::GitHTTPDumb::ANY)\z!ox) { + ($PublicInbox::GitHTTPBackend::ANY)\z!ox) { my $path = $2; invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path); } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) { @@ -402,7 +409,7 @@ sub msg_page { sub serve_git { my ($cgi, $git, $path) = @_; - PublicInbox::GitHTTPDumb::serve($cgi, $git, $path); + PublicInbox::GitHTTPBackend::serve($cgi, $git, $path); } 1; -- cgit v1.2.3-24-ge0c7