From d39a8a440c9b5c59e1fa058467f64034f8974e0e Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 2 Feb 2016 04:00:08 +0000
Subject: www: support git cloning via dumb HTTP

This is enabled by default, for now.  Smart HTTP cloning support
will be added later, but it will be optional since it can be highly
CPU and memory intensive.
---
 lib/PublicInbox/GitHTTPDumb.pm | 134 +++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WWW.pm         |  10 ++++
 public-inbox-index             |   3 +
 public-inbox-mda               |   8 ++-
 t/cgi.t                        |  18 ++++++
 5 files changed, 171 insertions(+), 2 deletions(-)
 create mode 100644 lib/PublicInbox/GitHTTPDumb.pm

diff --git a/lib/PublicInbox/GitHTTPDumb.pm b/lib/PublicInbox/GitHTTPDumb.pm
new file mode 100644
index 00000000..c088d8c4
--- /dev/null
+++ b/lib/PublicInbox/GitHTTPDumb.pm
@@ -0,0 +1,134 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+
+# when no endpoints match, fallback to this and serve a static file
+# This can serve Smart HTTP in the future.
+package PublicInbox::GitHTTPDumb;
+use strict;
+use warnings;
+use Fcntl qw(:seek);
+
+# n.b. serving "description" and "cloneurl" should be innocuous enough to
+# not cause problems.  serving "config" might...
+my @text = qw[HEAD info/refs
+	objects/info/(?:http-alternates|alternates|packs)
+	cloneurl description];
+
+my @binary = qw!
+	objects/[a-f0-9]{2}/[a-f0-9]{38}
+	objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
+	!;
+
+our $ANY = join('|', @binary, @text);
+my $BIN = join('|', @binary);
+my $TEXT = join('|', @text);
+
+# returns an empty text/plain PSGI response for the status code in $_[0]
+sub r {
+	[ $_[0] , [qw(Content-Type text/plain Content-Length 0) ], [] ]
+}
+
+# Serves $path (relative to $git->{git_dir}) as a static file.
+# Only paths matching @binary or @text are served, anything else is a 404.
+# A "bytes=BEG-END" value in HTTP_RANGE yields a 206 or 416 response.
+# Returns a PSGI response: an arrayref for errors, or a streaming
+# callback (code ref) for 200 and 206 responses.
+sub serve {
+	my ($cgi, $git, $path) = @_;
+	my $type;
+	if ($path =~ /\A(?:$BIN)\z/o) {
+		$type = 'application/octet-stream';
+	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
+		$type = 'text/plain';
+	} else {
+		return r(404);
+	}
+	my $f = "$git->{git_dir}/$path";
+	return r(404) unless -f $f && -r _;
+	my @st = stat(_);
+	my $size = $st[7];
+
+	# TODO: If-Modified-Since and Last-Modified
+	open my $in, '<', $f or return r(404);
+	my $code = 200;
+	my $len = $size;
+	my @h;
+
+	my $env = $cgi->{env} || \%ENV;
+	my $range = $env->{HTTP_RANGE};
+	if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) {
+		($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size);
+		return r(500) if $code == 500; # seek failed
+		if ($code == 416) {
+			push @h, 'Content-Range', "bytes */$size";
+			return [ 416, \@h, [] ];
+		}
+	}
+
+	push @h, 'Content-Type', $type, 'Content-Length', $len;
+	sub {
+		my ($res) = @_; # Plack callback
+		my $fh = $res->([ $code, \@h ]);
+		my $buf;
+		my $n = 8192;
+		while ($len > 0) {
+			$n = $len if $len < $n;
+			my $r = read($in, $buf, $n);
+			last if (!defined($r) || $r <= 0);
+			$len -= $r;
+			$fh->write($buf);
+		}
+		$fh->close;
+	}
+}
+
+# Validates the byte range $beg-$end (strings captured from the Range
+# header, either may be '') against a file of $size bytes.  On success,
+# seeks $in to the first requested byte and appends Accept-Ranges and
+# Content-Range to the header array $h.
+# Returns the list ($code, $len): $code is 206 (satisfiable), 416
+# (unsatisfiable) or 500 (seek failure); $len is the byte count to send.
+sub prepare_range {
+	my ($cgi, $in, $h, $beg, $end, $size) = @_;
+	my $code = 200;
+	my $len = $size;
+	if ($beg eq '') {
+		if ($end ne '') { # "bytes=-$end" => last N bytes
+			$beg = $size - $end;
+			$beg = 0 if $beg < 0;
+			$end = $size - 1;
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	} else {
+		if ($beg > $size) {
+			$code = 416;
+		} elsif ($end eq '' || $end >= $size) {
+			$end = $size - 1;
+			$code = 206;
+		} elsif ($end < $size) {
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	}
+	if ($code == 206) {
+		$len = $end - $beg + 1;
+		if ($len <= 0) {
+			$code = 416;
+		} else {
+			seek($in, $beg, SEEK_SET) or return (500, 0);
+			push @$h, qw(Accept-Ranges bytes Content-Range);
+			push @$h, "bytes $beg-$end/$size";
+
+			# FIXME: Plack::Middleware::Deflater bug?
+			if (my $env = $cgi->{env}) {
+				$env->{'psgix.no-compress'} = 1;
+			}
+		}
+	}
+	($code, $len);
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index d5635d84..1c6936f7 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -18,6 +18,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape);
 use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 require PublicInbox::Git;
+use PublicInbox::GitHTTPDumb;
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
@@ -42,6 +43,10 @@ sub run {
 	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
 		invalid_list($ctx, $1) || get_atom($ctx);
 
+	} elsif ($path_info =~ m!$LISTNAME_RE/
+				($PublicInbox::GitHTTPDumb::ANY)\z!ox) {
+		my $path = $2;
+		invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path);
 	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
 		msg_page($ctx, $1, $2, $3);
 
@@ -395,4 +400,9 @@ sub msg_page {
 	r404($ctx);
 }
 
+sub serve_git {
+	my ($cgi, $git, $path) = @_;
+	PublicInbox::GitHTTPDumb::serve($cgi, $git, $path);
+}
+
 1;
diff --git a/public-inbox-index b/public-inbox-index
index 53449556..578d91d5 100755
--- a/public-inbox-index
+++ b/public-inbox-index
@@ -57,6 +57,9 @@ foreach my $dir (@dirs) {
 sub index_dir {
 	my ($git_dir) = @_;
 	-d $git_dir or die "$git_dir does not appear to be a git repository\n";
+
+	system('git', "--git-dir=$git_dir", 'update-server-info') and
+		die "git update-server-info failed for $git_dir";
 	my $s = PublicInbox::SearchIdx->new($git_dir, 1);
 	$s->index_sync;
 }
diff --git a/public-inbox-mda b/public-inbox-mda
index 73c4ae1c..24feeb81 100755
--- a/public-inbox-mda
+++ b/public-inbox-mda
@@ -62,7 +62,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
 	PublicInbox::MDA->author_info($msg);
 
 END {
-	search_index_sync($main_repo) if ($? == 0);
+	index_sync($main_repo) if ($? == 0);
 };
 
 local $ENV{GIT_AUTHOR_NAME} = $name;
@@ -98,8 +98,12 @@ sub do_spamc {
 	return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1;
 }
 
-sub search_index_sync {
+sub index_sync {
 	my ($git_dir) = @_;
+
+	# potentially user-visible, ignore errors:
+	system('git', "--git-dir=$git_dir", 'update-server-info');
+
 	eval {
 		require PublicInbox::SearchIdx;
 		PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
diff --git a/t/cgi.t b/t/cgi.t
index 18632cee..4ce6514c 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -102,6 +102,24 @@ EOF
 	like($res->{head}, qr/Status:\s*404/i, "index returns 404");
 }
 
+# dumb HTTP support
+{
+	my $path = "/test/info/refs";
+	my $res = cgi_run($path);
+	like($res->{head}, qr/Status:\s*200/i, "info/refs readable");
+	my $orig = $res->{body};
+
+	local $ENV{HTTP_RANGE} = 'bytes=5-10';
+	$res = cgi_run($path);
+	like($res->{head}, qr/Status:\s*206/i, "info/refs partial OK");
+	is($res->{body}, substr($orig, 5, 6), 'partial body OK');
+
+	local $ENV{HTTP_RANGE} = 'bytes=5-';
+	$res = cgi_run($path);
+	like($res->{head}, qr/Status:\s*206/i, "info/refs partial past end OK");
+	is($res->{body}, substr($orig, 5), 'partial body OK past end');
+}
+
 # atom feeds
 {
 	local $ENV{HOME} = $home;
-- 
cgit v1.2.3-24-ge0c7