From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.2 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, RP_MATCHES_RCVD,URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 865EE20276 for ; Tue, 2 Feb 2016 04:09:49 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] www: support git cloning via dumb HTTP Date: Tue, 2 Feb 2016 04:09:49 +0000 Message-Id: <20160202040949.26853-1-e@80x24.org> List-Id: This is enabled by default, for now. Smart HTTP cloning support will be added later, but it will be optional since it can be highly CPU and memory intensive. --- lib/PublicInbox/GitHTTPDumb.pm | 121 +++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/WWW.pm | 10 ++++ public-inbox-index | 3 + public-inbox-mda | 8 ++- t/cgi.t | 18 ++++++ 5 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 lib/PublicInbox/GitHTTPDumb.pm diff --git a/lib/PublicInbox/GitHTTPDumb.pm b/lib/PublicInbox/GitHTTPDumb.pm new file mode 100644 index 0000000..c088d8c --- /dev/null +++ b/lib/PublicInbox/GitHTTPDumb.pm @@ -0,0 +1,121 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ + +# when no endpoints match, fallback to this and serve a static file +# This can serve Smart HTTP in the future. +package PublicInbox::GitHTTPDumb; +use strict; +use warnings; +use Fcntl qw(:seek); + +# n.b. serving "description" and "cloneurl" should be innocuous enough to +# not cause problems. serving "config" might... +my @text = qw[HEAD info/refs + objects/info/(?:http-alternates|alternates|packs) + cloneurl description]; + +my @binary = qw! + objects/[a-f0-9]{2}/[a-f0-9]{38} + objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx) + !; + +our $ANY = join('|', @binary, @text); +my $BIN = join('|', @binary); +my $TEXT = join('|', @text); + +sub r { + [ $_[0] , [qw(Content-Type text/plain Content-Length 0) ], [] ] +} + +sub serve { + my ($cgi, $git, $path) = @_; + my $type; + if ($path =~ /\A(?:$BIN)\z/o) { + $type = 'application/octet-stream'; + } elsif ($path =~ /\A(?:$TEXT)\z/o) { + $type = 'text/plain'; + } else { + return r(404); + } + my $f = "$git->{git_dir}/$path"; + return r(404) unless -f $f && -r _; + my @st = stat(_); + my $size = $st[7]; + + # TODO: If-Modified-Since and Last-Modified + open my $in, '<', $f or return r(404); + my $code = 200; + my $len = $size; + my @h; + + my $env = $cgi->{env} || \%ENV; + my $range = $env->{HTTP_RANGE}; + if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) { + ($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size); + if ($code == 416) { + push @h, 'Content-Range', "bytes */$size"; + return [ 416, \@h, [] ]; + } + } + + push @h, 'Content-Type', $type, 'Content-Length', $len; + sub { + my ($res) = @_; # Plack callback + my $fh = $res->([ $code, \@h ]); + my $buf; + my $n = 8192; + while ($len > 0) { + $n = $len if $len < $n; + my $r = read($in, $buf, $n); + last if (!defined($r) || $r <= 0); + $len -= $r; + $fh->write($buf); + } + $fh->close; + } +} + +sub prepare_range { + my ($cgi, $in, $h, $beg, $end, $size) = @_; + my $code = 200; + my $len = $size; + if ($beg eq '') { + if ($end ne '') { # "bytes=-$end" => last N bytes + $beg = $size - $end; + $beg = 0 if $beg < 0; + $end = $size - 1; + $code = 206; + } else { + $code = 416; + } + } else { + if ($beg > $size) { + $code = 416; + } elsif ($end eq '' || $end >= $size) { + $end = $size - 1; + $code = 206; + } elsif ($end < $size) { + $code = 206; + } else { + $code = 416; + } + } + if ($code == 206) { + $len = $end - $beg + 1; + if ($len <= 0) { + $code = 416; + } else { + seek($in, $beg, SEEK_SET) or return [ 500, [], [] ]; + push @$h, qw(Accept-Ranges bytes Content-Range); + push @$h, "bytes $beg-$end/$size"; + + # FIXME: Plack::Middleware::Deflater bug? + if (my $env = $cgi->{env}) { + $env->{'psgix.no-compress'} = 1; + } + } + } + ($code, $len); +} + +1; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index d5635d8..1c6936f 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -18,6 +18,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape); use constant SSOMA_URL => 'http://ssoma.public-inbox.org/'; use constant PI_URL => 'http://public-inbox.org/'; require PublicInbox::Git; +use PublicInbox::GitHTTPDumb; our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; our $MID_RE = qr!([^/]+)!; our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!; @@ -42,6 +43,10 @@ sub run { } elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) { invalid_list($ctx, $1) || get_atom($ctx); + } elsif ($path_info =~ m!$LISTNAME_RE/ + ($PublicInbox::GitHTTPDumb::ANY)\z!ox) { + my $path = $2; + invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path); } elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) { msg_page($ctx, $1, $2, $3); @@ -395,4 +400,9 @@ sub msg_page { r404($ctx); } +sub serve_git { + my ($cgi, $git, $path) = @_; + PublicInbox::GitHTTPDumb::serve($cgi, $git, $path); +} + 1; diff --git a/public-inbox-index b/public-inbox-index index 5344955..578d91d 100755 --- a/public-inbox-index +++ b/public-inbox-index @@ -57,6 +57,9 @@ foreach my $dir (@dirs) { sub index_dir { my ($git_dir) = @_; -d $git_dir or die "$git_dir does not appear to be a git repository\n"; + + system('git', "--git-dir=$git_dir", 'update-server-info') and + die "git update-server-info failed for $git_dir"; my $s = PublicInbox::SearchIdx->new($git_dir, 1); $s->index_sync; } diff --git a/public-inbox-mda b/public-inbox-mda index 73c4ae1..24feeb8 100755 --- a/public-inbox-mda +++ b/public-inbox-mda @@ -62,7 +62,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) && PublicInbox::MDA->author_info($msg); END { - search_index_sync($main_repo) if ($? == 0); + index_sync($main_repo) if ($? == 0); }; local $ENV{GIT_AUTHOR_NAME} = $name; @@ -98,8 +98,12 @@ sub do_spamc { return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1; } -sub search_index_sync { +sub index_sync { my ($git_dir) = @_; + + # potentially user-visible, ignore errors: + system('git', "--git-dir=$git_dir", 'update-server-info'); + eval { require PublicInbox::SearchIdx; PublicInbox::SearchIdx->new($git_dir, 2)->index_sync; diff --git a/t/cgi.t b/t/cgi.t index 18632ce..4ce6514 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -102,6 +102,24 @@ EOF like($res->{head}, qr/Status:\s*404/i, "index returns 404"); } +# dumb HTTP support +{ + my $path = "/test/info/refs"; + my $res = cgi_run($path); + like($res->{head}, qr/Status:\s*200/i, "info/refs readable"); + my $orig = $res->{body}; + + local $ENV{HTTP_RANGE} = 'bytes=5-10'; + $res = cgi_run($path); + like($res->{head}, qr/Status:\s*206/i, "info/refs partial OK"); + is($res->{body}, substr($orig, 5, 6), 'partial body OK'); + + local $ENV{HTTP_RANGE} = 'bytes=5-'; + $res = cgi_run($path); + like($res->{head}, qr/Status:\s*206/i, "info/refs partial past end OK"); + is($res->{body}, substr($orig, 5), 'partial body OK past end'); +} + # atom feeds { local $ENV{HOME} = $home; -- EW