From d39a8a440c9b5c59e1fa058467f64034f8974e0e Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 2 Feb 2016 04:00:08 +0000
Subject: www: support git cloning via dumb HTTP

This is enabled by default, for now.  Smart HTTP cloning support
will be added later, but it will be optional since it can
be highly CPU and memory intensive.
---
 lib/PublicInbox/GitHTTPDumb.pm | 137 +++++++++++++++++++++++++++++++++++++++++
 lib/PublicInbox/WWW.pm         |  10 ++++
 2 files changed, 147 insertions(+)
 create mode 100644 lib/PublicInbox/GitHTTPDumb.pm

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/GitHTTPDumb.pm b/lib/PublicInbox/GitHTTPDumb.pm
new file mode 100644
index 00000000..c088d8c4
--- /dev/null
+++ b/lib/PublicInbox/GitHTTPDumb.pm
@@ -0,0 +1,137 @@
+# Copyright (C) 2016 all contributors
+# License: AGPL-3.0+
+
+# when no endpoints match, fallback to this and serve a static file
+# This can serve Smart HTTP in the future.
+package PublicInbox::GitHTTPDumb;
+use strict;
+use warnings;
+use Fcntl qw(:seek);
+
+# n.b. serving "description" and "cloneurl" should be innocuous enough to
+# not cause problems.  serving "config" might...
+my @text = qw[HEAD info/refs
+	objects/info/(?:http-alternates|alternates|packs)
+	cloneurl description];
+
+my @binary = qw!
+	objects/[a-f0-9]{2}/[a-f0-9]{38}
+	objects/pack/pack-[a-f0-9]{40}\.(?:pack|idx)
+	!;
+
+# $ANY is interpolated into the routing regexp in PublicInbox::WWW;
+# $BIN and $TEXT select the Content-Type in serve()
+our $ANY = join('|', @binary, @text);
+my $BIN = join('|', @binary);
+my $TEXT = join('|', @text);
+
+# returns an empty-bodied text/plain PSGI response with the given status
+sub r {
+	[ $_[0] , [qw(Content-Type text/plain Content-Length 0) ], [] ]
+}
+
+# Serves $path (relative to $git->{git_dir}) if it matches one of the
+# patterns above, honoring a single "bytes=BEG-END" Range request.
+# Returns a PSGI response: an arrayref for errors, or a streaming
+# callback (coderef) when there is file content to send.
+sub serve {
+	my ($cgi, $git, $path) = @_;
+	my $type;
+	if ($path =~ /\A(?:$BIN)\z/o) {
+		$type = 'application/octet-stream';
+	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
+		$type = 'text/plain';
+	} else {
+		return r(404);
+	}
+	my $f = "$git->{git_dir}/$path";
+	return r(404) unless -f $f && -r _;
+	my @st = stat(_);
+	my $size = $st[7];
+
+	# TODO: If-Modified-Since and Last-Modified
+	open my $in, '<', $f or return r(404);
+	my $code = 200;
+	my $len = $size;
+	my @h;
+
+	my $env = $cgi->{env} || \%ENV;
+	my $range = $env->{HTTP_RANGE};
+	if (defined $range && $range =~ /\bbytes=(\d*)-(\d*)\z/) {
+		($code, $len) = prepare_range($cgi, $in, \@h, $1, $2, $size);
+		if ($code == 416) {
+			push @h, 'Content-Range', "bytes */$size";
+			return [ 416, \@h, [] ];
+		}
+		# seek failed, there is nothing sensible to stream
+		return r(500) if $code == 500;
+	}
+
+	push @h, 'Content-Type', $type, 'Content-Length', $len;
+	sub {
+		my ($res) = @_; # Plack callback
+		my $fh = $res->([ $code, \@h ]);
+		my $buf;
+		my $n = 8192;
+		while ($len > 0) {
+			$n = $len if $len < $n;
+			my $r = read($in, $buf, $n);
+			last if (!defined($r) || $r <= 0);
+			$len -= $r;
+			$fh->write($buf);
+		}
+		$fh->close;
+	}
+}
+
+# Resolves a byte range ($beg and $end are the possibly-empty digit
+# strings captured from the Range header) against a file of $size bytes.
+# Returns a ($code, $len) list: 206 and the number of bytes to send after
+# seeking $in to $beg and pushing Accept-Ranges/Content-Range onto @$h,
+# 416 if the range is unsatisfiable, or (500, 0) if seeking fails.
+sub prepare_range {
+	my ($cgi, $in, $h, $beg, $end, $size) = @_;
+	my $code = 200;
+	my $len = $size;
+	if ($beg eq '') {
+		if ($end ne '') { # "bytes=-$end" => last N bytes
+			$beg = $size - $end;
+			$beg = 0 if $beg < 0;
+			$end = $size - 1;
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	} else {
+		if ($beg > $size) {
+			$code = 416;
+		} elsif ($end eq '' || $end >= $size) {
+			$end = $size - 1;
+			$code = 206;
+		} elsif ($end < $size) {
+			$code = 206;
+		} else {
+			$code = 416;
+		}
+	}
+	if ($code == 206) {
+		$len = $end - $beg + 1;
+		if ($len <= 0) {
+			$code = 416;
+		} else {
+			# callers unpack a ($code, $len) list, so never
+			# return a PSGI arrayref from here
+			seek($in, $beg, SEEK_SET) or return (500, 0);
+			push @$h, qw(Accept-Ranges bytes Content-Range);
+			push @$h, "bytes $beg-$end/$size";
+
+			# FIXME: Plack::Middleware::Deflater bug?
+			if (my $env = $cgi->{env}) {
+				$env->{'psgix.no-compress'} = 1;
+			}
+		}
+	}
+	($code, $len);
+}
+
+1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index d5635d84..1c6936f7 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -18,6 +18,7 @@ use URI::Escape qw(uri_escape_utf8 uri_unescape);
 use constant SSOMA_URL => 'http://ssoma.public-inbox.org/';
 use constant PI_URL => 'http://public-inbox.org/';
 require PublicInbox::Git;
+use PublicInbox::GitHTTPDumb;
 our $LISTNAME_RE = qr!\A/([\w\.\-]+)!;
 our $MID_RE = qr!([^/]+)!;
 our $END_RE = qr!(f/|T/|t/|t\.mbox(?:\.gz)?|t\.atom|raw|)!;
@@ -42,6 +43,10 @@ sub run {
 	} elsif ($path_info =~ m!$LISTNAME_RE/(?:atom\.xml|new\.atom)\z!o) {
 		invalid_list($ctx, $1) || get_atom($ctx);
 
+	} elsif ($path_info =~ m!$LISTNAME_RE/
+				($PublicInbox::GitHTTPDumb::ANY)\z!ox) {
+		my $path = $2;
+		invalid_list($ctx, $1) || serve_git($cgi, $ctx->{git}, $path);
 	} elsif ($path_info =~ m!$LISTNAME_RE/$MID_RE/$END_RE\z!o) {
 		msg_page($ctx, $1, $2, $3);
 
@@ -395,4 +400,9 @@ sub msg_page {
 	r404($ctx);
 }
 
+sub serve_git {
+	my ($cgi, $git, $path) = @_;
+	PublicInbox::GitHTTPDumb::serve($cgi, $git, $path);
+}
+
 1;
-- 
cgit v1.2.3-24-ge0c7