public-inbox.git  about / heads / tags
an "archives first" approach to mailing lists
blob 396aa7839231ffd0c739d0afad8419da06de8ab3 4811 bytes (raw)
$ git show HEAD:lib/PublicInbox/GitHTTPBackend.pm	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
 
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# when no endpoints match, fallback to this and serve a static file
# or smart HTTP.  This is our wrapper for git-http-backend(1)
package PublicInbox::GitHTTPBackend;
use strict;
use v5.10.1;
use Fcntl qw(:seek);
use IO::Handle; # ->flush
use HTTP::Date qw(time2str);
use PublicInbox::Limiter;
use PublicInbox::Qspawn;
use PublicInbox::Tmpfile;
use PublicInbox::WwwStatic qw(r @NO_CACHE);
use Carp ();

# 32 is same as the git-daemon connection limit
my $default_limiter = PublicInbox::Limiter->new(32);

# n.b. serving "description" and "cloneurl" should be innocuous enough to
# not cause problems.  serving "config" might...
my @text = qw[HEAD info/refs info/attributes
	objects/info/(?:http-alternates|alternates|packs)
	cloneurl description];

my @binary = ('objects/[a-f0-9]{2}/[a-f0-9]{38,62}',
	'objects/pack/pack-[a-f0-9]{40,64}\.(?:pack|idx)');

our $ANY = join('|', @binary, @text, 'git-upload-pack');
my $TEXT = join('|', @text);

sub serve {
	my ($env, $git, $path) = @_;

	# Documentation/technical/http-protocol.txt in git.git
	# requires one and exactly one query parameter:
	if ($env->{QUERY_STRING} =~ /\Aservice=git-[A-Za-z0-9_]+-pack\z/ ||
				$path =~ /\Agit-[A-Za-z0-9_]+-pack\z/) {
		my $ok = serve_smart($env, $git, $path);
		return $ok if $ok;
	}

	serve_dumb($env, $git, $path);
}

sub ucarp { Carp::carp(@_); undef }

my $prev = 0;
my $exp;
sub cache_one_year {
	my ($h) = @_;
	my $t = time + 31536000;
	push @$h, 'Expires', $t == $prev ? $exp : ($exp = time2str($prev = $t)),
		'Cache-Control', 'public, max-age=31536000';
}

sub serve_dumb {
	my ($env, $git, $path) = @_;

	my $h = [];
	my $type;
	if ($path =~ m!\Aobjects/[a-f0-9]{2}/[a-f0-9]{38,62}\z!) {
		$type = 'application/x-git-loose-object';
		cache_one_year($h);
	} elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40,64}\.pack\z!) {
		$type = 'application/x-git-packed-objects';
		cache_one_year($h);
	} elsif ($path =~ m!\Aobjects/pack/pack-[a-f0-9]{40,64}\.idx\z!) {
		$type = 'application/x-git-packed-objects-toc';
		cache_one_year($h);
	} elsif ($path =~ /\A(?:$TEXT)\z/o) {
		$type = 'text/plain';
		push @$h, @NO_CACHE;
	} else {
		return r(404);
	}
	$path = "$git->{git_dir}/$path";
	PublicInbox::WwwStatic::response($env, $h, $path, $type);
}

sub ghb_parse_hdr { # header parser for Qspawn
	my ($r, $bref, @dumb_args) = @_;
	my $res = parse_cgi_headers($r, $bref) or return; # incomplete
	$res->[0] == 403 ? serve_dumb(@dumb_args) : $res;
}

# returns undef if 403 so it falls back to dumb HTTP
sub serve_smart {
	my ($env, $git, $path) = @_;
	my %env = %ENV;
	# GIT_COMMITTER_NAME, GIT_COMMITTER_EMAIL
	# may be set in the server-process and are passed as-is
	foreach my $name (qw(QUERY_STRING
				REMOTE_USER REMOTE_ADDR
				HTTP_CONTENT_ENCODING
				HTTP_GIT_PROTOCOL
				CONTENT_TYPE
				SERVER_PROTOCOL
				REQUEST_METHOD)) {
		my $val = $env->{$name};
		$env{$name} = $val if defined $val;
	}
	my $limiter = $git->{-httpbackend_limiter} || $default_limiter;
	$env{GIT_HTTP_EXPORT_ALL} = '1';
	$env{PATH_TRANSLATED} = "$git->{git_dir}/$path";
	my $rdr = input_prepare($env) or return r(500);
	$rdr->{quiet} = 1;
	my $qsp = PublicInbox::Qspawn->new([qw(git http-backend)], \%env, $rdr);
	$qsp->psgi_yield($env, $limiter, \&ghb_parse_hdr, $env, $git, $path);
}

sub input_prepare {
	my ($env) = @_;

	my $input = $env->{'psgi.input'};
	my $fd = eval { fileno($input) };
	return { 0 => $fd } if (defined $fd && $fd >= 0);
	my $id = "git-http.input.$env->{REMOTE_ADDR}:$env->{REMOTE_PORT}";
	my $in = tmpfile($id) // return ucarp("tmpfile: $!");
	my $buf;
	while (1) {
		my $r = $input->read($buf, 8192) // return ucarp("read $!");
		last if $r == 0;
		print $in $buf // return ucarp("print: $!");
	}
	# ensure it's visible to git-http-backend(1):
	$in->flush // return ucarp("flush: $!");
	sysseek($in, 0, SEEK_SET) // return ucarp($env, "seek: $!");
	{ 0 => $in };
}

sub parse_cgi_headers { # {parse_hdr} for Qspawn
	my ($r, $bref, $ctx) = @_;
	return r(500) unless defined $r && $r >= 0;
	$$bref =~ s/\A(.*?)\r?\n\r?\n//s or return $r == 0 ? r(500) : undef;
	my $h = $1;
	my $code = 200;
	my @h;
	foreach my $l (split(/\r?\n/, $h)) {
		my ($k, $v) = split(/:\s*/, $l, 2);
		if ($k =~ /\AStatus\z/i) {
			($code) = ($v =~ /\b([0-9]+)\b/);
		} else {
			push @h, $k, $v;
		}
	}

	# fallback to WwwCoderepo if cgit 404s
	if ($code == 404 && $ctx->{www} && !$ctx->{_coderepo_tried}++) {
		my $wcb = delete $ctx->{env}->{'qspawn.wcb'};
		$ctx->{env}->{'plack.skip-deflater'} = 1; # prevent 2x gzip
		$ctx->{env}->{'qspawn.fallback'} = $code;
		my $res = $ctx->{www}->coderepo->srv($ctx);
		$ctx->{env}->{'qspawn.wcb'} = $wcb;
		$res; # CODE or ARRAY ref
	} else {
		[ $code, \@h ]
	}
}

1;

git clone https://public-inbox.org/public-inbox.git
git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git