user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
blob 0d31b01462a7dfb486f5e6f072bc248fc5a3c363 3922 bytes (raw)
name: lib/PublicInbox/Gcf2.pm 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
 
# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# backend for a git-cat-file-workalike based on libgit2,
# other libgit2 stuff may go here, too.
package PublicInbox::Gcf2;
use strict;
use v5.10.1;
use PublicInbox::Spawn qw(which popen_rd); # may set PERL_INLINE_DIRECTORY
use Fcntl qw(LOCK_EX SEEK_SET);
use IO::Handle; # autoflush
BEGIN {
	my (%CFG, $c_src);
	# PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
	# to ~/.cache/public-inbox/inline-c if it exists
	my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
		die 'PERL_INLINE_DIRECTORY not defined';
	my $f = "$inline_dir/.public-inbox.lock";
	open my $fh, '+>', $f or die "open($f): $!";
	my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') //
		die "pkg-config missing for libgit2";
	my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
	my $rdr = {};
	open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
	for my $x (qw(libgit2)) {
		my $l = popen_rd([$pc, '--libs', $x], undef, $rdr);
		$l = do { local $/; <$l> };
		next if $?;
		my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr);
		$c = do { local $/; <$c> };
		next if $?;

		# note: we name C source files .h to prevent
		# ExtUtils::MakeMaker from automatically trying to
		# build them.
		my $f = "$dir/gcf2_$x.h";
		if (open(my $fh, '<', $f)) {
			chomp($l, $c);
			local $/;
			defined($c_src = <$fh>) or die "read $f: $!";
			$CFG{LIBS} = $l;
			$CFG{CCFLAGSEX} = $c;
			last;
		} else {
			die "E: $f: $!";
		}
	}
	die "E: libgit2 not installed\n" unless $c_src;

	open my $oldout, '>&', \*STDOUT or die "dup(1): $!";
	open my $olderr, '>&', \*STDERR or die "dup(2): $!";
	open STDOUT, '>&', $fh or die "1>$f: $!";
	open STDERR, '>&', $fh or die "2>$f: $!";
	STDERR->autoflush(1);
	STDOUT->autoflush(1);

	# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
	flock($fh, LOCK_EX) or die "LOCK_EX($f): $!\n";
	# we use Capitalized and ALLCAPS for compatibility with old Inline::C
	eval <<'EOM';
use Inline C => Config => %CFG, BOOT => q[git_libgit2_init();];
use Inline C => $c_src, BUILD_NOISY => 1;
EOM
	my $err = $@;
	open(STDERR, '>&', $olderr) or warn "restore stderr: $!";
	open(STDOUT, '>&', $oldout) or warn "restore stdout: $!";
	if ($err) {
		seek($fh, 0, SEEK_SET);
		my @msg = <$fh>;
		die "Inline::C Gcf2 build failed:\n", $err, "\n", @msg;
	}
}

sub add_alt ($$) {
	my ($gcf2, $objdir) = @_;

	# libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
	# in Debian) doesn't handle relative epochs properly when nested
	# multiple levels.  Add all the absolute paths to workaround it,
	# since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute
	# paths to reference $V2INBOX_DIR/all.git/objects and
	# $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths
	# to refer to $V2INBOX_DIR/git/$EPOCH.git/objects
	#
	# See https://bugs.debian.org/975607
	if (open(my $fh, '<', "$objdir/info/alternates")) {
		chomp(my @abs_alt = grep(m!^/!, <$fh>));
		$gcf2->add_alternate($_) for @abs_alt;
	}
	$gcf2->add_alternate($objdir);
	1;
}

# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop
# (see lib/PublicInbox/Gcf2Client.pm)
sub loop () {
	my $gcf2 = new();
	my %seen;
	STDERR->autoflush(1);
	STDOUT->autoflush(1);

	while (<STDIN>) {
		chomp;
		my ($oid, $git_dir) = split(/ /, $_, 2);
		$seen{$git_dir} //= add_alt($gcf2, "$git_dir/objects");
		if (!$gcf2->cat_oid(1, $oid)) {
			# retry once if missing.  We only get unabbreviated OIDs
			# from SQLite or Xapian DBs, here, so malicious clients
			# can't trigger excessive retries:
			warn "I: $$ $oid missing, retrying in $git_dir\n";

			$gcf2 = new();
			%seen = ($git_dir => add_alt($gcf2,"$git_dir/objects"));

			if ($gcf2->cat_oid(1, $oid)) {
				warn "I: $$ $oid found after retry\n";
			} else {
				warn "W: $$ $oid missing after retry\n";
				print "$oid missing\n"; # mimic git-cat-file
			}
		}
	}
}

1;

debug log:

solving 0d31b014 ...
found 0d31b014 in https://80x24.org/public-inbox.git

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of its contents.^

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).