user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
blob 0f9675797ffd937eea27927c90c735179a766318 3459 bytes (raw)
name: lib/PublicInbox/Gcf2.pm 	 # note: path name is non-authoritative(*)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
 
# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>

# backend for a git-cat-file-workalike based on libgit2,
# other libgit2 stuff may go here, too.
package PublicInbox::Gcf2;
use strict;
use v5.10.1;
use PublicInbox::Spawn qw(which popen_rd); # may set PERL_INLINE_DIRECTORY
use Fcntl qw(LOCK_EX);
use IO::Handle; # autoflush
my (%CFG, $c_src, $lockfh);
BEGIN {
	# PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
	# to ~/.cache/public-inbox/inline-c if it exists
	my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
		die 'PERL_INLINE_DIRECTORY not defined';
	my $f = "$inline_dir/.public-inbox.lock";
	open $lockfh, '>', $f or die "failed to open $f: $!\n";
	my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') //
		die "pkg-config missing for libgit2";
	my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
	my $rdr = {};
	open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
	for my $x (qw(libgit2)) {
		my $l = popen_rd([$pc, '--libs', $x], undef, $rdr);
		$l = do { local $/; <$l> };
		next if $?;
		my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr);
		$c = do { local $/; <$c> };
		next if $?;

		# note: we name C source files .h to prevent
		# ExtUtils::MakeMaker from automatically trying to
		# build them.
		my $f = "$dir/gcf2_$x.h";
		if (open(my $fh, '<', $f)) {
			chomp($l, $c);
			local $/;
			defined($c_src = <$fh>) or die "read $f: $!\n";
			$CFG{LIBS} = $l;
			$CFG{CCFLAGSEX} = $c;
			last;
		} else {
			die "E: $f: $!\n";
		}
	}
	die "E: libgit2 not installed\n" unless $c_src;

	# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
	flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
}

# we use Capitalized and ALLCAPS for compatibility with old Inline::C
use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();';
use Inline C => $c_src;
undef $c_src;
undef %CFG;
undef $lockfh;

sub add_alt ($$) {
	my ($gcf2, $objdir) = @_;

	# libgit2 (tested 0.27.7+dfsg.1-0.2 and 0.28.3+dfsg.1-1~bpo10+1
	# in Debian) doesn't handle relative epochs properly when nested
	# multiple levels.  Add all the absolute paths to workaround it,
	# since $EXTINDEX_DIR/ALL.git/objects/info/alternates uses absolute
	# paths to reference $V2INBOX_DIR/all.git/objects and
	# $V2INBOX_DIR/all.git/objects/info/alternates uses relative paths
	# to refer to $V2INBOX_DIR/git/$EPOCH.git/objects
	#
	# See https://bugs.debian.org/975607
	if (open(my $fh, '<', "$objdir/info/alternates")) {
		chomp(my @abs_alt = grep(m!^/!, <$fh>));
		$gcf2->add_alternate($_) for @abs_alt;
	}
	$gcf2->add_alternate($objdir);
	1;
}

# Usage: $^X -MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop
# (see lib/PublicInbox/Gcf2Client.pm)
sub loop () {
	my $gcf2 = new();
	my %seen;
	STDERR->autoflush(1);
	STDOUT->autoflush(1);

	while (<STDIN>) {
		chomp;
		my ($oid, $git_dir) = split(/ /, $_, 2);
		$seen{$git_dir} //= add_alt($gcf2, "$git_dir/objects");
		if (!$gcf2->cat_oid(1, $oid)) {
			# retry once if missing.  We only get unabbreviated OIDs
			# from SQLite or Xapian DBs, here, so malicious clients
			# can't trigger excessive retries:
			warn "I: $$ $oid missing, retrying in $git_dir\n";

			$gcf2 = new();
			%seen = ($git_dir => add_alt($gcf2,"$git_dir/objects"));

			if ($gcf2->cat_oid(1, $oid)) {
				warn "I: $$ $oid found after retry\n";
			} else {
				warn "W: $$ $oid missing after retry\n";
				print "$oid missing\n"; # mimic git-cat-file
			}
		}
	}
}

1;

debug log:

solving 0f967579 ...
found 0f967579 in https://80x24.org/public-inbox.git

(*) Git path names are given by the tree(s) the blob belongs to.
    Blobs themselves have no identifier aside from the hash of its contents.^

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).