diff options
-rw-r--r-- | MANIFEST | 3 | ||||
-rw-r--r-- | lib/PublicInbox/Gcf2.pm | 69 | ||||
-rw-r--r-- | lib/PublicInbox/gcf2_libgit2.h | 157 | ||||
-rw-r--r-- | t/gcf2.t | 113 |
4 files changed, 342 insertions, 0 deletions
@@ -128,6 +128,7 @@ lib/PublicInbox/Filter/Mirror.pm
 lib/PublicInbox/Filter/RubyLang.pm
 lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
+lib/PublicInbox/Gcf2.pm
 lib/PublicInbox/GetlineBody.pm
 lib/PublicInbox/Git.pm
 lib/PublicInbox/GitAsyncCat.pm
@@ -212,6 +213,7 @@ lib/PublicInbox/WwwStatic.pm
 lib/PublicInbox/WwwStream.pm
 lib/PublicInbox/WwwText.pm
 lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/gcf2_libgit2.h
 sa_config/Makefile
 sa_config/README
 sa_config/root/etc/spamassassin/public-inbox.pre
@@ -275,6 +277,7 @@ t/filter_mirror.t
 t/filter_rubylang.t
 t/filter_subjecttag.t
 t/filter_vger.t
+t/gcf2.t
 t/git-http-backend.psgi
 t/git.fast-import-data
 t/git.t
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
new file mode 100644
index 00000000..6ac3aa18
--- /dev/null
+++ b/lib/PublicInbox/Gcf2.pm
@@ -0,0 +1,69 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# git-cat-file based on libgit2
+#
+# The C functions (new, add_alternate, cat_oid, DESTROY) are compiled by
+# Inline::C from gcf2_libgit2.h, which is read from this file's directory.
+package PublicInbox::Gcf2;
+use strict;
+use PublicInbox::Spawn qw(which popen_rd);
+use Fcntl qw(LOCK_EX);
+my (%CFG, $c_src, $lockfh);
+
+# BEGIN runs before the "use Inline" statements below are compiled, so
+# %CFG and $c_src are populated by the time Inline sees them.
+BEGIN {
+	# PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
+	# to ~/.cache/public-inbox/inline-c if it exists
+	my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
+		die 'PERL_INLINE_DIRECTORY not defined';
+	my $f = "$inline_dir/.public-inbox.lock";
+	open $lockfh, '>', $f or die "failed to open $f: $!\n";
+
+	# die with a clear message rather than trying to spawn an
+	# undefined command (assumes which() returns undef on failure)
+	my $pc = which($ENV{PKG_CONFIG} // 'pkg-config') //
+		die "E: pkg-config missing for libgit2\n";
+	my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!);
+	my $rdr = {};
+	open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!";
+	for my $x (qw(libgit2)) {
+		# overwriting the handle with its output drops the pipe;
+		# $? is checked right after (assumes popen_rd handles
+		# reap the child on destruction -- TODO confirm)
+		my $libs = popen_rd([$pc, '--libs', $x], undef, $rdr);
+		$libs = do { local $/; <$libs> };
+		next if $?;
+		my $cflags = popen_rd([$pc, '--cflags', $x], undef, $rdr);
+		$cflags = do { local $/; <$cflags> };
+		next if $?;
+
+		# note: we name C source files .h to prevent
+		# ExtUtils::MakeMaker from automatically trying to
+		# build them.
+		my $src = "$dir/gcf2_$x.h";
+		if (open(my $fh, '<', $src)) {
+			chomp($libs, $cflags);
+			local $/;
+			$c_src = <$fh>;
+			$CFG{LIBS} = $libs;
+			$CFG{CCFLAGSEX} = $cflags;
+			last;
+		} else {
+			die "E: $src: $!\n";
+		}
+	}
+	die "E: libgit2 not installed\n" unless $c_src;
+
+	# CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+	flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n";
+}
+
+# we use Capitalized and ALLCAPS for compatibility with old Inline::C
+use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();';
+use Inline C => $c_src;
+undef $c_src;
+undef %CFG;
+undef $lockfh; # closes the lock file (and with it the LOCK_EX taken in BEGIN)
+1;
diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h
new file mode 100644
index 00000000..d9c79cf9
--- /dev/null
+++ b/lib/PublicInbox/gcf2_libgit2.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2020 all contributors <meta@public-inbox.org>
+ * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+ *
+ * libgit2 for Inline::C
+ * Avoiding Git::Raw since it doesn't guarantee a stable API,
+ * while libgit2 itself seems reasonably stable.
+ */
+#include <git2.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <poll.h>
+
+/* croaks with rc, msg and the last libgit2 error if rc != GIT_OK */
+static void croak_if_err(int rc, const char *msg)
+{
+	if (rc != GIT_OK) {
+		const git_error *e = giterr_last();
+
+		croak("%d %s (%s)", rc, msg, e ? e->message : "unknown");
+	}
+}
+
+/*
+ * PublicInbox::Gcf2::new: wraps a git_odb from git_odb_new() in a
+ * blessed reference to a read-only IV; croaks if git_odb_new() fails
+ */
+SV *new()
+{
+	git_odb *odb;
+	SV *ref, *self;
+	int rc = git_odb_new(&odb);
+	croak_if_err(rc, "git_odb_new");
+
+	ref = newSViv((IV)odb);
+	self = newRV_noinc(ref);
+	sv_bless(self, gv_stashpv("PublicInbox::Gcf2", GV_ADD));
+	SvREADONLY_on(ref);
+
+	return self;
+}
+
+/* recovers the git_odb pointer stored in the IV by new() */
+static git_odb *odb_ptr(SV *self)
+{
+	return (git_odb *)SvIV(SvRV(self));
+}
+
+/* releases the git_odb when the Perl object is destroyed */
+void DESTROY(SV *self)
+{
+	git_odb_free(odb_ptr(self));
+}
+
+/* needs "$GIT_DIR/objects", not $GIT_DIR; croaks on libgit2 error */
+void add_alternate(SV *self, const char *objects_path)
+{
+	int rc = git_odb_add_disk_alternate(odb_ptr(self), objects_path);
+	croak_if_err(rc, "git_odb_add_disk_alternate");
+}
+
+#define CAPA(v) (sizeof(v) / sizeof((v)[0]))
+
+/*
+ * Writes "$OID $TYPE $SIZE\n", the object contents and a trailing "\n"
+ * to fd, or "$OID missing\n" if the OID cannot be parsed or read.
+ * Retries writev() on EINTR and waits with poll() on EAGAIN;
+ * croaks on any other write error.
+ * this requires an unabbreviated git OID
+ */
+void cat_oid(SV *self, int fd, SV *oidsv)
+{
+	/*
+	 * adjust when libgit2 gets SHA-256 support, we return the
+	 * same header as git-cat-file --batch "$OID $TYPE $SIZE\n".
+	 * The sizeof() literal includes the "\n" so that snprintf
+	 * has room for it next to a 20-digit size.
+	 */
+	char hdr[GIT_OID_HEXSZ + sizeof(" commit 18446744073709551615\n")];
+	struct iovec vec[3];
+	size_t nvec = CAPA(vec);
+	git_oid oid;
+	git_odb_object *object = NULL;
+	int rc, err = 0;
+	STRLEN oidlen;
+	char *oidptr = SvPV(oidsv, oidlen);
+
+	/* same trailer as git-cat-file --batch */
+	vec[2].iov_len = 1;
+	vec[2].iov_base = "\n";
+
+	rc = git_oid_fromstrn(&oid, oidptr, oidlen);
+	if (rc == GIT_OK)
+		rc = git_odb_read(&object, odb_ptr(self), &oid);
+	if (rc == GIT_OK) {
+		vec[0].iov_base = hdr;
+		vec[1].iov_base = (void *)git_odb_object_data(object);
+		vec[1].iov_len = git_odb_object_size(object);
+
+		git_oid_nfmt(hdr, GIT_OID_HEXSZ, git_odb_object_id(object));
+		vec[0].iov_len = GIT_OID_HEXSZ +
+				snprintf(hdr + GIT_OID_HEXSZ,
+					sizeof(hdr) - GIT_OID_HEXSZ,
+					" %s %zu\n",
+					git_object_type2string(
+						git_odb_object_type(object)),
+					vec[1].iov_len);
+	} else {
+		vec[0].iov_base = oidptr;
+		vec[0].iov_len = oidlen;
+		vec[1].iov_base = " missing";
+		vec[1].iov_len = strlen(vec[1].iov_base);
+	}
+	while (nvec && !err) {
+		ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec);
+
+		if (w > 0) {
+			size_t done = 0;
+			size_t i;
+
+			for (i = CAPA(vec) - nvec; i < CAPA(vec); i++) {
+				/* w is never negative here, so the cast is safe */
+				if ((size_t)w >= vec[i].iov_len) {
+					/* fully written vec */
+					w -= vec[i].iov_len;
+					done++;
+				} else { /* partially written vec */
+					char *p = vec[i].iov_base;
+					vec[i].iov_base = p + w;
+					vec[i].iov_len -= w;
+					break;
+				}
+			}
+			nvec -= done;
+		} else if (w < 0) {
+			err = errno;
+			switch (err) {
+			case EAGAIN: {
+				/* would block: wait until fd is writable, then retry */
+				struct pollfd pfd;
+				pfd.events = POLLOUT;
+				pfd.fd = fd;
+				poll(&pfd, 1, -1);
+			}
+				/* fall-through */
+			case EINTR:
+				err = 0;
+			}
+		} else { /* w == 0 */
+			err = ENOSPC;
+		}
+	}
+	if (object)
+		git_odb_object_free(object);
+	if (err)
+		croak("writev error: %s", strerror(err));
+}
diff --git a/t/gcf2.t b/t/gcf2.t
new file mode 100644
index 00000000..c67efb6c
--- /dev/null
+++ b/t/gcf2.t
@@ -0,0 +1,113 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use PublicInbox::TestCommon;
+use Test::More;
+use Fcntl qw(:seek);
+use IO::Handle ();
+use POSIX qw(_exit);
+require_mods('PublicInbox::Gcf2');
+use_ok 'PublicInbox::Gcf2';
+my $gcf2 = PublicInbox::Gcf2::new();
+is(ref($gcf2), 'PublicInbox::Gcf2', '::new works');
+chomp(my $objdir = xqx([qw(git rev-parse --git-path objects)]));
+if ($objdir =~ /\A--git-path\n/) { # git <2.5
+	chomp($objdir = xqx([qw(git rev-parse --git-dir)]));
+	$objdir .= '/objects';
+	$objdir = undef unless -d $objdir;
+}
+
+# OID of the COPYING blob; its contents are compared against ./COPYING below
+my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3';
+open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!";
+$agpl = do { local $/; <$agpl> };
+
+SKIP: {
+	skip 'not in git worktree', 15 unless defined($objdir);
+	$gcf2->add_alternate($objdir);
+	open my $fh, '+>', undef or BAIL_OUT "open: $!";
+	my $fd = fileno($fh);
+	$fh->autoflush(1);
+
+	$gcf2->cat_oid($fd, 'invalid');
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	is(do { local $/; <$fh> }, "invalid missing\n", 'got missing message');
+
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	$gcf2->cat_oid($fd, '0'x40);
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	is(do { local $/; <$fh> }, ('0'x40)." missing\n",
+		'got missing message for 0x40');
+
+	seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+	$gcf2->cat_oid($fd, $COPYING);
+	my $buf;
+	my $ck_copying = sub {
+		my ($desc) = @_;
+		seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+		is(<$fh>, "$COPYING blob 34520\n", 'got expected header');
+		$buf = do { local $/; <$fh> };
+		is(chop($buf), "\n", 'got trailing \\n');
+		is($buf, $agpl, "AGPL matches ($desc)");
+	};
+	$ck_copying->('regular file');
+
+	$^O eq 'linux' or skip('pipe tests are Linux-only', 12);
+	for my $blk (1, 0) {
+		my ($r, $w);
+		pipe($r, $w) or BAIL_OUT $!;
+		# 1031 is F_SETPIPE_SZ on Linux; shrink the pipe to 4096 bytes
+		fcntl($w, 1031, 4096) or
+			skip('Linux too old for F_SETPIPE_SZ', 12);
+		$w->blocking($blk);
+		seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+		truncate($fh, 0) or BAIL_OUT "truncate: $!";
+		defined(my $pid = fork) or BAIL_OUT "fork: $!";
+		if ($pid == 0) {
+			close $w;
+			tick; # wait for parent to block on writev
+			$buf = do { local $/; <$r> };
+			print $fh $buf or _exit(1);
+			_exit(0);
+		}
+		$gcf2->cat_oid(fileno($w), $COPYING);
+		close $w or BAIL_OUT "close: $!";
+		is(waitpid($pid, 0), $pid, 'child exited');
+		is($?, 0, 'no error in child');
+		$ck_copying->("pipe blocking($blk)");
+
+		pipe($r, $w) or BAIL_OUT $!;
+		fcntl($w, 1031, 4096) or BAIL_OUT $!;
+		$w->blocking($blk);
+		close $r;
+		local $SIG{PIPE} = 'IGNORE';
+		eval { $gcf2->cat_oid(fileno($w), $COPYING) };
+		like($@, qr/writev error:/, 'got writev error');
+	}
+}
+
+if (my $nr = $ENV{TEST_LEAK_NR}) {
+	open my $null, '>', '/dev/null' or BAIL_OUT "open /dev/null: $!";
+	my $fd = fileno($null);
+	my $cat = $ENV{TEST_LEAK_CAT} // 10;
+	diag "checking for leaks... (TEST_LEAK_NR=$nr TEST_LEAK_CAT=$cat)";
+	local $SIG{PIPE} = 'IGNORE';
+	my ($r, $w);
+	pipe($r, $w) or BAIL_OUT "pipe: $!";
+	close $r;
+	my $broken = fileno($w);
+	for (1..$nr) {
+		my $obj = PublicInbox::Gcf2::new();
+		if (defined($objdir)) {
+			$obj->add_alternate($objdir);
+			for (1..$cat) {
+				$obj->cat_oid($fd, $COPYING);
+				eval { $obj->cat_oid($broken, $COPYING) };
+				$obj->cat_oid($fd, '0'x40);
+				$obj->cat_oid($fd, 'invalid');
+			}
+		}
+	}
+}
+done_testing;