about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- MANIFEST 3
-rw-r--r-- lib/PublicInbox/Gcf2.pm 56
-rw-r--r-- lib/PublicInbox/gcf2_libgit2.h 139
-rw-r--r-- t/gcf2.t 112
4 files changed, 310 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index 04a3744f..0d3a7073 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -128,6 +128,7 @@ lib/PublicInbox/Filter/Mirror.pm
 lib/PublicInbox/Filter/RubyLang.pm
 lib/PublicInbox/Filter/SubjectTag.pm
 lib/PublicInbox/Filter/Vger.pm
+lib/PublicInbox/Gcf2.pm
 lib/PublicInbox/GetlineBody.pm
 lib/PublicInbox/Git.pm
 lib/PublicInbox/GitAsyncCat.pm
@@ -212,6 +213,7 @@ lib/PublicInbox/WwwStatic.pm
 lib/PublicInbox/WwwStream.pm
 lib/PublicInbox/WwwText.pm
 lib/PublicInbox/Xapcmd.pm
+lib/PublicInbox/gcf2_libgit2.h
 sa_config/Makefile
 sa_config/README
 sa_config/root/etc/spamassassin/public-inbox.pre
@@ -275,6 +277,7 @@ t/filter_mirror.t
 t/filter_rubylang.t
 t/filter_subjecttag.t
 t/filter_vger.t
+t/gcf2.t
 t/git-http-backend.psgi
 t/git.fast-import-data
 t/git.t
diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm
new file mode 100644
index 00000000..6ac3aa18
--- /dev/null
+++ b/lib/PublicInbox/Gcf2.pm
@@ -0,0 +1,56 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+#
+# git-cat-file based on libgit2
+package PublicInbox::Gcf2;
+use strict;
+use PublicInbox::Spawn qw(which popen_rd);
+use Fcntl qw(LOCK_EX);
+my (%CFG, $c_src, $lockfh);
+BEGIN { # compile-time setup: fills %CFG and $c_src for the "use Inline" lines after this block
+        # PublicInbox::Spawn will set PERL_INLINE_DIRECTORY
+        # to ~/.cache/public-inbox/inline-c if it exists
+        my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //
+                die 'PERL_INLINE_DIRECTORY not defined';
+        my $f = "$inline_dir/.public-inbox.lock";
+        open $lockfh, '>', $f or die "failed to open $f: $!\n";
+        my $pc = which($ENV{PKG_CONFIG} // 'pkg-config'); # NOTE(review): result is not checked; confirm what which() returns when the tool is absent
+        my ($dir) = (__FILE__ =~ m!\A(.+?)/[^/]+\z!); # directory part of this file's path
+        my $rdr = {};
+        open $rdr->{2}, '>', '/dev/null' or die "open /dev/null: $!"; # presumably becomes the child's stderr; verify against popen_rd
+        for my $x (qw(libgit2)) { # single-entry list of pkg-config package names
+                my $l = popen_rd([$pc, '--libs', $x], undef, $rdr);
+                $l = do { local $/; <$l> }; # slurp; $l now holds the output text, not the handle
+                next if $?; # NOTE(review): relies on $? being set once the pipe is drained/released; confirm with popen_rd
+                my $c = popen_rd([$pc, '--cflags', $x], undef, $rdr);
+                $c = do { local $/; <$c> };
+                next if $?;
+
+                # note: we name C source files .h to prevent
+                # ExtUtils::MakeMaker from automatically trying to
+                # build them.
+                my $f = "$dir/gcf2_$x.h"; # loop-local $f; the lock path held by the outer $f is untouched
+                if (open(my $fh, '<', $f)) {
+                        chomp($l, $c);
+                        local $/;
+                        $c_src = <$fh>;
+                        $CFG{LIBS} = $l;
+                        $CFG{CCFLAGSEX} = $c;
+                        last;
+                } else {
+                        die "E: $f: $!\n";
+                }
+        }
+        die "E: libgit2 not installed\n" unless $c_src; # e.g. every pkg-config query above was skipped via "next"
+
+        # CentOS 7.x ships Inline 0.53, 0.64+ has built-in locking
+        flock($lockfh, LOCK_EX) or die "LOCK_EX failed on $f: $!\n"; # $f is the lock file path again here
+}
+
+# we use Capitalized and ALLCAPS for compatibility with old Inline::C
+use Inline C => Config => %CFG, BOOT => 'git_libgit2_init();'; # build flags from BEGIN, plus C code for the XS boot section
+use Inline C => $c_src; # compile and bind the C source read in BEGIN
+undef $c_src; # source text and build flags are not needed past this point
+undef %CFG;
+undef $lockfh; # drop the lock file handle opened in BEGIN
+1;
diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h
new file mode 100644
index 00000000..d9c79cf9
--- /dev/null
+++ b/lib/PublicInbox/gcf2_libgit2.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2020 all contributors <meta@public-inbox.org>
+ * License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+ *
+ * libgit2 for Inline::C
+ * Avoiding Git::Raw since it doesn't guarantee a stable API,
+ * while libgit2 itself seems reasonably stable.
+ */
+#include <git2.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <poll.h>
+
+static void croak_if_err(int rc, const char *msg)
+{ /* no-op when rc is GIT_OK; otherwise croak() with rc, msg and the libgit2 error text */
+        if (rc != GIT_OK) {
+                const git_error *e = giterr_last(); /* may be NULL, handled below */
+
+                croak("%d %s (%s)", rc, msg, e ? e->message : "unknown");
+        }
+}
+
+SV *new()
+{ /* constructor: returns a PublicInbox::Gcf2 object wrapping a fresh git_odb; croaks on failure */
+        git_odb *odb;
+        SV *ref, *self;
+        int rc = git_odb_new(&odb);
+        croak_if_err(rc, "git_odb_new");
+
+        ref = newSViv((IV)odb); /* the odb pointer is stored as the integer value of the inner scalar */
+        self = newRV_noinc(ref); /* self is a reference to that scalar */
+        sv_bless(self, gv_stashpv("PublicInbox::Gcf2", GV_ADD));
+        SvREADONLY_on(ref); /* keep Perl code from overwriting the stored pointer */
+
+        return self;
+}
+
+static git_odb *odb_ptr(SV *self)
+{ /* dereference self and read its integer value back as the git_odb pointer stored by new() */
+        return (git_odb *)SvIV(SvRV(self));
+}
+
+void DESTROY(SV *self)
+{ /* Perl destructor hook: frees the git_odb held by self */
+        git_odb_free(odb_ptr(self));
+}
+
+/* add an on-disk object directory to this odb; needs "$GIT_DIR/objects", not $GIT_DIR */
+void add_alternate(SV *self, const char *objects_path)
+{
+        int rc = git_odb_add_disk_alternate(odb_ptr(self), objects_path);
+        croak_if_err(rc, "git_odb_add_disk_alternate"); /* croaks unless rc is GIT_OK */
+}
+
+/* write one object to fd in git-cat-file --batch format; this requires an unabbreviated git OID */
+#define CAPA(v) (sizeof(v) / sizeof((v)[0])) /* element count of an array */
+void cat_oid(SV *self, int fd, SV *oidsv)
+{
+        /*
+         * adjust when libgit2 gets SHA-256 support, we return the same header as
+         * git-cat-file --batch "$OID $TYPE $SIZE\n"; hdr must fit "\n" and the NUL
+         */
+        char hdr[GIT_OID_HEXSZ + sizeof(" commit 18446744073709551615\n")];
+        struct iovec vec[3]; /* [0] header or raw OID, [1] data or " missing", [2] "\n" */
+        size_t nvec = CAPA(vec); /* iovecs not yet fully written */
+        git_oid oid;
+        git_odb_object *object = NULL;
+        int rc, err = 0;
+        STRLEN oidlen;
+        char *oidptr = SvPV(oidsv, oidlen);
+
+        /* same trailer as git-cat-file --batch */
+        vec[2].iov_len = 1;
+        vec[2].iov_base = "\n";
+
+        rc = git_oid_fromstrn(&oid, oidptr, oidlen);
+        if (rc == GIT_OK)
+                rc = git_odb_read(&object, odb_ptr(self), &oid);
+        if (rc == GIT_OK) { /* found: "$OID $TYPE $SIZE\n" followed by the object data */
+                vec[0].iov_base = hdr;
+                vec[1].iov_base = (void *)git_odb_object_data(object);
+                vec[1].iov_len = git_odb_object_size(object);
+
+                git_oid_nfmt(hdr, GIT_OID_HEXSZ, git_odb_object_id(object));
+                vec[0].iov_len = GIT_OID_HEXSZ +
+                                snprintf(hdr + GIT_OID_HEXSZ,
+                                        sizeof(hdr) - GIT_OID_HEXSZ,
+                                        " %s %zu\n",
+                                        git_object_type2string(
+                                                git_odb_object_type(object)),
+                                        vec[1].iov_len);
+        } else { /* bad OID or lookup failure: echo the caller's string plus " missing" */
+                vec[0].iov_base = oidptr;
+                vec[0].iov_len = oidlen;
+                vec[1].iov_base = " missing";
+                vec[1].iov_len = strlen(vec[1].iov_base);
+        }
+        while (nvec && !err) { /* loop until every iovec is flushed or a hard error occurs */
+                ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec); /* resume at the first unfinished iovec */
+
+                if (w > 0) {
+                        size_t done = 0;
+                        size_t i;
+
+                        for (i = CAPA(vec) - nvec; i < CAPA(vec); i++) {
+                                if ((size_t)w >= vec[i].iov_len) { /* w > 0 here, so the cast is safe */
+                                        /* fully written vec */
+                                        w -= vec[i].iov_len;
+                                        done++;
+                                } else { /* partially written vec */
+                                        char *p = vec[i].iov_base;
+                                        vec[i].iov_base = p + w;
+                                        vec[i].iov_len -= w;
+                                        break;
+                                }
+                        }
+                        nvec -= done;
+                } else if (w < 0) {
+                        err = errno;
+                        switch (err) {
+                        case EAGAIN: { /* wait for fd to become writable, then retry */
+                                struct pollfd pfd;
+                                pfd.events = POLLOUT;
+                                pfd.fd = fd;
+                                poll(&pfd, 1, -1);
+                        }
+                                /* fall-through */
+                        case EINTR:
+                                err = 0; /* retryable: clear err so the loop continues */
+                        }
+                } else { /* w == 0 */
+                        err = ENOSPC;
+                }
+        }
+        if (object) /* release before croak() below, which does not return here */
+                git_odb_object_free(object);
+        if (err)
+                croak("writev error: %s", strerror(err));
+}
diff --git a/t/gcf2.t b/t/gcf2.t
new file mode 100644
index 00000000..c67efb6c
--- /dev/null
+++ b/t/gcf2.t
@@ -0,0 +1,112 @@
+#!perl -w
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict;
+use PublicInbox::TestCommon;
+use Test::More;
+use Fcntl qw(:seek);
+use IO::Handle ();
+use POSIX qw(_exit);
+require_mods('PublicInbox::Gcf2');
+use_ok 'PublicInbox::Gcf2';
+my $gcf2 = PublicInbox::Gcf2::new(); # plain function call: the C new() takes no arguments
+is(ref($gcf2), 'PublicInbox::Gcf2', '::new works');
+chomp(my $objdir = xqx([qw(git rev-parse --git-path objects)]));
+if ($objdir =~ /\A--git-path\n/) { # git <2.5: output starts with the option itself, so fall back to --git-dir
+        chomp($objdir = xqx([qw(git rev-parse --git-dir)]));
+        $objdir .= '/objects';
+        $objdir = undef unless -d $objdir; # undef makes the SKIP block below skip its tests
+}
+
+my $COPYING = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3'; # OID the tests below expect to be a 34520-byte blob
+open my $agpl, '<', 'COPYING' or BAIL_OUT "AGPL-3 missing: $!";
+$agpl = do { local $/; <$agpl> }; # $agpl now holds the file contents, not the handle
+
+SKIP: { # cat_oid tests: regular file first, then blocking and non-blocking pipes
+        skip 'not in git worktree', 15 unless defined($objdir);
+        $gcf2->add_alternate($objdir);
+        open my $fh, '+>', undef or BAIL_OUT "open: $!"; # anonymous read/write temp file
+        my $fd = fileno($fh);
+        $fh->autoflush(1);
+
+        $gcf2->cat_oid($fd, 'invalid'); # not a valid hex OID
+        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+        is(do { local $/; <$fh> }, "invalid missing\n", 'got missing message');
+
+        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!"; # no truncate: each result here is longer than the one it overwrites
+        $gcf2->cat_oid($fd, '0'x40); # well-formed OID that is expected to be absent
+        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+        is(do { local $/; <$fh> }, ('0'x40)." missing\n",
+                'got missing message for 0x40');
+
+        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+        $gcf2->cat_oid($fd, $COPYING);
+        my $buf;
+        my $ck_copying = sub { # checks header, trailing newline and body of whatever is in $fh
+                my ($desc) = @_;
+                seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+                is(<$fh>, "$COPYING blob 34520\n", 'got expected header');
+                $buf = do { local $/; <$fh> };
+                is(chop($buf), "\n", 'got trailing \\n'); # chop also strips that trailer from $buf
+                is($buf, $agpl, "AGPL matches ($desc)");
+        };
+        $ck_copying->('regular file');
+
+        $^O eq 'linux' or skip('pipe tests are Linux-only', 12);
+        my $size = -s $fh; # NOTE(review): $size is not used anywhere below
+        for my $blk (1, 0) { # blocking writer first, then non-blocking
+                my ($r, $w);
+                pipe($r, $w) or BAIL_OUT $!;
+                fcntl($w, 1031, 4096) or # 1031 is F_SETPIPE_SZ per the skip message; presumably shrinks the pipe to force partial writes
+                        skip('Linux too old for F_SETPIPE_SZ', 12);
+                $w->blocking($blk);
+                seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
+                truncate($fh, 0) or BAIL_OUT "truncate: $!";
+                defined(my $pid = fork) or BAIL_OUT "fork: $!";
+                if ($pid == 0) { # child: drain the pipe into the temp file
+                        close $w;
+                        tick; # wait for parent to block on writev
+                        $buf = do { local $/; <$r> };
+                        print $fh $buf or _exit(1);
+                        _exit(0);
+                }
+                $gcf2->cat_oid(fileno($w), $COPYING);
+                close $w or BAIL_OUT "close: $!"; # lets the child's slurp see EOF
+                is(waitpid($pid, 0), $pid, 'child exited');
+                is($?, 0, 'no error in child');
+                $ck_copying->("pipe blocking($blk)");
+
+                pipe($r, $w) or BAIL_OUT $!;
+                fcntl($w, 1031, 4096) or BAIL_OUT $!;
+                $w->blocking($blk);
+                close $r; # no reader left, so the write below must fail
+                local $SIG{PIPE} = 'IGNORE'; # get an error from writev instead of dying on SIGPIPE
+                eval { $gcf2->cat_oid(fileno($w), $COPYING) };
+                like($@, qr/writev error:/, 'got writev error');
+        }
+}
+
+if (my $nr = $ENV{TEST_LEAK_NR}) { # opt-in stress loop; it makes no test assertions of its own
+        open my $null, '>', '/dev/null' or BAIL_OUT "open /dev/null: $!";
+        my $fd = fileno($null);
+        my $cat = $ENV{TEST_LEAK_CAT} // 10; # cat_oid rounds per object
+        diag "checking for leaks... (TEST_LEAK_NR=$nr TEST_LEAK_CAT=$cat)";
+        local $SIG{PIPE} = 'IGNORE';
+        my ($r, $w);
+        pipe($r, $w); # NOTE(review): return value is not checked here, unlike the pipe() calls above
+        close $r; # writes to $w now fail, exercising cat_oid's error path
+        my $broken = fileno($w);
+        for (1..$nr) {
+                my $obj = PublicInbox::Gcf2::new(); # new object per iteration, dropped at the end of it
+                if (defined($objdir)) {
+                        $obj->add_alternate($objdir);
+                        for (1..$cat) {
+                                $obj->cat_oid($fd, $COPYING);
+                                eval { $obj->cat_oid($broken, $COPYING) }; # expected to croak; error is discarded
+                                $obj->cat_oid($fd, '0'x40);
+                                $obj->cat_oid($fd, 'invalid');
+                        }
+                }
+        }
+}
+done_testing;