about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2020-09-19 09:37:11 +0000
committerEric Wong <e@80x24.org>2020-09-19 21:39:45 +0000
commitdc03cabb5d167618797e9e8a6ec615bda7b0638b (patch)
tree92b55c227d80cca785ac0c2edb0acb0c5c956549
parent7c0c47e26af17918031d449d24abe40ad452f51a (diff)
downloadpublic-inbox-dc03cabb5d167618797e9e8a6ec615bda7b0638b.tar.gz
Since we only get OIDs from trusted local data sources
(over.sqlite3), we can safely retry within the -gcf2 process
without worry about clients spamming us with requests for
invalid OIDs and triggering reopens.
-rw-r--r--lib/PublicInbox/Gcf2Client.pm11
-rw-r--r--lib/PublicInbox/Git.pm5
-rw-r--r--lib/PublicInbox/gcf2_libgit2.h17
-rwxr-xr-xscript/public-inbox-gcf225
-rw-r--r--t/gcf2.t34
-rw-r--r--t/gcf2_client.t54
6 files changed, 109 insertions, 37 deletions
diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
index 71fbb1d1..5120698f 100644
--- a/lib/PublicInbox/Gcf2Client.pm
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -7,11 +7,13 @@ use PublicInbox::Spawn qw(popen_rd);
 use IO::Handle ();
 
 sub new {
-        my $self = shift->SUPER::new('/nonexistent');
+        my ($rdr) = @_;
+        my $self = bless {}, __PACKAGE__;
         my ($out_r, $out_w);
         pipe($out_r, $out_w) or $self->fail("pipe failed: $!");
-        my $cmd = [ 'public-inbox-gcf2' ];
-        @$self{qw(in pid)} = popen_rd($cmd, undef, { 0 => $out_r });
+        $rdr //= {};
+        $rdr->{0} = $out_r;
+        @$self{qw(in pid)} = popen_rd(['public-inbox-gcf2'], undef, $rdr);
         $self->{inflight} = [];
         $self->{out} = $out_w;
         fcntl($out_w, 1031, 4096) if $^O eq 'linux'; # 1031: F_SETPIPE_SZ
@@ -32,4 +34,7 @@ sub add_git_dir {
                                 $self->fail("write error: $!");
 }
 
+# always false, since -gcf2 retries internally
+sub alternates_changed {}
+
 1;
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index a7ba57f9..b49b5bd3 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -192,7 +192,8 @@ sub cat_async_step ($$) {
                 chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
         } elsif ($head =~ / missing$/) {
                 # ref($req) indicates it's already been retried
-                if (!ref($req) && !$in_cleanup && alternates_changed($self)) {
+                # -gcf2 retries internally, so it never hits this path:
+                if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
                         return cat_async_retry($self, $inflight,
                                                 $req, $cb, $arg);
                 }
@@ -394,7 +395,7 @@ sub pub_urls {
 
 sub cat_async_begin {
         my ($self) = @_;
-        cleanup($self) if alternates_changed($self);
+        cleanup($self) if $self->alternates_changed;
         batch_prepare($self);
         die 'BUG: already in async' if $self->{inflight};
         $self->{inflight} = [];
diff --git a/lib/PublicInbox/gcf2_libgit2.h b/lib/PublicInbox/gcf2_libgit2.h
index d9c79cf9..800c6bad 100644
--- a/lib/PublicInbox/gcf2_libgit2.h
+++ b/lib/PublicInbox/gcf2_libgit2.h
@@ -52,9 +52,13 @@ void add_alternate(SV *self, const char *objects_path)
         croak_if_err(rc, "git_odb_add_disk_alternate");
 }
 
-/* this requires an unabbreviated git OID */
 #define CAPA(v) (sizeof(v) / sizeof((v)[0]))
-void cat_oid(SV *self, int fd, SV *oidsv)
+
+/*
+ * returns true on success, false on failure
+ * this requires an unabbreviated git OID
+ */
+int cat_oid(SV *self, int fd, SV *oidsv)
 {
         /*
          * adjust when libgit2 gets SHA-256 support, we return the
@@ -89,11 +93,8 @@ void cat_oid(SV *self, int fd, SV *oidsv)
                                         git_object_type2string(
                                                 git_odb_object_type(object)),
                                         vec[1].iov_len);
-        } else {
-                vec[0].iov_base = oidptr;
-                vec[0].iov_len = oidlen;
-                vec[1].iov_base = " missing";
-                vec[1].iov_len = strlen(vec[1].iov_base);
+        } else { /* caller retries */
+                nvec = 0;
         }
         while (nvec && !err) {
                 ssize_t w = writev(fd, vec + CAPA(vec) - nvec, nvec);
@@ -136,4 +137,6 @@ void cat_oid(SV *self, int fd, SV *oidsv)
                 git_odb_object_free(object);
         if (err)
                 croak("writev error: %s", strerror(err));
+
+        return rc == GIT_OK;
 }
diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2
index 51811698..d2d2ac8b 100755
--- a/script/public-inbox-gcf2
+++ b/script/public-inbox-gcf2
@@ -3,12 +3,33 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 eval { require PublicInbox::Gcf2 };
 die "libgit2 development package or Inline::C missing for $0: $@\n" if $@;
+my @dirs; # may get big (30K-100K)
 my $gcf2 = PublicInbox::Gcf2::new();
+use IO::Handle; # autoflush
+STDERR->autoflush(1);
+STDOUT->autoflush(1);
+
 while (<STDIN>) {
         chomp;
         if (m!\A/!) { # +/path/to/git-dir
+                push @dirs, $_;
                 $gcf2->add_alternate("$_/objects");
-        } else {
-                $gcf2->cat_oid(1, $_);
+        } elsif (!$gcf2->cat_oid(1, $_)) {
+                # retry once if missing.  We only get unabbreviated OIDs
+                # from SQLite or Xapian DBs, here, so malicious clients
+                # can't trigger excessive retries:
+                my $oid = $_;
+                warn "I: $$ $oid missing, retrying...\n";
+
+                # clients may need to wait a bit for this:
+                $gcf2 = PublicInbox::Gcf2::new();
+                $gcf2->add_alternate("$_/objects") for @dirs;
+
+                if ($gcf2->cat_oid(1, $oid)) {
+                        warn "I: $$ $oid found after retry\n";
+                } else {
+                        warn "W: $$ $oid missing after retry\n";
+                        print "$oid missing\n"; # mimic git-cat-file
+                }
         }
 }
diff --git a/t/gcf2.t b/t/gcf2.t
index 9056b340..35b2f113 100644
--- a/t/gcf2.t
+++ b/t/gcf2.t
@@ -76,43 +76,41 @@ SKIP: {
         }
 
         open my $fh, '+>', undef or BAIL_OUT "open: $!";
-        my $fd = fileno($fh);
         $fh->autoflush(1);
 
-        $gcf2->cat_oid($fd, 'invalid');
+        ok(!$gcf2->cat_oid(fileno($fh), 'invalid'), 'invalid fails');
         seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-        is(do { local $/; <$fh> }, "invalid missing\n", 'got missing message');
+        is(do { local $/; <$fh> }, '', 'nothing written');
 
+        open $fh, '+>', undef or BAIL_OUT "open: $!";
+        ok(!$gcf2->cat_oid(fileno($fh), '0'x40), 'z40 fails');
         seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-        $gcf2->cat_oid($fd, '0'x40);
-        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-        is(do { local $/; <$fh> }, ('0'x40)." missing\n",
-                'got missing message for 0x40');
+        is(do { local $/; <$fh> }, '', 'nothing written for z40');
 
-        seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-        $gcf2->cat_oid($fd, $COPYING);
-        my $buf;
+        open $fh, '+>', undef or BAIL_OUT "open: $!";
         my $ck_copying = sub {
                 my ($desc) = @_;
                 seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
-                is(<$fh>, "$COPYING blob 34520\n", 'got expected header');
-                $buf = do { local $/; <$fh> };
+                is(<$fh>, "$COPYING blob 34520\n", "got expected header $desc");
+                my $buf = do { local $/; <$fh> };
                 is(chop($buf), "\n", 'got trailing \\n');
                 is($buf, $agpl, "AGPL matches ($desc)");
         };
+        ok($gcf2->cat_oid(fileno($fh), $COPYING), 'cat_oid normal');
         $ck_copying->('regular file');
 
         $gcf2 = PublicInbox::Gcf2::new();
         $gcf2->add_alternate("$tmpdir/objects");
-        $ck_copying->('alternates respected');
+        open $fh, '+>', undef or BAIL_OUT "open: $!";
+        ok($gcf2->cat_oid(fileno($fh), $COPYING), 'cat_oid alternate');
+        $ck_copying->('alternates after reopen');
 
-        $^O eq 'linux' or skip('pipe tests are Linux-only', 12);
-        my $size = -s $fh;
+        $^O eq 'linux' or skip('pipe tests are Linux-only', 14);
         for my $blk (1, 0) {
                 my ($r, $w);
                 pipe($r, $w) or BAIL_OUT $!;
                 fcntl($w, 1031, 4096) or
-                        skip('Linux too old for F_SETPIPE_SZ', 12);
+                        skip('Linux too old for F_SETPIPE_SZ', 14);
                 $w->blocking($blk);
                 seek($fh, 0, SEEK_SET) or BAIL_OUT "seek: $!";
                 truncate($fh, 0) or BAIL_OUT "truncate: $!";
@@ -120,11 +118,11 @@ SKIP: {
                 if ($pid == 0) {
                         close $w;
                         tick; # wait for parent to block on writev
-                        $buf = do { local $/; <$r> };
+                        my $buf = do { local $/; <$r> };
                         print $fh $buf or _exit(1);
                         _exit(0);
                 }
-                $gcf2->cat_oid(fileno($w), $COPYING);
+                ok($gcf2->cat_oid(fileno($w), $COPYING), "cat blocking=$blk");
                 close $w or BAIL_OUT "close: $!";
                 is(waitpid($pid, 0), $pid, 'child exited');
                 is($?, 0, 'no error in child');
diff --git a/t/gcf2_client.t b/t/gcf2_client.t
index 39f9f296..0f7e7203 100644
--- a/t/gcf2_client.t
+++ b/t/gcf2_client.t
@@ -10,19 +10,25 @@ use PublicInbox::Import;
 require_mods('PublicInbox::Gcf2');
 use_ok 'PublicInbox::Gcf2Client';
 my ($tmpdir, $for_destroy) = tmpdir();
-PublicInbox::Import::init_bare($tmpdir);
+my $git_a = "$tmpdir/a.git";
+my $git_b = "$tmpdir/b.git";
+PublicInbox::Import::init_bare($git_a);
+PublicInbox::Import::init_bare($git_b);
 my $fi_data = './t/git.fast-import-data';
 my $rdr = {};
 open $rdr->{0}, '<', $fi_data or BAIL_OUT $!;
-xsys([qw(git fast-import --quiet)], { GIT_DIR => $tmpdir }, $rdr);
+xsys([qw(git fast-import --quiet)], { GIT_DIR => $git_a }, $rdr);
 is($?, 0, 'fast-import succeeded');
 
 my $tree = 'fdbc43725f21f485051c17463b50185f4c3cf88c';
 my $called = 0;
+my $err_f = "$tmpdir/err";
 {
         local $ENV{PATH} = getcwd()."/blib/script:$ENV{PATH}";
-        my $gcf2c = PublicInbox::Gcf2Client->new;
-        $gcf2c->add_git_dir($tmpdir);
+        open my $err, '>', $err_f or BAIL_OUT $!;
+        my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
+        $gcf2c->add_git_dir($git_a);
+
         $gcf2c->cat_async($tree, sub {
                 my ($bref, $oid, $type, $size, $arg) = @_;
                 is($oid, $tree, 'got expected OID');
@@ -32,6 +38,12 @@ my $called = 0;
                 is($arg, 'hi', 'arg passed');
                 $called++;
         }, 'hi');
+        $gcf2c->cat_async_wait;
+
+        open $err, '<', $err_f or BAIL_OUT $!;
+        my $estr = do { local $/; <$err> };
+        is($estr, '', 'nothing in stderr');
+
         my $trunc = substr($tree, 0, 39);
         $gcf2c->cat_async($trunc, sub {
                 my ($bref, $oid, $type, $size, $arg) = @_;
@@ -42,6 +54,38 @@ my $called = 0;
                 is($arg, 'bye', 'arg passed when missing');
                 $called++;
         }, 'bye');
+        $gcf2c->cat_async_wait;
+
+        open $err, '<', $err_f or BAIL_OUT $!;
+        $estr = do { local $/; <$err> };
+        like($estr, qr/retrying/, 'warned about retry');
+
+        # try failed alternates lookup
+        open $err, '>', $err_f or BAIL_OUT $!;
+        $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
+        $gcf2c->add_git_dir($git_b);
+        $gcf2c->cat_async($tree, sub {
+                my ($bref, $oid, $type, $size, $arg) = @_;
+                is(undef, $bref, 'missing bref from alt is undef');
+                $called++;
+        });
+        $gcf2c->cat_async_wait;
+        open $err, '<', $err_f or BAIL_OUT $!;
+        $estr = do { local $/; <$err> };
+        like($estr, qr/retrying/, 'warned about retry before alt update');
+
+        # now try successful alternates lookup
+        open my $alt, '>>', "$git_b/objects/info/alternates" or BAIL_OUT $!;
+        print $alt "$git_a/objects\n" or BAIL_OUT $!;
+        close $alt or BAIL_OUT;
+        my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]);
+        $gcf2c->cat_async($tree, sub {
+                my ($bref, $oid, $type, $size, $arg) = @_;
+                is($oid, $tree, 'oid match on alternates retry');
+                is($$bref, $expect, 'tree content matched');
+                $called++;
+        });
+        $gcf2c->cat_async_wait;
 }
-is($called, 2, 'cat_async callbacks hit');
+is($called, 4, 'cat_async callbacks hit');
 done_testing;