about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2020-09-19 09:37:13 +0000
committerEric Wong <e@80x24.org>2020-09-19 21:39:46 +0000
commit881a5493a8c970c10c051cc55d10d2968e71e691 (patch)
tree2ad0e7a2658ae855b4c1a5bae663633dfca01098
parent3750b2e2952e55fe4a04c73fc78f25c5e07d0525 (diff)
downloadpublic-inbox-881a5493a8c970c10c051cc55d10d2968e71e691.tar.gz
This amortizes the cost of recreating PublicInbox::Gcf2 objects
when alternates change in v2 all.git.
-rw-r--r--lib/PublicInbox/Gcf2Client.pm13
-rw-r--r--lib/PublicInbox/Git.pm5
-rw-r--r--lib/PublicInbox/GitAsyncCat.pm24
-rwxr-xr-xscript/public-inbox-gcf214
-rw-r--r--t/gcf2_client.t11
5 files changed, 27 insertions, 40 deletions
diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
index a048cd1a..30f85c71 100644
--- a/lib/PublicInbox/Gcf2Client.pm
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -23,19 +23,6 @@ sub new {
         $self;
 }
 
-sub add_git_dir {
-        my ($self, $git_dir) = @_;
-
-        # ensure buffers are drained, length($git_dir) may exceed
-        # PIPE_BUF on platforms where PIPE_BUF is only 512 bytes
-        my $inflight = $self->{inflight};
-        while (scalar(@$inflight)) {
-                $self->cat_async_step($inflight);
-        }
-        print { $self->{out} } $git_dir, "\n" or
-                                $self->fail("write error: $!");
-}
-
 # always false, since -gcf2 retries internally
 sub alternates_changed {}
 
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index b49b5bd3..6bb82b6b 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -190,7 +190,8 @@ sub cat_async_step ($$) {
                 $bref = my_read($self->{in}, $rbuf, $size + 1) or
                         fail($self, defined($bref) ? 'read EOF' : "read: $!");
                 chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
-        } elsif ($head =~ / missing$/) {
+        } elsif ($head =~ s/ missing\n//s) {
+                $oid = $head;
                 # ref($req) indicates it's already been retried
                 # -gcf2 retries internally, so it never hits this path:
                 if (!ref($req) && !$in_cleanup && $self->alternates_changed) {
@@ -198,7 +199,7 @@ sub cat_async_step ($$) {
                                                 $req, $cb, $arg);
                 }
                 $type = 'missing';
-                $oid = ref($req) ? $$req : $req;
+                $oid = ref($req) ? $$req : $req if $oid eq '';
         } else {
                 fail($self, "Unexpected result from async git cat-file: $head");
         }
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index db1a7f94..8a54c608 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -16,21 +16,27 @@ our @EXPORT = qw(git_async_cat);
 
 sub event_step {
         my ($self) = @_;
-        my $git = $self->{git};
-        return $self->close if ($git->{in} // 0) != ($self->{sock} // 1);
-        my $inflight = $git->{inflight};
+        my $gitish = $self->{gitish};
+        return $self->close if ($gitish->{in} // 0) != ($self->{sock} // 1);
+        my $inflight = $gitish->{inflight};
         if ($inflight && @$inflight) {
-                $git->cat_async_step($inflight);
-                $self->requeue if @$inflight || exists $git->{cat_rbuf};
+                $gitish->cat_async_step($inflight);
+                $self->requeue if @$inflight || exists $gitish->{cat_rbuf};
         }
 }
 
 sub git_async_cat ($$$$) {
         my ($git, $oid, $cb, $arg) = @_;
-        $git->cat_async($oid, $cb, $arg);
-        $git->{async_cat} //= do {
-                my $self = bless { git => $git }, __PACKAGE__;
-                $self->SUPER::new($git->{in}, EPOLLIN|EPOLLET);
+        my $gitish = $git->{gcf2c}; # PublicInbox::Gcf2Client
+        if ($gitish) {
+                $oid .= " $git->{git_dir}";
+        } else {
+                $gitish = $git;
+        }
+        $gitish->cat_async($oid, $cb, $arg);
+        $gitish->{async_cat} //= do {
+                my $self = bless { gitish => $gitish }, __PACKAGE__;
+                $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET);
                 \undef; # this is a true ref()
         };
 }
diff --git a/script/public-inbox-gcf2 b/script/public-inbox-gcf2
index d2d2ac8b..4a44b654 100755
--- a/script/public-inbox-gcf2
+++ b/script/public-inbox-gcf2
@@ -3,7 +3,6 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 eval { require PublicInbox::Gcf2 };
 die "libgit2 development package or Inline::C missing for $0: $@\n" if $@;
-my @dirs; # may get big (30K-100K)
 my $gcf2 = PublicInbox::Gcf2::new();
 use IO::Handle; # autoflush
 STDERR->autoflush(1);
@@ -11,19 +10,16 @@ STDOUT->autoflush(1);
 
 while (<STDIN>) {
         chomp;
-        if (m!\A/!) { # +/path/to/git-dir
-                push @dirs, $_;
-                $gcf2->add_alternate("$_/objects");
-        } elsif (!$gcf2->cat_oid(1, $_)) {
+        my ($oid, $git_dir) = split(/ /, $_, 2);
+        $gcf2->add_alternate("$git_dir/objects");
+        if (!$gcf2->cat_oid(1, $oid)) {
                 # retry once if missing.  We only get unabbreviated OIDs
                 # from SQLite or Xapian DBs, here, so malicious clients
                 # can't trigger excessive retries:
-                my $oid = $_;
-                warn "I: $$ $oid missing, retrying...\n";
+                warn "I: $$ $oid missing, retrying in $git_dir...\n";
 
-                # clients may need to wait a bit for this:
                 $gcf2 = PublicInbox::Gcf2::new();
-                $gcf2->add_alternate("$_/objects") for @dirs;
+                $gcf2->add_alternate("$git_dir/objects");
 
                 if ($gcf2->cat_oid(1, $oid)) {
                         warn "I: $$ $oid found after retry\n";
diff --git a/t/gcf2_client.t b/t/gcf2_client.t
index 0f7e7203..19462379 100644
--- a/t/gcf2_client.t
+++ b/t/gcf2_client.t
@@ -27,9 +27,7 @@ my $err_f = "$tmpdir/err";
         local $ENV{PATH} = getcwd()."/blib/script:$ENV{PATH}";
         open my $err, '>', $err_f or BAIL_OUT $!;
         my $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
-        $gcf2c->add_git_dir($git_a);
-
-        $gcf2c->cat_async($tree, sub {
+        $gcf2c->cat_async("$tree $git_a", sub {
                 my ($bref, $oid, $type, $size, $arg) = @_;
                 is($oid, $tree, 'got expected OID');
                 is($size, 30, 'got expected length');
@@ -45,7 +43,7 @@ my $err_f = "$tmpdir/err";
         is($estr, '', 'nothing in stderr');
 
         my $trunc = substr($tree, 0, 39);
-        $gcf2c->cat_async($trunc, sub {
+        $gcf2c->cat_async("$trunc $git_a", sub {
                 my ($bref, $oid, $type, $size, $arg) = @_;
                 is(undef, $bref, 'missing bref is undef');
                 is($oid, $trunc, 'truncated OID printed');
@@ -63,8 +61,7 @@ my $err_f = "$tmpdir/err";
         # try failed alternates lookup
         open $err, '>', $err_f or BAIL_OUT $!;
         $gcf2c = PublicInbox::Gcf2Client::new({ 2 => $err });
-        $gcf2c->add_git_dir($git_b);
-        $gcf2c->cat_async($tree, sub {
+        $gcf2c->cat_async("$tree $git_b", sub {
                 my ($bref, $oid, $type, $size, $arg) = @_;
                 is(undef, $bref, 'missing bref from alt is undef');
                 $called++;
@@ -79,7 +76,7 @@ my $err_f = "$tmpdir/err";
         print $alt "$git_a/objects\n" or BAIL_OUT $!;
         close $alt or BAIL_OUT;
         my $expect = xqx(['git', "--git-dir=$git_a", qw(cat-file tree), $tree]);
-        $gcf2c->cat_async($tree, sub {
+        $gcf2c->cat_async("$tree $git_a", sub {
                 my ($bref, $oid, $type, $size, $arg) = @_;
                 is($oid, $tree, 'oid match on alternates retry');
                 is($$bref, $expect, 'tree content matched');