about summary refs log tree commit homepage
path: root/lib/PublicInbox/Git.pm
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-09-19 09:37:14 +0000
committer Eric Wong <e@80x24.org> 2020-09-19 21:39:47 +0000
commit d78f50649a5545d66a61b5465ca7f5ce4be398ea (patch)
tree 7a0dc7bde92e89bd57dca861624fac8cae7c1be6 /lib/PublicInbox/Git.pm
parent 881a5493a8c970c10c051cc55d10d2968e71e691 (diff)
download public-inbox-d78f50649a5545d66a61b5465ca7f5ce4be398ea.tar.gz
It seems easiest to have a singleton Gcf2Client object per
daemon worker for all inboxes to use.  This reduces overall
FD usage from pipes.

The `public-inbox-gcf2' command and its manpage are gone; a `$^X'
one-liner is used instead.  This saves inodes for internal
commands and hopefully makes it easier to avoid mismatched
PERL5LIB include paths (as noticed during development :x).

We'll also make the existing cat-file process management
infrastructure more resilient to BOFHs on process killing
sprees (or in case our libgit2-based code fails on us).

(Rare) PublicInbox::WWW PSGI users NOT using public-inbox-httpd
won't automatically benefit from this change, and extra
configuration will be required (to be documented later).
Diffstat (limited to 'lib/PublicInbox/Git.pm')
-rw-r--r-- lib/PublicInbox/Git.pm | 31
1 file changed, 20 insertions, 11 deletions
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 6bb82b6b..2323cecc 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -185,11 +185,12 @@ sub cat_async_step ($$) {
         my $rbuf = delete($self->{cat_rbuf}) // \(my $new = '');
         my ($bref, $oid, $type, $size);
         my $head = my_readline($self->{in}, $rbuf);
+        # ->fail may be called via Gcf2Client.pm
         if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) {
                 ($oid, $type, $size) = ($1, $2, $3 + 0);
                 $bref = my_read($self->{in}, $rbuf, $size + 1) or
-                        fail($self, defined($bref) ? 'read EOF' : "read: $!");
-                chop($$bref) eq "\n" or fail($self, 'LF missing after blob');
+                        $self->fail(defined($bref) ? 'read EOF' : "read: $!");
+                chop($$bref) eq "\n" or $self->fail('LF missing after blob');
         } elsif ($head =~ s/ missing\n//s) {
                 $oid = $head;
                 # ref($req) indicates it's already been retried
@@ -201,7 +202,7 @@ sub cat_async_step ($$) {
                 $type = 'missing';
                 $oid = ref($req) ? $$req : $req if $oid eq '';
         } else {
-                fail($self, "Unexpected result from async git cat-file: $head");
+                $self->fail("Unexpected result from async git cat-file: $head");
         }
         eval { $cb->($bref, $oid, $type, $size, $arg) };
         $self->{cat_rbuf} = $rbuf if $$rbuf ne '';
@@ -304,10 +305,12 @@ sub check {
 
 sub _destroy {
         my ($self, $rbuf, $in, $out, $pid, $err) = @_;
-        my $p = delete $self->{$pid} or return;
         delete @$self{($rbuf, $in, $out)};
         delete $self->{$err} if $err; # `err_c'
 
+        # GitAsyncCat::event_step may delete {pid}
+        my $p = delete $self->{$pid} or return;
+
         # PublicInbox::DS may not be loaded
         eval { PublicInbox::DS::dwaitpid($p, undef, undef) };
         waitpid($p, 0) if $@; # wait synchronously if not in event loop
@@ -315,14 +318,21 @@ sub _destroy {
 
 sub cat_async_abort ($) {
         my ($self) = @_;
-        my $inflight = delete $self->{inflight} or die 'BUG: not in async';
+        if (my $inflight = delete $self->{inflight}) {
+                while (@$inflight) {
+                        my ($req, $cb, $arg) = splice(@$inflight, 0, 3);
+                        $req =~ s/ .*//; # drop git_dir for Gcf2Client
+                        eval { $cb->(undef, $req, undef, undef, $arg) };
+                        warn "E: $req: $@ (in abort)\n" if $@;
+                }
+        }
         cleanup($self);
 }
 
 sub fail {
         my ($self, $msg) = @_;
-        $self->{inflight} ? cat_async_abort($self) : cleanup($self);
-        croak("git $self->{git_dir}: $msg");
+        cat_async_abort($self);
+        croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg");
 }
 
 sub popen {
@@ -352,6 +362,7 @@ sub cleanup {
         !!($self->{pid} || $self->{pid_c});
 }
 
+
 # assuming a well-maintained repo, this should be a somewhat
 # accurate estimation of its size
 # TODO: show this in the WWW UI as a hint to potential cloners
@@ -397,7 +408,7 @@ sub pub_urls {
 sub cat_async_begin {
         my ($self) = @_;
         cleanup($self) if $self->alternates_changed;
-        batch_prepare($self);
+        $self->batch_prepare;
         die 'BUG: already in async' if $self->{inflight};
         $self->{inflight} = [];
 }
@@ -413,11 +424,9 @@ sub cat_async ($$$;$) {
         push(@$inflight, $oid, $cb, $arg);
 }
 
-# this is safe to call inside $cb, but not guaranteed to enqueue
-# returns true if successful, undef if not.
 sub async_prefetch {
         my ($self, $oid, $cb, $arg) = @_;
-        if (defined($self->{async_cat}) && (my $inflight = $self->{inflight})) {
+        if (my $inflight = $self->{inflight}) {
                 # we could use MAX_INFLIGHT here w/o the halving,
                 # but lets not allow one client to monopolize a git process
                 if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) {