From d78f50649a5545d66a61b5465ca7f5ce4be398ea Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 19 Sep 2020 09:37:14 +0000 Subject: gcf2: wire up read-only daemons and rm -gcf2 script It seems easiest to have a singleton Gcf2Client client object per daemon worker for all inboxes to use. This reduces overall FD usage from pipes. The `public-inbox-gcf2' command + manpage are gone and a `$^X' one-liner is used, instead. This saves inodes for internal commands and hopefully makes it easier to avoid mismatched PERL5LIB include paths (as noticed during development :x). We'll also make the existing cat-file process management infrastructure more resilient to BOFHs on process killing sprees (or in case our libgit2-based code fails on us). (Rare) PublicInbox::WWW PSGI users NOT using public-inbox-httpd won't automatically benefit from this change, and extra configuration will be required (to be documented later). --- lib/PublicInbox/Git.pm | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'lib/PublicInbox/Git.pm') diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 6bb82b6b..2323cecc 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -185,11 +185,12 @@ sub cat_async_step ($$) { my $rbuf = delete($self->{cat_rbuf}) // \(my $new = ''); my ($bref, $oid, $type, $size); my $head = my_readline($self->{in}, $rbuf); + # ->fail may be called via Gcf2Client.pm if ($head =~ /^([0-9a-f]{40,}) (\S+) ([0-9]+)$/) { ($oid, $type, $size) = ($1, $2, $3 + 0); $bref = my_read($self->{in}, $rbuf, $size + 1) or - fail($self, defined($bref) ? 'read EOF' : "read: $!"); - chop($$bref) eq "\n" or fail($self, 'LF missing after blob'); + $self->fail(defined($bref) ? 'read EOF' : "read: $!"); + chop($$bref) eq "\n" or $self->fail('LF missing after blob'); } elsif ($head =~ s/ missing\n//s) { $oid = $head; # ref($req) indicates it's already been retried @@ -201,7 +202,7 @@ sub cat_async_step ($$) { $type = 'missing'; $oid = ref($req) ? $$req : $req if $oid eq ''; } else { - fail($self, "Unexpected result from async git cat-file: $head"); + $self->fail("Unexpected result from async git cat-file: $head"); } eval { $cb->($bref, $oid, $type, $size, $arg) }; $self->{cat_rbuf} = $rbuf if $$rbuf ne ''; @@ -304,10 +305,12 @@ sub check { sub _destroy { my ($self, $rbuf, $in, $out, $pid, $err) = @_; - my $p = delete $self->{$pid} or return; delete @$self{($rbuf, $in, $out)}; delete $self->{$err} if $err; # `err_c' + # GitAsyncCat::event_step may delete {pid} + my $p = delete $self->{$pid} or return; + # PublicInbox::DS may not be loaded eval { PublicInbox::DS::dwaitpid($p, undef, undef) }; waitpid($p, 0) if $@; # wait synchronously if not in event loop @@ -315,14 +318,21 @@ sub _destroy { sub cat_async_abort ($) { my ($self) = @_; - my $inflight = delete $self->{inflight} or die 'BUG: not in async'; + if (my $inflight = delete $self->{inflight}) { + while (@$inflight) { + my ($req, $cb, $arg) = splice(@$inflight, 0, 3); + $req =~ s/ .*//; # drop git_dir for Gcf2Client + eval { $cb->(undef, $req, undef, undef, $arg) }; + warn "E: $req: $@ (in abort)\n" if $@; + } + } cleanup($self); } sub fail { my ($self, $msg) = @_; - $self->{inflight} ? cat_async_abort($self) : cleanup($self); - croak("git $self->{git_dir}: $msg"); + cat_async_abort($self); + croak(ref($self) . ' ' . ($self->{git_dir} // '') . ": $msg"); } sub popen { @@ -352,6 +362,7 @@ sub cleanup { !!($self->{pid} || $self->{pid_c}); } + # assuming a well-maintained repo, this should be a somewhat # accurate estimation of its size # TODO: show this in the WWW UI as a hint to potential cloners @@ -397,7 +408,7 @@ sub pub_urls { sub cat_async_begin { my ($self) = @_; cleanup($self) if $self->alternates_changed; - batch_prepare($self); + $self->batch_prepare; die 'BUG: already in async' if $self->{inflight}; $self->{inflight} = []; } @@ -413,11 +424,9 @@ sub cat_async ($$$;$) { push(@$inflight, $oid, $cb, $arg); } -# this is safe to call inside $cb, but not guaranteed to enqueue -# returns true if successful, undef if not. sub async_prefetch { my ($self, $oid, $cb, $arg) = @_; - if (defined($self->{async_cat}) && (my $inflight = $self->{inflight})) { + if (my $inflight = $self->{inflight}) { # we could use MAX_INFLIGHT here w/o the halving, # but lets not allow one client to monopolize a git process if (scalar(@$inflight) < int(MAX_INFLIGHT/2)) { -- cgit v1.2.3-24-ge0c7