about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2020-11-29 10:52:29 +0000
committer: Eric Wong <e@80x24.org> 2020-11-30 00:47:32 +0000
commit: f68c45d33656a4602c2d8b8c1a8be813f8a9a70d (patch)
tree: 3aad75a457a7a65c092528a48316cc2cf54a9585 /lib/PublicInbox
parent: ea885bf0f76dc1795dc771667be721ec0fed5482 (diff)
download: public-inbox-f68c45d33656a4602c2d8b8c1a8be813f8a9a70d.tar.gz
This makes GitAsyncCat more resilient to bugs in Gcf2 or even
git-cat-file itself.  I noticed -imapd stuck on read(2) from
the Gcf2 pipe, so there may be a bug somewhere in Gcf2 or
PublicInbox::Git.  This should make us more resilient to such bugs
and hopefully help us notice and fix them.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/Git.pm | 28
-rw-r--r-- lib/PublicInbox/GitAsyncCat.pm | 6
2 files changed, 22 insertions, 12 deletions
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 917fa4a1..d53427d7 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -12,17 +12,19 @@ use v5.10.1;
 use parent qw(Exporter);
 use POSIX ();
 use IO::Handle; # ->autoflush
-use Errno qw(EINTR);
+use Errno qw(EINTR EAGAIN);
 use File::Glob qw(bsd_glob GLOB_NOSORT);
 use File::Spec ();
 use Time::HiRes qw(stat);
 use PublicInbox::Spawn qw(popen_rd);
 use PublicInbox::Tmpfile;
+use IO::Poll qw(POLLIN);
 use Carp qw(croak);
 use Digest::SHA ();
 our @EXPORT_OK = qw(git_unquote git_quote);
 our $PIPE_BUFSIZ = 65536; # Linux default
 our $in_cleanup;
+our $RDTIMEO = 60_000; # milliseconds
 
 use constant MAX_INFLIGHT =>
         (($^O eq 'linux' ? 4096 : POSIX::_POSIX_PIPE_BUF()) * 3)
@@ -132,6 +134,8 @@ sub _bidi_pipe {
         $self->{$in} = $in_r;
 }
 
+sub poll_in ($) { IO::Poll::_poll($RDTIMEO, fileno($_[0]), my $ev = POLLIN) }
+
 sub my_read ($$$) {
         my ($fh, $rbuf, $len) = @_;
         my $left = $len - length($$rbuf);
@@ -140,9 +144,12 @@ sub my_read ($$$) {
                 $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
                 if ($r) {
                         $left -= $r;
+                } elsif (defined($r)) { # EOF
+                        return 0;
                 } else {
-                        next if (!defined($r) && $! == EINTR);
-                        return $r;
+                        next if ($! == EAGAIN and poll_in($fh));
+                        next if $! == EINTR; # may be set by sysread or poll_in
+                        return; # unrecoverable error
                 }
         }
         \substr($$rbuf, 0, $len, '');
@@ -154,9 +161,15 @@ sub my_readline ($$) {
                 if ((my $n = index($$rbuf, "\n")) >= 0) {
                         return substr($$rbuf, 0, $n + 1, '');
                 }
-                my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf));
-                next if $r || (!defined($r) && $! == EINTR);
-                return defined($r) ? '' : undef; # EOF or error
+                my $r = sysread($fh, $$rbuf, $PIPE_BUFSIZ, length($$rbuf))
+                                                                and next;
+
+                # return whatever's left on EOF
+                return substr($$rbuf, 0, length($$rbuf)+1, '') if defined($r);
+
+                next if ($! == EAGAIN and poll_in($fh));
+                next if $! == EINTR; # may be set by sysread or poll_in
+                return; # unrecoverable error
         }
 }
 
@@ -204,7 +217,8 @@ sub cat_async_step ($$) {
                 $type = 'missing';
                 $oid = ref($req) ? $$req : $req if $oid eq '';
         } else {
-                $self->fail("Unexpected result from async git cat-file: $head");
+                my $err = $! ? " ($!)" : '';
+                $self->fail("bad result from async cat-file: $head$err");
         }
         $self->{cat_rbuf} = $rbuf if $$rbuf ne '';
         eval { $cb->($bref, $oid, $type, $size, $arg) };
diff --git a/lib/PublicInbox/GitAsyncCat.pm b/lib/PublicInbox/GitAsyncCat.pm
index be51f673..dc97af16 100644
--- a/lib/PublicInbox/GitAsyncCat.pm
+++ b/lib/PublicInbox/GitAsyncCat.pm
@@ -3,11 +3,6 @@
 #
 # internal class used by PublicInbox::Git + PublicInbox::DS
 # This parses the output pipe of "git cat-file --batch"
-#
-# Note: this does NOT set the non-blocking flag, we expect `git cat-file'
-# to be a local process, and git won't start writing a blob until it's
-# fully read.  So minimize context switching and read as much as possible
-# and avoid holding a buffer in our heap any longer than it has to live.
 package PublicInbox::GitAsyncCat;
 use strict;
 use parent qw(PublicInbox::DS Exporter);
@@ -69,6 +64,7 @@ sub git_async_cat ($$$$) {
         $gitish->{async_cat} //= do {
                 # read-only end of pipe (Gcf2Client is write-only end)
                 my $self = bless { gitish => $gitish }, __PACKAGE__;
+                $gitish->{in}->blocking(0);
                 $self->SUPER::new($gitish->{in}, EPOLLIN|EPOLLET);
                 \undef; # this is a true ref()
         };