about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-11-02 09:35:33 +0000
committerEric Wong <e@80x24.org>2023-11-03 06:39:30 +0000
commitcfe25e6aa966144a9c96d1ba2c301fd5e1bad79b (patch)
tree4faa9092c422aafca9568e547634096da12667e9 /lib
parent16957ad7c4edcbf43294e67c8db06167594f0660 (diff)
downloadpublic-inbox-cfe25e6aa966144a9c96d1ba2c301fd5e1bad79b.tar.gz
This fixes two major problems with the use of tie for filehandles:

* no way to do fcntl, stat, etc. calls directly on the tied handle,
  forcing callers to use the `tied' perlop to access the underlying
  IO::Handle

* needing separate classes to handle blocking and non-blocking I/O

As a result, Git->cleanup_if_unlinked, InputPipe->consume,
and Qspawn->_yield_start have fewer bizzare bits and we
can call `$io->blocking(0)' directly instead of
`(tied *$io)->{fh}->blocking(0)'

Having a PublicInbox::IO class will also allow us to support
custom read buffering which allows inspecting the current state.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/Gcf2Client.pm7
-rw-r--r--lib/PublicInbox/Git.pm8
-rw-r--r--lib/PublicInbox/IO.pm54
-rw-r--r--lib/PublicInbox/Import.pm4
-rw-r--r--lib/PublicInbox/InputPipe.pm1
-rw-r--r--lib/PublicInbox/LeiToMail.pm5
-rw-r--r--lib/PublicInbox/ProcessIO.pm75
-rw-r--r--lib/PublicInbox/ProcessIONBF.pm25
-rw-r--r--lib/PublicInbox/Qspawn.pm5
-rw-r--r--lib/PublicInbox/Spawn.pm6
10 files changed, 70 insertions, 120 deletions
diff --git a/lib/PublicInbox/Gcf2Client.pm b/lib/PublicInbox/Gcf2Client.pm
index f63a0335..5220c474 100644
--- a/lib/PublicInbox/Gcf2Client.pm
+++ b/lib/PublicInbox/Gcf2Client.pm
@@ -10,7 +10,7 @@ use PublicInbox::Gcf2; # fails if Inline::C or libgit2-dev isn't available
 use PublicInbox::Spawn qw(spawn);
 use Socket qw(AF_UNIX SOCK_STREAM);
 use PublicInbox::Syscall qw(EPOLLIN);
-use PublicInbox::ProcessIO;
+use PublicInbox::IO;
 use autodie qw(socketpair);
 
 # fields:
@@ -32,11 +32,10 @@ sub new  {
         $opt->{0} = $opt->{1} = $s2;
         my $cmd = [$^X, $^W ? ('-w') : (),
                         qw[-MPublicInbox::Gcf2 -e PublicInbox::Gcf2::loop]];
-        my $pid = spawn($cmd, $env, $opt);
-        my $sock = PublicInbox::ProcessIO->maybe_new($pid, $s1);
+        PublicInbox::IO::attach_pid($s1, spawn($cmd, $env, $opt));
         $self->{inflight} = [];
         $self->{epwatch} = \undef; # for Git->cleanup
-        $self->SUPER::new($sock, EPOLLIN);
+        $self->SUPER::new($s1, EPOLLIN);
 }
 
 sub gcf2_async ($$$;$) {
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index 3dac32be..d00f576e 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -18,7 +18,7 @@ use Errno qw(EINTR EAGAIN);
 use File::Glob qw(bsd_glob GLOB_NOSORT);
 use File::Spec ();
 use PublicInbox::Spawn qw(spawn popen_rd run_qx which);
-use PublicInbox::ProcessIONBF;
+use PublicInbox::IO;
 use PublicInbox::Tmpfile;
 use IO::Poll qw(POLLIN);
 use Carp qw(croak carp);
@@ -146,6 +146,7 @@ sub _sock_cmd {
         my ($self, $batch, $err_c) = @_;
         $self->{sock} and Carp::confess('BUG: {sock} exists');
         socketpair(my $s1, my $s2, AF_UNIX, SOCK_STREAM, 0);
+        $s1->blocking(0);
         my $opt = { pgid => 0, 0 => $s2, 1 => $s2 };
         my $gd = $self->{git_dir};
         if ($gd =~ s!/([^/]+/[^/]+)\z!/!) {
@@ -164,7 +165,7 @@ sub _sock_cmd {
                                                 $self->fail("tmpfile($id): $!");
         }
         my $pid = spawn(\@cmd, undef, $opt);
-        $self->{sock} = PublicInbox::ProcessIONBF->new($pid, $s1);
+        $self->{sock} = PublicInbox::IO::attach_pid($s1, $pid);
 }
 
 sub poll_in ($) { IO::Poll::_poll($RDTIMEO, fileno($_[0]), my $ev = POLLIN) }
@@ -638,8 +639,7 @@ sub cleanup_if_unlinked {
         my $ret = 0;
         for my $obj ($self, ($self->{ck} // ())) {
                 my $sock = $obj->{sock} // next;
-                my PublicInbox::ProcessIONBF $p = tied *$sock; # ProcessIONBF
-                my $pid = $p->{pid} // next;
+                my $pid = $sock->attached_pid // next;
                 open my $fh, '<', "/proc/$pid/maps" or return cleanup($self, 1);
                 while (<$fh>) {
                         # n.b. we do not restart for unlinked multi-pack-index
diff --git a/lib/PublicInbox/IO.pm b/lib/PublicInbox/IO.pm
new file mode 100644
index 00000000..63850a52
--- /dev/null
+++ b/lib/PublicInbox/IO.pm
@@ -0,0 +1,54 @@
+# Copyright (C) all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# supports reaping of children tied to a pipe or socket
+package PublicInbox::IO;
+use v5.12;
+use parent qw(IO::Handle);
+use PublicInbox::DS qw(awaitpid);
+
+# TODO: this can probably be the new home for read_all, try_cat
+# and maybe even buffered read/readline...
+
+sub waitcb { # awaitpid callback
+        my ($pid, $errref, $cb, @args) = @_;
+        $$errref = $?; # sets .cerr for _close
+        $cb->($pid, @args) if $cb;
+}
+
+sub attach_pid ($$;@) {
+        my ($io, $pid, @cb_arg) = @_;
+        bless $io, __PACKAGE__;
+        # we share $err (and not $self) with awaitpid to avoid a ref cycle
+        ${*$io}{pi_io_reap} = [ $$, $pid, \(my $err) ];
+        awaitpid($pid, \&waitcb, \$err, @cb_arg);
+        $io;
+}
+
+sub attached_pid {
+        my ($io) = @_;
+        ${${*$io}{pi_io_reap} // []}[1];
+}
+
+# caller cares about error result if they call close explicitly
+# reap->[2] may be set before this is called via waitcb
+sub close {
+        my ($io) = @_;
+        my $ret = $io->SUPER::close;
+        my $reap = delete ${*$io}{pi_io_reap};
+        return $ret unless $reap && $reap->[0] == $$;
+        ${$reap->[2]} // (my $w = awaitpid($reap->[1])); # sets [2]
+        ($? = ${$reap->[2]}) ? '' : $ret;
+}
+
+sub DESTROY {
+        my ($io) = @_;
+        my $reap = delete ${*$io}{pi_io_reap};
+        if ($reap && $reap->[0] == $$) {
+                $io->SUPER::close;
+                awaitpid($reap->[1]);
+        }
+        $io->SUPER::DESTROY;
+}
+
+1;
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index e12a56e8..dfba34b9 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -16,7 +16,7 @@ use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::ContentHash qw(content_digest);
 use PublicInbox::MDA;
 use PublicInbox::Eml;
-use PublicInbox::ProcessIO;
+use PublicInbox::IO;
 use POSIX qw(strftime);
 use autodie qw(read close socketpair);
 use Carp qw(croak);
@@ -77,7 +77,7 @@ sub gfi_start {
                                 --quiet --done --date-format=raw) ];
                 my $pid = spawn($gfi, undef, { 0 => $s2, 1 => $s2 });
                 $self->{nchg} = 0;
-                $self->{io} = PublicInbox::ProcessIO->maybe_new($pid, $io);
+                $self->{io} = PublicInbox::IO::attach_pid($io, $pid);
         };
         if ($@) {
                 $self->lock_release;
diff --git a/lib/PublicInbox/InputPipe.pm b/lib/PublicInbox/InputPipe.pm
index f4d57e7d..232f20e8 100644
--- a/lib/PublicInbox/InputPipe.pm
+++ b/lib/PublicInbox/InputPipe.pm
@@ -39,7 +39,6 @@ sub consume {
         if ($@) { # regular file (but not w/ select|IO::Poll backends)
                 $self->{-need_rq} = 1;
                 $self->requeue;
-        } elsif (do { no warnings 'unopened'; !stat($in) }) { # ProcessIONBF
         } elsif (-p _ || -S _) { # O_NONBLOCK for sockets and pipes
                 $in->blocking(0);
         } elsif (-t $in) { # isatty(3) can't use `_' stat cache
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index ead60b38..b07c2c90 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -7,7 +7,7 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC);
 use PublicInbox::Eml;
-use PublicInbox::ProcessIO;
+use PublicInbox::IO;
 use PublicInbox::Spawn qw(spawn);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
@@ -160,8 +160,7 @@ sub _post_augment_mbox { # open a compressor process from top-level lei-daemon
         my $cmd = PublicInbox::MboxReader::zsfx2cmd($zsfx, undef, $lei);
         my ($r, $w) = @{delete $lei->{zpipe}};
         my $rdr = { 0 => $r, 1 => $lei->{1}, 2 => $lei->{2}, pgid => 0 };
-        my $pid = spawn($cmd, undef, $rdr);
-        $lei->{1} = PublicInbox::ProcessIO->maybe_new($pid, $w,
+        $lei->{1} = PublicInbox::IO::attach_pid($w, spawn($cmd, undef, $rdr),
                                 \&reap_compress, $lei, $cmd, $lei->{1});
 }
 
diff --git a/lib/PublicInbox/ProcessIO.pm b/lib/PublicInbox/ProcessIO.pm
deleted file mode 100644
index ea5d3e6c..00000000
--- a/lib/PublicInbox/ProcessIO.pm
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (C) all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-# a tied handle for auto reaping of children tied to a pipe or socket,
-# see perltie(1) for details.
-package PublicInbox::ProcessIO;
-use v5.12;
-use PublicInbox::DS qw(awaitpid);
-use Symbol qw(gensym);
-use bytes qw(length);
-
-sub maybe_new {
-        my ($cls, $pid, $fh, @cb_arg) = @_;
-        return ($fh, $pid) if wantarray;
-        my $s = gensym;
-        tie *$s, $cls, $pid, $fh, @cb_arg;
-        $s;
-}
-
-sub waitcb { # awaitpid callback
-        my ($pid, $err_ref, $cb, @args) = @_;
-        $$err_ref = $?; # sets >{pp_chld_err} for _close
-        $cb->($pid, @args) if $cb;
-}
-
-sub TIEHANDLE {
-        my ($cls, $pid, $fh, @cb_arg) = @_;
-        my $self = bless { pid => $pid, fh => $fh, ppid => $$ }, $cls;
-        # we share $err (and not $self) with awaitpid to avoid a ref cycle
-        $self->{pp_chld_err} = \(my $err);
-        awaitpid($pid, \&waitcb, \$err, @cb_arg);
-        $self;
-}
-
-# for IO::Uncompress::Gunzip and PublicInbox::LeiRediff
-sub BINMODE { @_ == 1 ? binmode($_[0]->{fh}) : binmode($_[0]->{fh}, $_[1]) }
-
-sub READ { read($_[0]->{fh}, $_[1], $_[2], $_[3] || 0) }
-
-sub READLINE { readline($_[0]->{fh}) }
-
-sub WRITE { syswrite($_[0]->{fh}, $_[1], $_[2] // length($_[1]), $_[3] // 0) }
-
-sub PRINT { print { $_[0]->{fh} } @_[1..$#_] }
-
-sub FILENO { fileno($_[0]->{fh}) }
-
-sub _close ($;$) {
-        my ($self, $wait) = @_;
-        my ($fh, $pid) = delete(@$self{qw(fh pid)});
-        my $ret = (defined($fh) && $wait) ? close($fh) : ($fh = '');
-        return $ret unless defined($pid) && $self->{ppid} == $$;
-        if ($wait) { # caller cares about the exit status:
-                # synchronous wait via defined(wantarray) on awaitpid:
-                defined(${$self->{pp_chld_err}}) or $wait = awaitpid($pid);
-                ($? = ${$self->{pp_chld_err}}) and $ret = '';
-        } else {
-                awaitpid($pid); # depends on $in_loop or not
-        }
-        $ret;
-}
-
-# if caller uses close(), assume they want to check $? immediately so
-# we'll waitpid() synchronously.  n.b. wantarray doesn't seem to
-# propagate `undef' down to tied methods, otherwise I'd rely on that.
-sub CLOSE { _close($_[0], 1) }
-
-# if relying on DESTROY, assume the caller doesn't care about $? and
-# we can let the event loop call waitpid() whenever it gets SIGCHLD
-sub DESTROY {
-        _close($_[0]);
-        undef;
-}
-
-1;
diff --git a/lib/PublicInbox/ProcessIONBF.pm b/lib/PublicInbox/ProcessIONBF.pm
deleted file mode 100644
index 490e200a..00000000
--- a/lib/PublicInbox/ProcessIONBF.pm
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (C) all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-# used to support unbuffered partial reads
-package PublicInbox::ProcessIONBF;
-use v5.12;
-use parent qw(PublicInbox::ProcessIO);
-use IO::Handle; # ->blocking
-
-sub new {
-        my ($cls, $pid, $fh, @cb_arg) = @_;
-        $fh->blocking(0) // die "$fh->blocking(0): $!";
-        my $io = $cls->SUPER::maybe_new($pid, $fh, @cb_arg);
-}
-
-sub replace {
-        my ($cls, $orig) = @_;
-        my $pio = tied *$orig; # ProcessIO
-        $pio->{fh}->blocking(0) // die "$pio->{fh}->blocking(0): $!";
-        bless $pio, $cls;
-}
-
-sub READ { sysread($_[0]->{fh}, $_[1], $_[2], $_[3] // 0) }
-
-1;
diff --git a/lib/PublicInbox/Qspawn.pm b/lib/PublicInbox/Qspawn.pm
index a03e1b01..0bf857c6 100644
--- a/lib/PublicInbox/Qspawn.pm
+++ b/lib/PublicInbox/Qspawn.pm
@@ -149,7 +149,7 @@ sub finish ($;$) {
 
         # we can safely finalize if pipe was closed before, or if
         # {_err} is defined by waitpid_err.  Deleting {rpipe} will
-        # trigger PublicInbox::ProcessIO::DESTROY -> waitpid_err,
+        # trigger PublicInbox::IO::DESTROY -> waitpid_err,
         # but it may not fire right away if inside the event loop.
         my $closed_before = !delete($self->{rpipe});
         finalize($self) if $closed_before || defined($self->{_err});
@@ -244,9 +244,8 @@ sub ipipe_cb { # InputPipe callback
 sub _yield_start { # may run later, much later...
         my ($self) = @_;
         if ($self->{psgi_env}->{'pi-httpd.async'}) {
-                require PublicInbox::ProcessIONBF;
                 my $rpipe = $self->{rpipe};
-                PublicInbox::ProcessIONBF->replace($rpipe);
+                $rpipe->blocking(0);
                 PublicInbox::InputPipe::consume($rpipe, \&ipipe_cb, $self);
         } else {
                 require PublicInbox::GetlineResponse;
diff --git a/lib/PublicInbox/Spawn.pm b/lib/PublicInbox/Spawn.pm
index b4f37bea..d3b7ef6f 100644
--- a/lib/PublicInbox/Spawn.pm
+++ b/lib/PublicInbox/Spawn.pm
@@ -19,7 +19,7 @@ use PublicInbox::Lock;
 use Fcntl qw(SEEK_SET);
 use IO::Handle ();
 use Carp qw(croak);
-use PublicInbox::ProcessIO;
+use PublicInbox::IO;
 our @EXPORT_OK = qw(which spawn popen_rd popen_wr run_die run_wait run_qx);
 our @RLIMITS = qw(RLIMIT_CPU RLIMIT_CORE RLIMIT_DATA);
 use autodie qw(open pipe seek sysseek truncate);
@@ -377,7 +377,7 @@ sub popen_rd {
         my ($cmd, $env, $opt, @cb_arg) = @_;
         pipe(my $r, local $opt->{1});
         my $pid = spawn($cmd, $env, $opt);
-        PublicInbox::ProcessIO->maybe_new($pid, $r, @cb_arg);
+        wantarray ? ($r, $pid) : PublicInbox::IO::attach_pid($r, $pid, @cb_arg)
 }
 
 sub popen_wr {
@@ -385,7 +385,7 @@ sub popen_wr {
         pipe(local $opt->{0}, my $w);
         $w->autoflush(1);
         my $pid = spawn($cmd, $env, $opt);
-        PublicInbox::ProcessIO->maybe_new($pid, $w, @cb_arg)
+        wantarray ? ($w, $pid) : PublicInbox::IO::attach_pid($w, $pid, @cb_arg)
 }
 
 sub read_out_err ($) {