1 files changed, 362 insertions, 337 deletions
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 9c278307..a6fec954 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -21,42 +21,43 @@
  #        (tmpio = [ GLOB, offset, [ length ] ])
  package PublicInbox::DS;
  use strict;
-use bytes;
-use POSIX qw(WNOHANG);
-use IO::Handle qw();
+use v5.10.1;
+use parent qw(Exporter);
+use bytes qw(length substr); # FIXME(?): needed for PublicInbox::NNTP
+use POSIX qw(WNOHANG sigprocmask SIG_SETMASK SIG_UNBLOCK);
  use Fcntl qw(SEEK_SET :DEFAULT O_APPEND);
  use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
-use parent qw(Exporter);
-our @EXPORT_OK = qw(now msg_more);
-use 5.010_001;
  use Scalar::Util qw(blessed);
-use PublicInbox::Syscall qw(:epoll);
+use PublicInbox::Syscall qw(%SIGNUM
+        EPOLLIN EPOLLOUT EPOLLONESHOT EPOLLEXCLUSIVE);
  use PublicInbox::Tmpfile;
-use Errno qw(EAGAIN EINVAL);
-use Carp qw(confess carp);
+use PublicInbox::Select;
+use PublicInbox::OnDestroy;
+use Errno qw(EAGAIN EINVAL ECHILD);
+use Carp qw(carp croak);
+our @EXPORT_OK = qw(now msg_more awaitpid add_timer add_uniq_timer);
  
  my $nextq; # queue for next_tick
-my $wait_pids; # list of [ pid, callback, callback_arg ]
-my $later_queue; # list of callbacks to run at some later interval
-my $EXPMAP; # fd -> idle_time
-our $EXPTIME = 180; # 3 minutes
-my ($later_timer, $reap_armed, $exp_timer);
-my $ToClose; # sockets to close when event loop is done
-our (
-     %DescriptorMap,             # fd (num) -> PublicInbox::DS object
-     $Epoll,                     # Global epoll fd (or DSKQXS ref)
-     $_io,                       # IO::Handle for Epoll
-
-     $PostLoopCallback,          # subref to call at the end of each loop, if defined (global)
-
-     $LoopTimeout,               # timeout of event loop in milliseconds
-     $DoneInit,                  # if we've done the one-time module init yet
+my $reap_armed;
+my @active; # FDs (or objs) returned by epoll/kqueue
+our (%AWAIT_PIDS, # pid => [ $callback, @args ]
+        $cur_runq, # only set inside next_tick
+     @FD_MAP, # fd (num) -> PublicInbox::DS object
+     $Poller, # global Select, Epoll, DSPoll, or DSKQXS ref
+
+     @post_loop_do,              # subref + args to call at the end of each loop
+
+     $loop_timeout,               # timeout of event loop in milliseconds
       @Timers,                    # timers
+     %UniqTimer,
       $in_loop,
       );
  
  Reset();
  
+# clobber everything explicitly to avoid DESTROY ordering problems w/ DBI
+END { Reset() }
+
  #####################################################################
  ### C L A S S   M E T H O D S
  #####################################################################
@@ -67,180 +68,156 @@ Reset all state
  
  =cut
  sub Reset {
-    %DescriptorMap = ();
-    $in_loop = $wait_pids = $later_queue = $reap_armed = undef;
-    $EXPMAP = {};
-    $nextq = $ToClose = $later_timer = $exp_timer = undef;
-    $LoopTimeout = -1;  # no timeout by default
-    @Timers = ();
-
-    $PostLoopCallback = undef;
-    $DoneInit = 0;
-
-    $_io = undef; # closes real $Epoll FD
-    $Epoll = undef; # may call DSKQXS::DESTROY
+        $Poller = bless [], 'PublicInbox::DummyPoller';
+        do {
+                $in_loop = undef; # first in case DESTROY callbacks use this
+                # clobbering $Poller may call DSKQXS::DESTROY,
+                # we must always have this set to something to avoid
+                # needing branches before ep_del/ep_mod calls (via ->close).
+                @FD_MAP = ();
+                @Timers = ();
+                %UniqTimer = ();
+                @post_loop_do = ();
+
+                # we may be called from an *atfork_child inside next_tick:
+                @$cur_runq = () if $cur_runq;
+                @active = ();
+                $nextq = undef; # may call ep_del
+                %AWAIT_PIDS = ();
+        } while (@Timers || $nextq || keys(%AWAIT_PIDS) ||
+                @active || @FD_MAP ||
+                @post_loop_do || keys(%UniqTimer) ||
+                scalar(@{$cur_runq // []})); # do not vivify cur_runq
  
-    *EventLoop = *FirstTimeEventLoop;
-}
-
-=head2 C<< CLASS->SetLoopTimeout( $timeout ) >>
-
-Set the loop timeout for the event loop to some value in milliseconds.
-
-A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return
-immediately.
-
-=cut
-sub SetLoopTimeout {
-    return $LoopTimeout = $_[1] + 0;
+        $reap_armed = undef;
+        $loop_timeout = -1;  # no timeout by default
+        $Poller = PublicInbox::Select->new;
  }
  
-=head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >>
-
-Add a timer to occur $seconds from now. $seconds may be fractional, but timers
-are not guaranteed to fire at the exact time you ask for.
-
-=cut
-sub add_timer ($$;$) {
-    my ($secs, $coderef, $arg) = @_;
-
-    my $fire_time = now() + $secs;
-
-    my $timer = [$fire_time, $coderef, $arg];
-
-    if (!@Timers || $fire_time >= $Timers[-1][0]) {
-        push @Timers, $timer;
-        return $timer;
-    }
-
-    # Now, where do we insert?  (NOTE: this appears slow, algorithm-wise,
-    # but it was compared against calendar queues, heaps, naive push/sort,
-    # and a bunch of other versions, and found to be fastest with a large
-    # variety of datasets.)
-    for (my $i = 0; $i < @Timers; $i++) {
-        if ($Timers[$i][0] > $fire_time) {
-            splice(@Timers, $i, 0, $timer);
-            return $timer;
-        }
-    }
-
-    die "Shouldn't get here.";
-}
+sub _add_named_timer {
+        my ($name, $secs, $coderef, @args) = @_;
+        my $fire_time = now() + $secs;
+        my $timer = [$fire_time, $name, $coderef, @args];
  
-# keeping this around in case we support other FD types for now,
-# epoll_create1(EPOLL_CLOEXEC) requires Linux 2.6.27+...
-sub set_cloexec ($) {
-    my ($fd) = @_;
+        if (!@Timers || $fire_time >= $Timers[-1][0]) {
+                push @Timers, $timer;
+                return $timer;
+        }
  
-    $_io = IO::Handle->new_from_fd($fd, 'r+') or return;
-    defined(my $fl = fcntl($_io, F_GETFD, 0)) or return;
-    fcntl($_io, F_SETFD, $fl | FD_CLOEXEC);
+        # Now, where do we insert?  (NOTE: this appears slow, algorithm-wise,
+        # but it was compared against calendar queues, heaps, naive push/sort,
+        # and a bunch of other versions, and found to be fastest with a large
+        # variety of datasets.)
+        for (my $i = 0; $i < @Timers; $i++) {
+                if ($Timers[$i][0] > $fire_time) {
+                        splice(@Timers, $i, 0, $timer);
+                        return $timer;
+                }
+        }
+        die "Shouldn't get here.";
  }
  
-sub _InitPoller
-{
-    return if $DoneInit;
-    $DoneInit = 1;
+sub add_timer { _add_named_timer(undef, @_) }
  
-    if (PublicInbox::Syscall::epoll_defined())  {
-        $Epoll = epoll_create();
-        set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0);
-    } else {
-        my $cls;
-        for (qw(DSKQXS DSPoll)) {
-            $cls = "PublicInbox::$_";
-            last if eval "require $cls";
-        }
-        $cls->import(qw(epoll_ctl epoll_wait));
-        $Epoll = $cls->new;
-    }
-    *EventLoop = *EpollEventLoop;
+sub add_uniq_timer { # ($name, $secs, $coderef, @args) = @_;
+        $UniqTimer{$_[0]} //= _add_named_timer(@_);
  }
  
-=head2 C<< CLASS->EventLoop() >>
-
-Start processing IO events. In most daemon programs this never exits. See
-C<PostLoopCallback> below for how to exit the loop.
-
-=cut
-sub FirstTimeEventLoop {
-    my $class = shift;
-
-    _InitPoller();
-
-    EventLoop($class);
+# caller sets return value to $Poller
+sub _InitPoller () {
+        my @try = ($^O eq 'linux' ? 'Epoll' : 'DSKQXS');
+        my $cls;
+        for (@try, 'DSPoll') {
+                $cls = "PublicInbox::$_";
+                last if eval "require $cls";
+        }
+        $cls->new;
  }
  
  sub now () { clock_gettime(CLOCK_MONOTONIC) }
  
  sub next_tick () {
-    my $q = $nextq or return;
-    $nextq = undef;
-    for (@$q) {
-        # we avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak:
-        # https://rt.perl.org/Public/Bug/Display.html?id=114340
-        if (blessed($_)) {
-            $_->event_step;
-        } else {
-            $_->();
-        }
-    }
+        $cur_runq = $nextq or return;
+        $nextq = undef;
+        while (my $obj = shift @$cur_runq) {
+                # avoid "ref" on blessed refs to workaround a Perl 5.16.3 leak:
+                # https://rt.perl.org/Public/Bug/Display.html?id=114340
+                blessed($obj) ? $obj->event_step : $obj->();
+        }
+        1;
  }
  
  # runs timers and returns milliseconds for next one, or next event loop
  sub RunTimers {
-    next_tick();
+        my $ran = next_tick();
  
-    return (($nextq || $ToClose) ? 0 : $LoopTimeout) unless @Timers;
+        return ($nextq || $ran ? 0 : $loop_timeout) unless @Timers;
  
-    my $now = now();
+        my $now = now();
  
-    # Run expired timers
-    while (@Timers && $Timers[0][0] <= $now) {
-        my $to_run = shift(@Timers);
-        $to_run->[1]->($to_run->[2]);
-    }
+        # Run expired timers
+        while (@Timers && $Timers[0][0] <= $now) {
+                my $to_run = shift(@Timers);
+                delete $UniqTimer{$to_run->[1] // ''};
+                $to_run->[2]->(@$to_run[3..$#$to_run]);
+                $ran = 1;
+        }
  
-    # timers may enqueue into nextq:
-    return 0 if ($nextq || $ToClose);
+        # timers may enqueue into nextq:
+        return 0 if $nextq || $ran;
  
-    return $LoopTimeout unless @Timers;
+        return $loop_timeout unless @Timers;
  
-    # convert time to an even number of milliseconds, adding 1
-    # extra, otherwise floating point fun can occur and we'll
-    # call RunTimers like 20-30 times, each returning a timeout
-    # of 0.0000212 seconds
-    my $timeout = int(($Timers[0][0] - $now) * 1000) + 1;
+        # convert time to an even number of milliseconds, adding 1
+        # extra, otherwise floating point fun can occur and we'll
+        # call RunTimers like 20-30 times, each returning a timeout
+        # of 0.0000212 seconds
+        my $t = int(($Timers[0][0] - $now) * 1000) + 1;
  
-    # -1 is an infinite timeout, so prefer a real timeout
-    return $timeout     if $LoopTimeout == -1;
+        # -1 is an infinite timeout, so prefer a real timeout
+        ($loop_timeout < 0 || $loop_timeout >= $t) ? $t : $loop_timeout
+}
  
-    # otherwise pick the lower of our regular timeout and time until
-    # the next timer
-    return $LoopTimeout if $LoopTimeout < $timeout;
-    return $timeout;
+sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }
+
+# ensure we detect bugs, HW problems and user rlimits
+our @UNBLOCKABLE = (POSIX::SIGABRT, POSIX::SIGBUS, POSIX::SIGFPE,
+        POSIX::SIGILL, POSIX::SIGSEGV, POSIX::SIGXCPU, POSIX::SIGXFSZ);
+
+sub block_signals { # anything in @_ stays unblocked
+        my $newset = POSIX::SigSet->new;
+        $newset->fillset or die "fillset: $!";
+        for (@_, @UNBLOCKABLE) { $newset->delset($_) or die "delset($_): $!" }
+        my $oldset = POSIX::SigSet->new;
+        sig_setmask($newset, $oldset);
+        $oldset;
  }
  
-# We can't use waitpid(-1) safely here since it can hit ``, system(),
-# and other things.  So we scan the $wait_pids list, which is hopefully
-# not too big.  We keep $wait_pids small by not calling dwaitpid()
-# until we've hit EOF when reading the stdout of the child.
+sub await_cb ($;@) {
+        my ($pid, @cb_args) = @_;
+        my $cb = shift @cb_args or return;
+        eval { $cb->($pid, @cb_args) };
+        warn "E: awaitpid($pid): $@" if $@;
+}
  
+# This relies on our Perl process being single-threaded, or at least
+# no threads spawning and waiting on processes (``, system(), etc...)
+# Threads are officially discouraged by the Perl5 team, and I expect
+# that to remain the case.
  sub reap_pids {
          $reap_armed = undef;
-        my $tmp = $wait_pids or return;
-        $wait_pids = undef;
-        foreach my $ary (@$tmp) {
-                my ($pid, $cb, $arg) = @$ary;
-                my $ret = waitpid($pid, WNOHANG);
-                if ($ret == 0) {
-                        push @$wait_pids, $ary; # autovivifies @$wait_pids
-                } elsif ($cb) {
-                        eval { $cb->($arg, $pid) };
+        while (1) {
+                my $pid = waitpid(-1, WNOHANG) or return;
+                if (defined(my $cb_args = delete $AWAIT_PIDS{$pid})) {
+                        await_cb($pid, @$cb_args) if $cb_args;
+                } elsif ($pid == -1 && $! == ECHILD) {
+                        return requeue(\&dflush); # force @post_loop_do to run
+                } elsif ($pid > 0) {
+                        warn "W: reaped unknown PID=$pid: \$?=$?\n";
+                } else { # does this happen?
+                        return warn("W: waitpid(-1, WNOHANG) => $pid ($!)");
                  }
          }
-        # we may not be done, yet, and could've missed/masked a SIGCHLD:
-        $reap_armed //= requeue(\&reap_pids) if $wait_pids;
  }
  
  # reentrant SIGCHLD handler (since reap_pids is not reentrant)
@@ -248,65 +225,80 @@ sub enqueue_reap () { $reap_armed //= requeue(\&reap_pids) }
  
  sub in_loop () { $in_loop }
  
+# use inside @post_loop_do, returns number of busy clients
+sub close_non_busy () {
+        my $n = 0;
+        for my $s (grep defined, @FD_MAP) {
+                # close as much as possible, early as possible
+                ($s->busy ? ++$n : $s->close) if $s->can('busy');
+        }
+        $n;
+}
+
  # Internal function: run the post-event callback, send read events
  # for pushed-back data, and close pending connections.  returns 1
  # if event loop should continue, or 0 to shut it all down.
  sub PostEventLoop () {
-        # now we can close sockets that wanted to close during our event
-        # processing.  (we didn't want to close them during the loop, as we
-        # didn't want fd numbers being reused and confused during the event
-        # loop)
-        if (my $close_now = $ToClose) {
-                $ToClose = undef; # will be autovivified on push
-                @$close_now = map { fileno($_) } @$close_now;
-
-                # order matters, destroy expiry times, first:
-                delete @$EXPMAP{@$close_now};
-
-                # ->DESTROY methods may populate ToClose
-                delete @DescriptorMap{@$close_now};
-        }
-
          # by default we keep running, unless a postloop callback cancels it
-        $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1;
-}
-
-sub EpollEventLoop {
-    local $in_loop = 1;
-    do {
-        my @events;
-        my $i;
-        my $timeout = RunTimers();
-
-        # get up to 1000 events
-        my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events);
-        for ($i=0; $i<$evcount; $i++) {
-            # it's possible epoll_wait returned many events, including some at the end
-            # that ones in the front triggered unregister-interest actions.  if we
-            # can't find the %sock entry, it's because we're no longer interested
-            # in that event.
-            $DescriptorMap{$events[$i]->[0]}->event_step;
-        }
-    } while (PostEventLoop());
-    _run_later();
+        @post_loop_do ? $post_loop_do[0]->(@post_loop_do[1..$#post_loop_do]) : 1
  }
  
-=head2 C<< CLASS->SetPostLoopCallback( CODEREF ) >>
-
-Sets post loop callback function.  Pass a subref and it will be
-called every time the event loop finishes.
-
-Return 1 (or any true value) from the sub to make the loop continue, 0 or false
-and it will exit.
+sub sigset_prep ($$$) {
+        my ($sig, $init, $each) = @_; # $sig: { signame => whatever }
+        my $ret = POSIX::SigSet->new;
+        $ret->$init or die "$init: $!";
+        for my $s (keys %$sig) {
+                my $num = $SIGNUM{$s} // POSIX->can("SIG$s")->();
+                $ret->$each($num) or die "$each ($s => $num): $!";
+        }
+        for (@UNBLOCKABLE) { $ret->$each($_) or die "$each ($_): $!" }
+        $ret;
+}
+
+sub allowset ($) { sigset_prep $_[0], 'fillset', 'delset' }
+sub unblockset ($) { sigset_prep $_[0], 'emptyset', 'addset' }
+
+# Start processing IO events. In most daemon programs this never exits. See
+# C<post_loop_do> for how to exit the loop.
+sub event_loop (;$$) {
+        my ($sig, $oldset) = @_;
+        $Poller //= _InitPoller();
+        require PublicInbox::Sigfd if $sig;
+        my $sigfd = $sig ? PublicInbox::Sigfd->new($sig) : undef;
+        if ($sigfd && $sigfd->{is_kq}) {
+                my $tmp = allowset($sig);
+                local @SIG{keys %$sig} = values(%$sig);
+                sig_setmask($tmp, my $old = POSIX::SigSet->new);
+                # Unlike Linux signalfd, EVFILT_SIGNAL can't handle
+                # signals received before the filter is created,
+                # so we peek at signals here.
+                sig_setmask($old);
+        }
+        local @SIG{keys %$sig} = values(%$sig) if $sig && !$sigfd;
+        local $SIG{PIPE} = 'IGNORE';
+        if (!$sigfd && $sig) {
+                # wake up every second to accept signals if we don't
+                # have signalfd or IO::KQueue:
+                sig_setmask($oldset) if $oldset;
+                sigprocmask(SIG_UNBLOCK, unblockset($sig)) or
+                        die "SIG_UNBLOCK: $!";
+                $loop_timeout = 1000;
+        }
+        $_[0] = $sigfd = $sig = undef; # $_[0] == sig
+        local $in_loop = 1;
+        do {
+                my $timeout = RunTimers();
  
-The callback function will be passed two parameters: \%DescriptorMap
+                # grab whatever FDs are ready
+                $Poller->ep_wait($timeout, \@active);
  
-=cut
-sub SetPostLoopCallback {
-    my ($class, $ref) = @_;
+                # map all FDs to their associated Perl object
+                @active = @FD_MAP[@active];
  
-    # global callback
-    $PostLoopCallback = (defined $ref && ref $ref eq 'CODE') ? $ref : undef;
+                while (my $obj = shift @active) {
+                        $obj->event_step;
+                }
+        } while (PostEventLoop());
  }
  
  #####################################################################
@@ -318,7 +310,7 @@ sub SetPostLoopCallback {
  =head2 C<< CLASS->new( $socket ) >>
  
  Create a new PublicInbox::DS subclass object for the given I<socket> which will
-react to events on it during the C<EventLoop>.
+react to events on it during the C<event_loop>.
  
  This is normally (always?) called from your subclass via:
  
@@ -330,62 +322,54 @@ sub new {
      $self->{sock} = $sock;
      my $fd = fileno($sock);
  
-    _InitPoller();
-
-    if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
+    $Poller //= _InitPoller();
+retry:
+    if ($Poller->ep_add($sock, $ev)) {
          if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) {
              $ev &= ~EPOLLEXCLUSIVE;
              goto retry;
          }
-        die "couldn't add epoll watch for $fd: $!\n";
+        die "EPOLL_CTL_ADD $self/$sock/$fd: $!";
      }
-    confess("DescriptorMap{$fd} defined ($DescriptorMap{$fd})")
-        if defined($DescriptorMap{$fd});
+    defined($FD_MAP[$fd]) and
+                croak("BUG: FD:$fd in use by $FD_MAP[$fd] (for $self/$sock)");
  
-    $DescriptorMap{$fd} = $self;
+    $FD_MAP[$fd] = $self;
  }
  
-
-#####################################################################
-### I N S T A N C E   M E T H O D S
-#####################################################################
+# for IMAP, NNTP, and POP3 which greet clients upon connect
+sub greet {
+        my ($self, $sock) = @_;
+        my $ev = EPOLLIN;
+        my $wbuf;
+        if ($sock->can('accept_SSL') && !$sock->accept_SSL) {
+                return if $! != EAGAIN || !($ev = PublicInbox::TLS::epollbit());
+                $wbuf = [ \&accept_tls_step, $self->can('do_greet')];
+        }
+        new($self, $sock, $ev | EPOLLONESHOT);
+        if ($wbuf) {
+                $self->{wbuf} = $wbuf;
+        } else {
+                $self->do_greet;
+        }
+        $self;
+}
  
  sub requeue ($) { push @$nextq, $_[0] } # autovivifies
  
-=head2 C<< $obj->close >>
-
-Close the socket.
-
-=cut
+# drop the IO::Handle ref, true if successful, false if not (or already dropped)
+# (this is closer to CORE::close than Danga::Socket::close)
  sub close {
-    my ($self) = @_;
-    my $sock = delete $self->{sock} or return;
-
-    # we need to flush our write buffer, as there may
-    # be self-referential closures (sub { $client->close })
-    # preventing the object from being destroyed
-    delete $self->{wbuf};
-
-    # if we're using epoll, we have to remove this from our epoll fd so we stop getting
-    # notifications about it
-    my $fd = fileno($sock);
-    epoll_ctl($Epoll, EPOLL_CTL_DEL, $fd, 0) and
-        confess("EPOLL_CTL_DEL: $!");
-
-    # we explicitly don't delete from DescriptorMap here until we
-    # actually close the socket, as we might be in the middle of
-    # processing an epoll_wait/etc that returned hundreds of fds, one
-    # of which is not yet processed and is what we're closing.  if we
-    # keep it in DescriptorMap, then the event harnesses can just
-    # looked at $pob->{sock} == undef and ignore it.  but if it's an
-    # un-accounted for fd, then it (understandably) freak out a bit
-    # and emit warnings, thinking their state got off.
+        my ($self) = @_;
+        my $sock = delete $self->{sock} or return;
  
-    # defer closing the actual socket until the event loop is done
-    # processing this round of events.  (otherwise we might reuse fds)
-    push @$ToClose, $sock; # autovivifies $ToClose
+        # we need to clear our write buffer, as there may
+        # be self-referential closures (sub { $client->close })
+        # preventing the object from being destroyed
+        delete $self->{wbuf};
+        $FD_MAP[fileno($sock)] = undef;
  
-    return 0;
+        !$Poller->ep_del($sock); # stop getting notifications
  }
  
  # portable, non-thread-safe sendfile emulation (no pread, yet)
@@ -431,8 +415,8 @@ next_buf:
                          shift @$wbuf;
                          goto next_buf;
                      }
-                } elsif ($! == EAGAIN) {
-                    epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
+                } elsif ($! == EAGAIN && (my $ev = epbit($sock, EPOLLOUT))) {
+                    epwait($sock, $ev | EPOLLONESHOT);
                      return 0;
                  } else {
                      return $self->close;
@@ -461,39 +445,40 @@ sub rbuf_idle ($$) {
      }
  }
  
+# returns true if bytes are read, false otherwise
  sub do_read ($$$;$) {
-    my ($self, $rbuf, $len, $off) = @_;
-    my $r = sysread(my $sock = $self->{sock}, $$rbuf, $len, $off // 0);
-    return ($r == 0 ? $self->close : $r) if defined $r;
-    # common for clients to break connections without warning,
-    # would be too noisy to log here:
-    if ($! == EAGAIN) {
-        epwait($sock, epbit($sock, EPOLLIN) | EPOLLONESHOT);
-        rbuf_idle($self, $rbuf);
-        0;
-    } else {
-        $self->close;
-    }
+        my ($self, $rbuf, $len, $off) = @_;
+        my ($ev, $r, $s);
+        $r = sysread($s = $self->{sock}, $$rbuf, $len, $off // 0) and return $r;
+
+        if (!defined($r) && $! == EAGAIN && ($ev = epbit($s, EPOLLIN))) {
+                epwait($s, $ev | EPOLLONESHOT);
+                rbuf_idle($self, $rbuf);
+        } else {
+                $self->close;
+        }
+        undef;
  }
  
  # drop the socket if we hit unrecoverable errors on our system which
  # require BOFH attention: ENOSPC, EFBIG, EIO, EMFILE, ENFILE...
  sub drop {
-    my $self = shift;
-    carp(@_);
-    $self->close;
+        my $self = shift;
+        carp(@_);
+        $self->close;
+        undef;
  }
  
-# n.b.: use ->write/->read for this buffer to allow compatibility with
-# PerlIO::mmap or PerlIO::scalar if needed
  sub tmpio ($$$) {
-    my ($self, $bref, $off) = @_;
-    my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
-        return drop($self, "tmpfile $!");
-    $fh->autoflush(1);
-    my $len = bytes::length($$bref) - $off;
-    $fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!");
-    [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
+        my ($self, $bref, $off) = @_;
+        my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
+                return drop($self, "tmpfile $!");
+        $fh->autoflush(1);
+        my $len = length($$bref) - $off;
+        my $n = syswrite($fh, $$bref, $len, $off) //
+                return drop($self, "write ($len): $!");
+        $n == $len or return drop($self, "wrote $n < $len bytes");
+        [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
  }
  
  =head2 C<< $obj->write( $data ) >>
@@ -523,7 +508,8 @@ sub write {
              push @$wbuf, $bref;
          } else {
              my $tmpio = $wbuf->[-1];
-            if ($tmpio && !defined($tmpio->[2])) { # append to tmp file buffer
+            if (ref($tmpio) eq 'ARRAY' && !defined($tmpio->[2])) {
+                # append to tmp file buffer
                  $tmpio->[0]->print($$bref) or return drop($self, "print: $!");
              } else {
                  my $tmpio = tmpio($self, $bref, 0) or return 0;
@@ -535,14 +521,15 @@ sub write {
          $bref->($self);
          return 1;
      } else {
-        my $to_write = bytes::length($$bref);
+        my $to_write = length($$bref);
          my $written = syswrite($sock, $$bref, $to_write);
  
          if (defined $written) {
              return 1 if $written == $to_write;
              requeue($self); # runs: event_step -> flush_write
          } elsif ($! == EAGAIN) {
-            epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
+            my $ev = epbit($sock, EPOLLOUT) or return $self->close;
+            epwait($sock, $ev | EPOLLONESHOT);
              $written = 0;
          } else {
              return $self->close;
@@ -569,7 +556,7 @@ sub msg_more ($$) {
                  !$sock->can('stop_SSL')) {
          my $n = send($sock, $_[1], MSG_MORE);
          if (defined $n) {
-            my $nlen = bytes::length($_[1]) - $n;
+            my $nlen = length($_[1]) - $n;
              return 1 if $nlen == 0; # all done!
              # queue up the unwritten substring:
              my $tmpio = tmpio($self, \($_[1]), $n) or return 0;
@@ -584,9 +571,8 @@ sub msg_more ($$) {
  }
  
  sub epwait ($$) {
-    my ($sock, $ev) = @_;
-    epoll_ctl($Epoll, EPOLL_CTL_MOD, fileno($sock), $ev) and
-        confess("EPOLL_CTL_MOD $!");
+        my ($io, $ev) = @_;
+        $Poller->ep_mod($io, $ev) and croak("EPOLL_CTL_MOD($io): $!");
  }
  
  # return true if complete, false if incomplete (or failure)
@@ -595,84 +581,123 @@ sub accept_tls_step ($) {
      my $sock = $self->{sock} or return;
      return 1 if $sock->accept_SSL;
      return $self->close if $! != EAGAIN;
-    epwait($sock, PublicInbox::TLS::epollbit() | EPOLLONESHOT);
+    my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+    epwait($sock, $ev | EPOLLONESHOT);
      unshift(@{$self->{wbuf}}, \&accept_tls_step); # autovivifies
      0;
  }
  
-# return true if complete, false if incomplete (or failure)
+# return value irrelevant
  sub shutdn_tls_step ($) {
      my ($self) = @_;
      my $sock = $self->{sock} or return;
      return $self->close if $sock->stop_SSL(SSL_fast_shutdown => 1);
      return $self->close if $! != EAGAIN;
-    epwait($sock, PublicInbox::TLS::epollbit() | EPOLLONESHOT);
+    my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+    epwait($sock, $ev | EPOLLONESHOT);
      unshift(@{$self->{wbuf}}, \&shutdn_tls_step); # autovivifies
-    0;
  }
  
  # don't bother with shutdown($sock, 2), we don't fork+exec w/o CLOEXEC
  # or fork w/o exec, so no inadvertent socket sharing
  sub shutdn ($) {
-    my ($self) = @_;
-    my $sock = $self->{sock} or return;
-    if ($sock->can('stop_SSL')) {
-        shutdn_tls_step($self);
-    } else {
-        $self->close;
-    }
+        my ($self) = @_;
+        my $sock = $self->{sock} or return;
+        $sock->can('stop_SSL') ? shutdn_tls_step($self) : $self->close;
  }
  
-# must be called with eval, PublicInbox::DS may not be loaded (see t/qspawn.t)
-sub dwaitpid ($$$) {
-        die "Not in EventLoop\n" unless $in_loop;
-        push @$wait_pids, [ @_ ]; # [ $pid, $cb, $arg ]
+sub dflush {} # overridden by DSdeflate
+sub compressed {} # overridden by DSdeflate
+sub long_response_done {} # overridden by PublicInbox::NNTP
  
-        # We could've just missed our SIGCHLD, cover it, here:
-        enqueue_reap();
+sub long_step {
+        my ($self) = @_;
+        # wbuf is unset or empty, here; {long} may add to it
+        my ($fd, $cb, $t0, @args) = @{$self->{long_cb}};
+        my $more = eval { $cb->($self, @args) };
+        if ($@ || !$self->{sock}) { # something bad happened...
+                delete $self->{long_cb};
+                my $elapsed = now() - $t0;
+                $@ and warn("$@ during long response[$fd] - ",
+                                sprintf('%0.6f', $elapsed),"\n");
+                $self->out(" deferred[$fd] aborted - %0.6f", $elapsed);
+                $self->close;
+        } elsif ($more) { # $self->{wbuf}:
+                # control passed to ibx_async_cat if $more == \undef
+                requeue_once($self) if !ref($more);
+        } else { # all done!
+                delete $self->{long_cb};
+                $self->long_response_done;
+                my $elapsed = now() - $t0;
+                $self->out(" deferred[$fd] done - %0.6f", $elapsed);
+                my $wbuf = $self->{wbuf}; # do NOT autovivify
+                requeue($self) unless $wbuf && @$wbuf;
+        }
  }
  
-sub _run_later () {
-        my $run = $later_queue or return;
-        $later_timer = $later_queue = undef;
-        $_->() for @$run;
-}
+sub requeue_once {
+        my ($self) = @_;
+        # COMPRESS users all share the same DEFLATE context.
+        # Flush it here to ensure clients don't see each other's data
+        $self->dflush;
  
-sub later ($) {
-        push @$later_queue, $_[0]; # autovivifies @$later_queue
-        $later_timer //= add_timer(60, \&_run_later);
+        # no recursion, schedule another call ASAP,
+        # but only after all pending writes are done.
+        # autovivify wbuf.  wbuf may be populated by $cb,
+        # no need to rearm if so: (push returns new size of array)
+        $self->requeue if push(@{$self->{wbuf}}, \&long_step) == 1;
  }
  
-sub expire_old () {
-        my $now = now();
-        my $exp = $EXPTIME;
-        my $old = $now - $exp;
-        my %new;
-        while (my ($fd, $idle_at) = each %$EXPMAP) {
-                if ($idle_at < $old) {
-                        my $ds_obj = $DescriptorMap{$fd};
-                        $new{$fd} = $idle_at if !$ds_obj->shutdn;
+sub long_response ($$;@) {
+        my ($self, $cb, @args) = @_; # cb returns true if more, false if done
+        my $sock = $self->{sock} or return;
+        # make sure we disable reading during a long response,
+        # clients should not be sending us stuff and making us do more
+        # work while we are streaming a response to them
+        $self->{long_cb} = [ fileno($sock), $cb, now(), @args ];
+        long_step($self); # kick off!
+        undef;
+}
+
+sub awaitpid {
+        my ($pid, @cb_args) = @_; # @cb_args = ($cb, @args), $cb may be undef
+        $AWAIT_PIDS{$pid} = \@cb_args if @cb_args;
+        # provide synchronous API
+        if (defined(wantarray) || (!$in_loop && !@cb_args)) {
+                my $ret = waitpid($pid, 0);
+                if ($ret == $pid) {
+                        my $cb_args = delete $AWAIT_PIDS{$pid};
+                        @cb_args = @$cb_args if !@cb_args && $cb_args;
+                        await_cb($pid, @cb_args);
                  } else {
-                        $new{$fd} = $idle_at;
+                        carp "waitpid($pid) => $ret ($!)";
+                        delete $AWAIT_PIDS{$pid};
                  }
+                return $ret;
+        } elsif ($in_loop) { # We could've just missed our SIGCHLD, cover it, here:
+                enqueue_reap();
          }
-        $EXPMAP = \%new;
-        $exp_timer = scalar(keys %new) ? later(\&expire_old) : undef;
  }
  
-sub update_idle_time {
-        my ($self) = @_;
-        my $sock = $self->{sock} or return;
-        $EXPMAP->{fileno($sock)} = now();
-        $exp_timer //= later(\&expire_old);
+# for persistent child process
+sub fork_persist () {
+        my $seed = rand(0xffffffff);
+        my $pid = PublicInbox::OnDestroy::fork_tmp;
+        if ($pid == 0) {
+                srand($seed);
+                eval { Net::SSLeay::randomize() }; # may not be loaded
+                Reset();
+        }
+        $pid;
  }
  
-sub not_idle_long {
-        my ($self, $now) = @_;
-        my $sock = $self->{sock} or return;
-        my $idle_at = $EXPMAP->{fileno($sock)} or return;
-        ($idle_at + $EXPTIME) > $now;
-}
+package PublicInbox::DummyPoller; # only used during Reset
+use v5.12;
+
+sub ep_del {}
+no warnings 'once';
+*ep_add = \&ep_del;
+*ep_mod = \&ep_del;
  
  1;