about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2019-11-27 01:33:33 +0000
committerEric Wong <e@80x24.org>2019-11-27 10:25:43 +0000
commitd6674af04cb74a4efd513d938bed8bf7ab2838eb (patch)
tree98e1924639d6ee8be3cd0bb4d614332a0a1cbef4
parent1e44ee6d429b853a7a87ae58e56241c55ab8c306 (diff)
downloadpublic-inbox-d6674af04cb74a4efd513d938bed8bf7ab2838eb.tar.gz
Our attempt at using a self-pipe in signal handlers was
ineffective, since pure Perl code execution is deferred
and Perl doesn't use an internal self-pipe/eventfd.  In
retrospect, I actually prefer the simplicity of Perl in
this regard...

We can use sigprocmask() from Perl, so we can introduce
signalfd(2) and EVFILT_SIGNAL support on Linux and *BSD-based
systems, respectively.  These OS primitives allow us to avoid a
race where Perl checks for signals right before epoll_wait() or
kevent() puts the process to sleep.

The (few) systems nowadays without signalfd(2) or IO::KQueue
will now see wakeups every second to avoid missed signals.
-rw-r--r--MANIFEST2
-rw-r--r--lib/PublicInbox/DS.pm6
-rw-r--r--lib/PublicInbox/DSKQXS.pm103
-rw-r--r--lib/PublicInbox/Daemon.pm183
-rw-r--r--lib/PublicInbox/Sigfd.pm63
-rw-r--r--lib/PublicInbox/Syscall.pm42
-rw-r--r--t/ds-kqxs.t28
-rw-r--r--t/sigfd.t65
8 files changed, 376 insertions, 116 deletions
diff --git a/MANIFEST b/MANIFEST
index a50c1246..098e656d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -141,6 +141,7 @@ lib/PublicInbox/SearchIdxShard.pm
 lib/PublicInbox/SearchMsg.pm
 lib/PublicInbox/SearchThread.pm
 lib/PublicInbox/SearchView.pm
+lib/PublicInbox/Sigfd.pm
 lib/PublicInbox/SolverGit.pm
 lib/PublicInbox/Spamcheck.pm
 lib/PublicInbox/Spamcheck/Spamc.pm
@@ -266,6 +267,7 @@ t/replace.t
 t/reply.t
 t/search-thr-index.t
 t/search.t
+t/sigfd.t
 t/solve/0001-simple-mod.patch
 t/solve/0002-rename-with-modifications.patch
 t/solver_git.t
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 7f7cb85d..17c640f4 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -53,6 +53,7 @@ our (
      $LoopTimeout,               # timeout of event loop in milliseconds
      $DoneInit,                  # if we've done the one-time module init yet
      @Timers,                    # timers
+     $in_loop,
      );
 
 Reset();
@@ -249,7 +250,7 @@ sub reap_pids {
 sub enqueue_reap ($) { push @$nextq, \&reap_pids };
 
 sub EpollEventLoop {
-    local $SIG{CHLD} = \&enqueue_reap;
+    local $in_loop = 1;
     while (1) {
         my @events;
         my $i;
@@ -628,8 +629,7 @@ sub shutdn ($) {
 # must be called with eval, PublicInbox::DS may not be loaded (see t/qspawn.t)
 sub dwaitpid ($$$) {
     my ($pid, $cb, $arg) = @_;
-    my $chld = $SIG{CHLD};
-    if (defined($chld) && $chld eq \&enqueue_reap) {
+    if ($in_loop) {
         push @$WaitPids, [ $pid, $cb, $arg ];
 
         # We could've just missed our SIGCHLD, cover it, here:
diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm
index 84e146f8..a56079e2 100644
--- a/lib/PublicInbox/DSKQXS.pm
+++ b/lib/PublicInbox/DSKQXS.pm
@@ -8,18 +8,20 @@
 # like epoll to simplify the code in DS.pm.  This is NOT meant to be
 # an all encompassing emulation of epoll via IO::KQueue, but just to
 # support cases public-inbox-nntpd/httpd care about.
-# A pure-Perl version using syscall() is planned, and it should be
-# faster due to the lack of syscall overhead.
+#
+# It also implements signalfd(2) emulation via "tie".
+#
+# A pure-Perl version using syscall() is planned.
 package PublicInbox::DSKQXS;
 use strict;
 use warnings;
-use parent qw(IO::KQueue);
 use parent qw(Exporter);
+use Symbol qw(gensym);
 use IO::KQueue;
+use Errno qw(EAGAIN);
 use PublicInbox::Syscall qw(EPOLLONESHOT EPOLLIN EPOLLOUT EPOLLET
-        EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL);
+        EPOLL_CTL_ADD EPOLL_CTL_MOD EPOLL_CTL_DEL SFD_NONBLOCK);
 our @EXPORT_OK = qw(epoll_ctl epoll_wait);
-my $owner_pid = -1; # kqueue is close-on-fork (yes, fork, not exec)
 
 sub EV_DISPATCH () { 0x0080 }
 
@@ -41,29 +43,90 @@ sub kq_flag ($$) {
 
 sub new {
         my ($class) = @_;
-        die 'non-singleton use not supported' if $owner_pid == $$;
-        $owner_pid = $$;
-        $class->SUPER::new;
+        bless { kq => IO::KQueue->new, owner_pid => $$ }, $class;
+}
+
+# returns a new instance which behaves like signalfd on Linux.
+# It's wasteful in that it uses another FD, but it simplifies
+# our epoll-oriented code.
+sub signalfd {
+        my ($class, $signo, $flags) = @_;
+        my $sym = gensym;
+        tie *$sym, $class, $signo, $flags; # calls TIEHANDLE
+        $sym
+}
+
+sub TIEHANDLE { # similar to signalfd()
+        my ($class, $signo, $flags) = @_;
+        my $self = $class->new;
+        $self->{timeout} = ($flags & SFD_NONBLOCK) ? 0 : -1;
+        my $kq = $self->{kq};
+        $kq->EV_SET($_, EVFILT_SIGNAL, EV_ADD) for @$signo;
+        $self;
+}
+
+sub READ { # called by sysread() for signalfd compatibility
+        my ($self, undef, $len, $off) = @_; # $_[1] = buf
+        die "bad args for signalfd read" if ($len % 128) // defined($off);
+        my $timeout = $self->{timeout};
+        my $sigbuf = $self->{sigbuf} //= [];
+        my $nr = $len / 128;
+        my $r = 0;
+        $_[1] = '';
+        do {
+                while ($nr--) {
+                        my $signo = shift(@$sigbuf) or last;
+                        # caller only cares about signalfd_siginfo.ssi_signo:
+                        $_[1] .= pack('L', $signo) . ("\0" x 124);
+                        $r += 128;
+                }
+                return $r if $r;
+                my @events = eval { $self->{kq}->kevent($timeout) };
+                # workaround https://rt.cpan.org/Ticket/Display.html?id=116615
+                if ($@) {
+                        next if $@ =~ /Interrupted system call/;
+                        die;
+                }
+                if (!scalar(@events) && $timeout == 0) {
+                        $! = EAGAIN;
+                        return;
+                }
+
+                # Grab the kevent.ident (signal number).  The kevent.data
+                # field shows coalesced signals, and maybe we'll use it
+                # in the future...
+                @$sigbuf = map { $_->[0] } @events;
+        } while (1);
 }
 
+# for fileno() calls in PublicInbox::DS
+sub FILENO { ${$_[0]->{kq}} }
+
 sub epoll_ctl {
         my ($self, $op, $fd, $ev) = @_;
+        my $kq = $self->{kq};
         if ($op == EPOLL_CTL_MOD) {
-                $self->EV_SET($fd, EVFILT_READ, kq_flag(EPOLLIN, $ev));
-                $self->EV_SET($fd, EVFILT_WRITE, kq_flag(EPOLLOUT, $ev));
+                $kq->EV_SET($fd, EVFILT_READ, kq_flag(EPOLLIN, $ev));
+                $kq->EV_SET($fd, EVFILT_WRITE, kq_flag(EPOLLOUT, $ev));
         } elsif ($op == EPOLL_CTL_DEL) {
-                $self->EV_SET($fd, EVFILT_READ, EV_DISABLE);
-                $self->EV_SET($fd, EVFILT_WRITE, EV_DISABLE);
-        } else {
-                $self->EV_SET($fd, EVFILT_READ, EV_ADD|kq_flag(EPOLLIN, $ev));
-                $self->EV_SET($fd, EVFILT_WRITE, EV_ADD|kq_flag(EPOLLOUT, $ev));
+                $kq->EV_SET($fd, EVFILT_READ, EV_DISABLE);
+                $kq->EV_SET($fd, EVFILT_WRITE, EV_DISABLE);
+        } else { # EPOLL_CTL_ADD
+                $kq->EV_SET($fd, EVFILT_READ, EV_ADD|kq_flag(EPOLLIN, $ev));
+
+                # we call this blindly for read-only FDs such as tied
+                # DSKQXS (signalfd emulation) and Listeners
+                eval {
+                        $kq->EV_SET($fd, EVFILT_WRITE, EV_ADD |
+                                                        kq_flag(EPOLLOUT, $ev));
+                };
         }
         0;
 }
 
 sub epoll_wait {
         my ($self, $maxevents, $timeout_msec, $events) = @_;
-        @$events = eval { $self->kevent($timeout_msec) };
+        @$events = eval { $self->{kq}->kevent($timeout_msec) };
         if (my $err = $@) {
                 # workaround https://rt.cpan.org/Ticket/Display.html?id=116615
                 if ($err =~ /Interrupted system call/) {
@@ -76,11 +139,13 @@ sub epoll_wait {
         scalar(@$events);
 }
 
+# kqueue is close-on-fork (not exec), so we must not close it
+# in forked processes:
 sub DESTROY {
         my ($self) = @_;
-        if ($owner_pid == $$) {
-                POSIX::close($$self);
-                $owner_pid = -1;
+        my $kq = delete $self->{kq} or return;
+        if (delete($self->{owner_pid}) == $$) {
+                POSIX::close($$kq);
         }
 }
 
diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm
index 0e3b95d2..c7a71ba0 100644
--- a/lib/PublicInbox/Daemon.pm
+++ b/lib/PublicInbox/Daemon.pm
@@ -15,9 +15,11 @@ use Cwd qw/abs_path/;
 STDOUT->autoflush(1);
 STDERR->autoflush(1);
 use PublicInbox::DS qw(now);
+use PublicInbox::Syscall qw(SFD_NONBLOCK);
 require PublicInbox::EvCleanup;
 require PublicInbox::Listener;
 require PublicInbox::ParentPipe;
+require PublicInbox::Sigfd;
 my @CMD;
 my ($set_user, $oldset, $newset);
 my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize);
@@ -74,12 +76,14 @@ sub accept_tls_opt ($) {
         { SSL_server => 1, SSL_startHandshake => 0, SSL_reuse_ctx => $ctx };
 }
 
+sub sig_setmask { sigprocmask(SIG_SETMASK, @_) or die "sigprocmask: $!" }
+
 sub daemon_prepare ($) {
         my ($default_listen) = @_;
         $oldset = POSIX::SigSet->new();
         $newset = POSIX::SigSet->new();
         $newset->fillset or die "fillset: $!";
-        sigprocmask(SIG_SETMASK, $newset, $oldset) or die "sigprocmask: $!";
+        sig_setmask($newset, $oldset);
         @CMD = ($0, @ARGV);
         my %opts = (
                 'l|listen=s' => \@cfg_listen,
@@ -252,30 +256,12 @@ sub daemonize () {
         }
 }
 
-sub shrink_pipes {
-        if ($^O eq 'linux') { # 1031: F_SETPIPE_SZ, 4096: page size
-                fcntl($_, 1031, 4096) for @_;
-        }
-}
-
-sub worker_quit {
+sub worker_quit { # $_[0] = signal name or number (unused)
         # killing again terminates immediately:
         exit unless @listeners;
 
         $_->close foreach @listeners; # call PublicInbox::DS::close
         @listeners = ();
-
-        # create a lazy self-pipe which kicks us out of the EventLoop
-        # so DS::PostEventLoop can fire
-        if (pipe(my ($r, $w))) {
-                shrink_pipes($w);
-
-                # shrink_pipes == noop
-                PublicInbox::ParentPipe->new($r, *shrink_pipes);
-                close $w; # wake up from the event loop
-        } else {
-                warn "E: pipe failed ($!), quit unreliable\n";
-        }
         my $proc_name;
         my $warn = 0;
         # drop idle connections and try to quit gracefully
@@ -398,7 +384,7 @@ processes when multiple service instances start.
         @rv
 }
 
-sub upgrade () {
+sub upgrade { # $_[0] = signal name or number (unused)
         if ($reexec_pid) {
                 warn "upgrade in-progress: $reexec_pid\n";
                 return;
@@ -453,7 +439,7 @@ sub upgrade_aborted ($) {
         warn $@, "\n" if $@;
 }
 
-sub reap_children () {
+sub reap_children { # $_[0] = 'CHLD' or POSIX::SIGCHLD()
         while (1) {
                 my $p = waitpid(-1, WNOHANG) or return;
                 if (defined $reexec_pid && $p == $reexec_pid) {
@@ -483,60 +469,50 @@ sub unlink_pid_file_safe_ish ($$) {
 
 sub master_loop {
         pipe(my ($p0, $p1)) or die "failed to create parent-pipe: $!";
-        pipe(my ($r, $w)) or die "failed to create self-pipe: $!";
-        shrink_pipes($w, $p1);
-
-        IO::Handle::blocking($w, 0);
+        # 1031: F_SETPIPE_SZ, 4096: page size
+        fcntl($p1, 1031, 4096) if $^O eq 'linux';
         my $set_workers = $worker_processes;
-        my @caught;
-        my $master_pid = $$;
-        foreach my $s (qw(HUP CHLD QUIT INT TERM USR1 USR2 TTIN TTOU WINCH)) {
-                $SIG{$s} = sub {
-                        return if $$ != $master_pid;
-                        push @caught, $s;
-                        syswrite($w, '.');
-                };
-        }
-        sigprocmask(SIG_SETMASK, $oldset) or die "sigprocmask: $!";
         reopen_logs();
-        # main loop
         my $quit = 0;
-        while (1) {
-                while (my $s = shift @caught) {
-                        if ($s eq 'USR1') {
-                                reopen_logs();
-                                kill_workers($s);
-                        } elsif ($s eq 'USR2') {
-                                upgrade();
-                        } elsif ($s =~ /\A(?:QUIT|TERM|INT)\z/) {
-                                exit if $quit++;
-                                kill_workers($s);
-                        } elsif ($s eq 'WINCH') {
-                                if (-t STDIN || -t STDOUT || -t STDERR) {
-                                        warn
-"ignoring SIGWINCH since we are not daemonized\n";
-                                        $SIG{WINCH} = 'IGNORE';
-                                } else {
-                                        $worker_processes = 0;
-                                }
-                        } elsif ($s eq 'HUP') {
-                                $worker_processes = $set_workers;
-                                kill_workers($s);
-                        } elsif ($s eq 'TTIN') {
-                                if ($set_workers > $worker_processes) {
-                                        ++$worker_processes;
-                                } else {
-                                        $worker_processes = ++$set_workers;
-                                }
-                        } elsif ($s eq 'TTOU') {
-                                if ($set_workers > 0) {
-                                        $worker_processes = --$set_workers;
-                                }
-                        } elsif ($s eq 'CHLD') {
-                                reap_children();
+        my $ignore_winch;
+        my $quit_cb = sub { exit if $quit++; kill_workers($_[0]) };
+        my $sig = {
+                USR1 => sub { reopen_logs(); kill_workers($_[0]); },
+                USR2 => \&upgrade,
+                QUIT => $quit_cb,
+                INT => $quit_cb,
+                TERM => $quit_cb,
+                WINCH => sub {
+                        return if $ignore_winch;
+                        if (-t STDIN || -t STDOUT || -t STDERR) {
+                                $ignore_winch = 1;
+                                warn <<EOF;
+ignoring SIGWINCH since we are not daemonized
+EOF
+                        } else {
+                                $worker_processes = 0;
                         }
-                }
-
+                },
+                HUP => sub {
+                        $worker_processes = $set_workers;
+                        kill_workers($_[0]);
+                },
+                TTIN => sub {
+                        if ($set_workers > $worker_processes) {
+                                ++$worker_processes;
+                        } else {
+                                $worker_processes = ++$set_workers;
+                        }
+                },
+                TTOU => sub {
+                        $worker_processes = --$set_workers if $set_workers > 0;
+                },
+                CHLD => \&reap_children,
+        };
+        my $sigfd = PublicInbox::Sigfd->new($sig, 0);
+        local %SIG = (%SIG, %$sig) if !$sigfd;
+        sig_setmask($oldset) if !$sigfd;
+        while (1) { # main loop
                 my $n = scalar keys %pids;
                 if ($quit) {
                         exit if $n == 0;
@@ -549,22 +525,29 @@ sub master_loop {
                         }
                         $n = $worker_processes;
                 }
-                sigprocmask(SIG_SETMASK, $newset) or die "sigprocmask: $!";
-                foreach my $i ($n..($worker_processes - 1)) {
-                        my $pid = fork;
-                        if (!defined $pid) {
-                                warn "failed to fork worker[$i]: $!\n";
-                        } elsif ($pid == 0) {
-                                $set_user->() if $set_user;
-                                return $p0; # run normal work code
-                        } else {
-                                warn "PID=$pid is worker[$i]\n";
-                                $pids{$pid} = $i;
+                my $want = $worker_processes - 1;
+                if ($n <= $want) {
+                        sig_setmask($newset) if !$sigfd;
+                        for my $i ($n..$want) {
+                                my $pid = fork;
+                                if (!defined $pid) {
+                                        warn "failed to fork worker[$i]: $!\n";
+                                } elsif ($pid == 0) {
+                                        $set_user->() if $set_user;
+                                        return $p0; # run normal work code
+                                } else {
+                                        warn "PID=$pid is worker[$i]\n";
+                                        $pids{$pid} = $i;
+                                }
                         }
+                        sig_setmask($oldset) if !$sigfd;
+                }
+
+                if ($sigfd) { # Linux and IO::KQueue users:
+                        $sigfd->wait_once;
+                } else { # wake up every second
+                        sleep(1);
                 }
-                sigprocmask(SIG_SETMASK, $oldset) or die "sigprocmask: $!";
-                # just wait on signal events here:
-                sysread($r, my $buf, 8);
         }
         exit # never gets here, just for documentation
 }
@@ -606,6 +589,18 @@ sub daemon_loop ($$$$) {
                         $nntpd->{accept_tls} = $v;
                 }
         }
+        my $sig = {
+                HUP => $refresh,
+                INT => \&worker_quit,
+                QUIT => \&worker_quit,
+                TERM => \&worker_quit,
+                TTIN => 'IGNORE',
+                TTOU => 'IGNORE',
+                USR1 => \&reopen_logs,
+                USR2 => 'IGNORE',
+                WINCH => 'IGNORE',
+                CHLD => \&PublicInbox::DS::enqueue_reap,
+        };
         my $parent_pipe;
         if ($worker_processes > 0) {
                 $refresh->(); # preload by default
@@ -614,16 +609,11 @@ sub daemon_loop ($$$$) {
         } else {
                 reopen_logs();
                 $set_user->() if $set_user;
-                $SIG{USR2} = sub { worker_quit() if upgrade() };
+                $sig->{USR2} = sub { worker_quit() if upgrade() };
                 $refresh->();
         }
         $uid = $gid = undef;
         reopen_logs();
-        $SIG{QUIT} = $SIG{INT} = $SIG{TERM} = *worker_quit;
-        $SIG{USR1} = *reopen_logs;
-        $SIG{HUP} = $refresh;
-        $SIG{CHLD} = 'DEFAULT';
-        $SIG{$_} = 'IGNORE' for qw(USR2 TTIN TTOU WINCH);
         @listeners = map {;
                 my $tls_cb = $post_accept{sockname($_)};
 
@@ -634,7 +624,14 @@ sub daemon_loop ($$$$) {
                 # this calls epoll_create:
                 PublicInbox::Listener->new($_, $tls_cb || $post_accept)
         } @listeners;
-        sigprocmask(SIG_SETMASK, $oldset) or die "sigprocmask: $!";
+        my $sigfd = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK);
+        local %SIG = (%SIG, %$sig) if !$sigfd;
+        if (!$sigfd) {
+                # wake up every second to accept signals if we don't
+                # have signalfd or IO::KQueue:
+                sig_setmask($oldset);
+                PublicInbox::DS->SetLoopTimeout(1000);
+        }
         PublicInbox::DS->EventLoop;
         $parent_pipe = undef;
 }
diff --git a/lib/PublicInbox/Sigfd.pm b/lib/PublicInbox/Sigfd.pm
new file mode 100644
index 00000000..ec5d7145
--- /dev/null
+++ b/lib/PublicInbox/Sigfd.pm
@@ -0,0 +1,63 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+package PublicInbox::Sigfd;
+use strict;
+use parent qw(PublicInbox::DS);
+use fields qw(sig); # hashref similar to %SIG, but signal numbers as keys
+use PublicInbox::Syscall qw(signalfd EPOLLIN EPOLLET SFD_NONBLOCK);
+use POSIX ();
+use IO::Handle ();
+
+# returns a coderef to unblock signals if neither signalfd or kqueue
+# are available.
+sub new {
+        my ($class, $sig, $flags) = @_;
+        my $self = fields::new($class);
+        my %signo = map {;
+                my $cb = $sig->{$_};
+                my $num = ($_ eq 'WINCH' && $^O =~ /linux|bsd/i) ? 28 : do {
+                        my $m = "SIG$_";
+                        POSIX->$m;
+                };
+                $num => $cb;
+        } keys %$sig;
+        my $io;
+        my $fd = signalfd(-1, [keys %signo], $flags);
+        if (defined $fd && $fd >= 0) {
+                $io = IO::Handle->new_from_fd($fd, 'r+');
+        } elsif (eval { require PublicInbox::DSKQXS }) {
+                $io = PublicInbox::DSKQXS->signalfd([keys %signo], $flags);
+        } else {
+                return; # wake up every second to check for signals
+        }
+        if ($flags & SFD_NONBLOCK) { # it can go into the event loop
+                $self->SUPER::new($io, EPOLLIN | EPOLLET);
+        } else { # master main loop
+                $self->{sock} = $io;
+        }
+        $self->{sig} = \%signo;
+        $self;
+}
+
+# PublicInbox::Daemon in master main loop (blocking)
+sub wait_once ($) {
+        my ($self) = @_;
+        my $r = sysread($self->{sock}, my $buf, 128 * 64);
+        if (defined($r)) {
+                while (1) {
+                        my $sig = unpack('L', $buf);
+                        my $cb = $self->{sig}->{$sig};
+                        $cb->($sig) if $cb ne 'IGNORE';
+                        return $r if length($buf) == 128;
+                        $buf = substr($buf, 128);
+                }
+        }
+        $r;
+}
+
+# called by PublicInbox::DS in epoll_wait loop
+sub event_step {
+        while (wait_once($_[0])) {} # non-blocking
+}
+
+1;
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index da8a6c86..487013d5 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -24,7 +24,8 @@ $VERSION     = "0.25";
 @EXPORT_OK   = qw(epoll_ctl epoll_create epoll_wait
                   EPOLLIN EPOLLOUT EPOLLET
                   EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
-                  EPOLLONESHOT EPOLLEXCLUSIVE);
+                  EPOLLONESHOT EPOLLEXCLUSIVE
+                  signalfd SFD_NONBLOCK);
 %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
                              EPOLLIN EPOLLOUT
                              EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
@@ -42,6 +43,11 @@ use constant EPOLLET => (1 << 31);
 use constant EPOLL_CTL_ADD => 1;
 use constant EPOLL_CTL_DEL => 2;
 use constant EPOLL_CTL_MOD => 3;
+use constant {
+        SFD_CLOEXEC => 02000000,
+        SFD_NONBLOCK => 00004000,
+};
+
 
 our $loaded_syscall = 0;
 
@@ -63,6 +69,7 @@ our (
      $SYS_epoll_create,
      $SYS_epoll_ctl,
      $SYS_epoll_wait,
+     $SYS_signalfd4,
      );
 
 our $no_deprecated = 0;
@@ -88,63 +95,75 @@ if ($^O eq "linux") {
         $SYS_epoll_create = 254;
         $SYS_epoll_ctl    = 255;
         $SYS_epoll_wait   = 256;
+        $SYS_signalfd4 = 327;
     } elsif ($machine eq "x86_64") {
         $SYS_epoll_create = 213;
         $SYS_epoll_ctl    = 233;
         $SYS_epoll_wait   = 232;
+        $SYS_signalfd4 = 289;
     } elsif ($machine =~ m/^parisc/) {
         $SYS_epoll_create = 224;
         $SYS_epoll_ctl    = 225;
         $SYS_epoll_wait   = 226;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 309;
     } elsif ($machine =~ m/^ppc64/) {
         $SYS_epoll_create = 236;
         $SYS_epoll_ctl    = 237;
         $SYS_epoll_wait   = 238;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 313;
     } elsif ($machine eq "ppc") {
         $SYS_epoll_create = 236;
         $SYS_epoll_ctl    = 237;
         $SYS_epoll_wait   = 238;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 313;
     } elsif ($machine =~ m/^s390/) {
         $SYS_epoll_create = 249;
         $SYS_epoll_ctl    = 250;
         $SYS_epoll_wait   = 251;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 322;
     } elsif ($machine eq "ia64") {
         $SYS_epoll_create = 1243;
         $SYS_epoll_ctl    = 1244;
         $SYS_epoll_wait   = 1245;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 289;
     } elsif ($machine eq "alpha") {
         # natural alignment, ints are 32-bits
         $SYS_epoll_create = 407;
         $SYS_epoll_ctl    = 408;
         $SYS_epoll_wait   = 409;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 484;
     } elsif ($machine eq "aarch64") {
         $SYS_epoll_create = 20;  # (sys_epoll_create1)
         $SYS_epoll_ctl    = 21;
         $SYS_epoll_wait   = 22;  # (sys_epoll_pwait)
         $u64_mod_8        = 1;
         $no_deprecated    = 1;
+        $SYS_signalfd4 = 74;
     } elsif ($machine =~ m/arm(v\d+)?.*l/) {
         # ARM OABI
         $SYS_epoll_create = 250;
         $SYS_epoll_ctl    = 251;
         $SYS_epoll_wait   = 252;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 355;
     } elsif ($machine =~ m/^mips64/) {
         $SYS_epoll_create = 5207;
         $SYS_epoll_ctl    = 5208;
         $SYS_epoll_wait   = 5209;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 5283;
     } elsif ($machine =~ m/^mips/) {
         $SYS_epoll_create = 4248;
         $SYS_epoll_ctl    = 4249;
         $SYS_epoll_wait   = 4250;
         $u64_mod_8        = 1;
+        $SYS_signalfd4 = 4324;
     } else {
         # as a last resort, try using the *.ph files which may not
         # exist or may be wrong
@@ -152,6 +171,11 @@ if ($^O eq "linux") {
         $SYS_epoll_create = eval { &SYS_epoll_create; } || 0;
         $SYS_epoll_ctl    = eval { &SYS_epoll_ctl;    } || 0;
         $SYS_epoll_wait   = eval { &SYS_epoll_wait;   } || 0;
+
+        # Note: do NOT add new syscalls to depend on *.ph, here.
+        # Better to miss syscalls (so we can fallback to IO::Poll)
+        # than to use wrong ones, since the names are not stable
+        # (at least not on FreeBSD), if the actual numbers are.
     }
 
     if ($u64_mod_8) {
@@ -228,6 +252,22 @@ sub epoll_wait_mod8 {
     return $ct;
 }
 
+sub signalfd ($$$) {
+        my ($fd, $signos, $flags) = @_;
+        if ($SYS_signalfd4) {
+                # Not sure if there's a way to get pack/unpack to get the
+                # contents of POSIX::SigSet to a buffer, but prepping the
+                # bitmap like one would for select() works:
+                my $buf = "\0" x 8;
+                vec($buf, $_ - 1, 1) = 1 for @$signos;
+
+                syscall($SYS_signalfd4, $fd, $buf, 8, $flags|SFD_CLOEXEC);
+        } else {
+                $! = ENOSYS;
+                undef;
+        }
+}
+
 1;
 
 =head1 WARRANTY
diff --git a/t/ds-kqxs.t b/t/ds-kqxs.t
index 785570c3..43b6333f 100644
--- a/t/ds-kqxs.t
+++ b/t/ds-kqxs.t
@@ -10,5 +10,33 @@ unless (eval { require IO::KQueue }) {
                                 : "no IO::KQueue, skipping $0: $@";
         plan skip_all => $m;
 }
+
+if ('ensure nested kqueue works for signalfd emulation') {
+        require POSIX;
+        my $new = POSIX::SigSet->new(POSIX::SIGHUP());
+        my $old = POSIX::SigSet->new;
+        my $hup = 0;
+        local $SIG{HUP} = sub { $hup++ };
+        POSIX::sigprocmask(POSIX::SIG_SETMASK(), $new, $old) or die;
+        my $kqs = IO::KQueue->new or die;
+        $kqs->EV_SET(POSIX::SIGHUP(), IO::KQueue::EVFILT_SIGNAL(),
+                        IO::KQueue::EV_ADD());
+        kill('HUP', $$) or die;
+        my @events = $kqs->kevent(3000);
+        is(scalar(@events), 1, 'got one event');
+        is($events[0]->[0], POSIX::SIGHUP(), 'got SIGHUP');
+        my $parent = IO::KQueue->new or die;
+        my $kqfd = $$kqs;
+        $parent->EV_SET($kqfd, IO::KQueue::EVFILT_READ(), IO::KQueue::EV_ADD());
+        kill('HUP', $$) or die;
+        @events = $parent->kevent(3000);
+        is(scalar(@events), 1, 'got one event');
+        is($events[0]->[0], $kqfd, 'got kqfd');
+        is($hup, 0, '$SIG{HUP} did not fire');
+        POSIX::sigprocmask(POSIX::SIG_SETMASK(), $old) or die;
+        defined(POSIX::close($kqfd)) or die;
+        defined(POSIX::close($$parent)) or die;
+}
+
 local $ENV{TEST_IOPOLLER} = 'PublicInbox::DSKQXS';
 require './t/ds-poll.t';
diff --git a/t/sigfd.t b/t/sigfd.t
new file mode 100644
index 00000000..34f30de8
--- /dev/null
+++ b/t/sigfd.t
@@ -0,0 +1,65 @@
+# Copyright (C) 2019 all contributors <meta@public-inbox.org>
+use strict;
+use Test::More;
+use IO::Handle;
+use POSIX qw(:signal_h);
+use Errno qw(ENOSYS);
+use PublicInbox::Syscall qw(SFD_NONBLOCK);
+require_ok 'PublicInbox::Sigfd';
+
+SKIP: {
+        if ($^O ne 'linux' && !eval { require IO::KQueue }) {
+                skip 'signalfd requires Linux or IO::KQueue to emulate', 10;
+        }
+        my $new = POSIX::SigSet->new;
+        $new->fillset or die "sigfillset: $!";
+        my $old = POSIX::SigSet->new;
+        sigprocmask(SIG_SETMASK, $new, $old) or die "sigprocmask $!";
+        my $hit = {};
+        my $sig = {};
+        local $SIG{HUP} = sub { $hit->{HUP}->{normal}++ };
+        local $SIG{TERM} = sub { $hit->{TERM}->{normal}++ };
+        local $SIG{INT} = sub { $hit->{INT}->{normal}++ };
+        for my $s (qw(HUP TERM INT)) {
+                $sig->{$s} = sub { $hit->{$s}->{sigfd}++ };
+        }
+        my $sigfd = PublicInbox::Sigfd->new($sig, 0);
+        if ($sigfd) {
+                require PublicInbox::DS;
+                ok($sigfd, 'Sigfd->new works');
+                kill('HUP', $$) or die "kill $!";
+                kill('INT', $$) or die "kill $!";
+                my $fd = fileno($sigfd->{sock});
+                ok($fd >= 0, 'fileno(Sigfd->{sock}) works');
+                my $rvec = '';
+                vec($rvec, $fd, 1) = 1;
+                is(select($rvec, undef, undef, undef), 1, 'select() works');
+                ok($sigfd->wait_once, 'wait_once reported success');
+                for my $s (qw(HUP INT)) {
+                        is($hit->{$s}->{sigfd}, 1, "sigfd fired $s");
+                        is($hit->{$s}->{normal}, undef,
+                                'normal $SIG{$s} not fired');
+                }
+                $sigfd = undef;
+
+                my $nbsig = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK);
+                ok($nbsig, 'Sigfd->new SFD_NONBLOCK works');
+                is($nbsig->wait_once, undef, 'nonblocking ->wait_once');
+                ok($! == Errno::EAGAIN, 'got EAGAIN');
+                kill('HUP', $$) or die "kill $!";
+                PublicInbox::DS->SetPostLoopCallback(sub {}); # loop once
+                PublicInbox::DS->EventLoop;
+                is($hit->{HUP}->{sigfd}, 2, 'HUP sigfd fired in event loop');
+                kill('TERM', $$) or die "kill $!";
+                kill('HUP', $$) or die "kill $!";
+                PublicInbox::DS->EventLoop;
+                PublicInbox::DS->Reset;
+                is($hit->{TERM}->{sigfd}, 1, 'TERM sigfd fired in event loop');
+                is($hit->{HUP}->{sigfd}, 3, 'HUP sigfd fired in event loop');
+        } else {
+                skip('signalfd disabled?', 10);
+        }
+        sigprocmask(SIG_SETMASK, $old) or die "sigprocmask $!";
+}
+
+done_testing;