about summary refs log tree commit
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2019-05-03 10:34:09 +0000
committer Eric Wong <e@80x24.org> 2019-05-05 00:22:49 +0000
commit 5aaea61844b92c452c201ce9832e3c5c68c6f84e (patch)
tree 7afcdd8e62af3d0611bd24d04f7d44fe8faf7487
parent cd50d183273c105a7f08b1875ba6f7a51d9f8e9a (diff)
download public-inbox-5aaea61844b92c452c201ce9832e3c5c68c6f84e.tar.gz
Our listen sockets are non-blocking and we may run multiple
httpd|nntpd worker processes, so we need a way to avoid
thundering herds among those workers.

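EPOLLEXCLUSIVE was added just for that in Linux 4.5.  If
epoll_ctl fails with EINVAL while the flag is set, we assume an
old kernel, stop using the flag and retry without it.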
-rw-r--r-- TODO 3
-rw-r--r-- lib/PublicInbox/DS.pm 22
-rw-r--r-- lib/PublicInbox/Listener.pm 2
-rw-r--r-- lib/PublicInbox/Syscall.pm 7
4 files changed, 22 insertions, 12 deletions
diff --git a/TODO b/TODO
index 372f7331..ac255b86 100644
--- a/TODO
+++ b/TODO
@@ -56,9 +56,6 @@ all need to be considered for everything we introduce)
   ugh... https://rt.cpan.org/Ticket/Display.html?id=116615
   (IO::KQueue is broken with Danga::Socket / PublicInbox::DS)
 
-* EPOLLEXCLUSIVE for listen socket fairness across -httpd/nntpd
-  worker processes.
-
 * improve documentation
 
 * linkify thread skeletons better
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 543d3fdc..3ccc275d 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -78,6 +78,8 @@ our (
      @Timers,                    # timers
      );
 
+# this may be set to zero with old kernels
+our $EPOLLEXCLUSIVE = EPOLLEXCLUSIVE;
 Reset();
 
 #####################################################################
@@ -666,11 +668,9 @@ This is normally (always?) called from your subclass via:
 
 =cut
 sub new {
-    my PublicInbox::DS $self = shift;
+    my ($self, $sock, $exclusive) = @_;
     $self = fields::new($self) unless ref $self;
 
-    my $sock = shift;
-
     $self->{sock}        = $sock;
     my $fd = fileno($sock);
 
@@ -685,13 +685,23 @@ sub new {
     $self->{corked} = 0;
     $self->{read_push_back} = [];
 
-    $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL;
+    my $ev = $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL;
 
     _InitPoller();
 
     if ($HaveEpoll) {
-        epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $self->{event_watch})
-            and die "couldn't add epoll watch for $fd\n";
+        if ($exclusive) {
+            $ev = $self->{event_watch} = EPOLLIN|EPOLLERR|EPOLLHUP|$EPOLLEXCLUSIVE;
+        }
+retry:
+        if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
+            if ($!{EINVAL} && ($ev & $EPOLLEXCLUSIVE)) {
+                $EPOLLEXCLUSIVE = 0; # old kernel
+                $ev = $self->{event_watch} = EPOLLIN|EPOLLERR|EPOLLHUP;
+                goto retry;
+            }
+            die "couldn't add epoll watch for $fd: $!\n";
+        }
     }
     elsif ($HaveKQueue) {
         # Add them to the queue but disabled for now
diff --git a/lib/PublicInbox/Listener.pm b/lib/PublicInbox/Listener.pm
index d1f0d2e9..a75a6fd9 100644
--- a/lib/PublicInbox/Listener.pm
+++ b/lib/PublicInbox/Listener.pm
@@ -17,7 +17,7 @@ sub new ($$$) {
         listen($s, 1024);
         IO::Handle::blocking($s, 0);
         my $self = fields::new($class);
-        $self->SUPER::new($s); # calls epoll_create for the first socket
+        $self->SUPER::new($s, 1); # calls epoll_create for the first socket
         $self->watch_read(1);
         $self->{post_accept} = $cb;
         $self
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index cf700454..91943647 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -23,10 +23,12 @@ $VERSION     = "0.25";
 @ISA         = qw(Exporter);
 @EXPORT_OK   = qw(sendfile epoll_ctl epoll_create epoll_wait
                   EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND
-                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD);
+                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
+                  EPOLLEXCLUSIVE);
 %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
                              EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND
-                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD)],
+                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
+                             EPOLLEXCLUSIVE)],
                 sendfile => [qw(sendfile)],
                 );
 
@@ -35,6 +37,7 @@ use constant EPOLLOUT      => 4;
 use constant EPOLLERR      => 8;
 use constant EPOLLHUP      => 16;
 use constant EPOLLRDBAND   => 128;
+use constant EPOLLEXCLUSIVE => (1 << 28);
 use constant EPOLL_CTL_ADD => 1;
 use constant EPOLL_CTL_DEL => 2;
 use constant EPOLL_CTL_MOD => 3;