user/dev discussion of public-inbox itself
 help / color / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 2/4] listener: use EPOLLEXCLUSIVE for listen sockets
Date: Sun,  5 May 2019 00:52:17 +0000
Message-ID: <20190505005219.31772-3-e@80x24.org> (raw)
In-Reply-To: <20190505005219.31772-1-e@80x24.org>

Since our listen sockets are non-blocking and we may run
multiple httpd|nntpd processes; we need a way to avoid
thundering herds when there are multiple httpd|nntpd worker
processes.

EPOLLEXCLUSIVE was added just for that in Linux 4.5
---
 TODO                        |  3 ---
 lib/PublicInbox/DS.pm       | 22 ++++++++++++++++------
 lib/PublicInbox/Listener.pm |  2 +-
 lib/PublicInbox/Syscall.pm  |  7 +++++--
 4 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/TODO b/TODO
index 372f733..ac255b8 100644
--- a/TODO
+++ b/TODO
@@ -56,9 +56,6 @@ all need to be considered for everything we introduce)
   ugh... https://rt.cpan.org/Ticket/Display.html?id=116615
   (IO::KQueue is broken with Danga::Socket / PublicInbox::DS)
 
-* EPOLLEXCLUSIVE for listen socket fairness across -httpd/nntpd
-  worker processes.
-
 * improve documentation
 
 * linkify thread skeletons better
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 543d3fd..3ccc275 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -78,6 +78,8 @@ our (
      @Timers,                    # timers
      );
 
+# this may be set to zero with old kernels
+our $EPOLLEXCLUSIVE = EPOLLEXCLUSIVE;
 Reset();
 
 #####################################################################
@@ -666,11 +668,9 @@ This is normally (always?) called from your subclass via:
 
 =cut
 sub new {
-    my PublicInbox::DS $self = shift;
+    my ($self, $sock, $exclusive) = @_;
     $self = fields::new($self) unless ref $self;
 
-    my $sock = shift;
-
     $self->{sock}        = $sock;
     my $fd = fileno($sock);
 
@@ -685,13 +685,23 @@ sub new {
     $self->{corked} = 0;
     $self->{read_push_back} = [];
 
-    $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL;
+    my $ev = $self->{event_watch} = POLLERR|POLLHUP|POLLNVAL;
 
     _InitPoller();
 
     if ($HaveEpoll) {
-        epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $self->{event_watch})
-            and die "couldn't add epoll watch for $fd\n";
+        if ($exclusive) {
+            $ev = $self->{event_watch} = EPOLLIN|EPOLLERR|EPOLLHUP|$EPOLLEXCLUSIVE;
+        }
+retry:
+        if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) {
+            if ($!{EINVAL} && ($ev & $EPOLLEXCLUSIVE)) {
+                $EPOLLEXCLUSIVE = 0; # old kernel
+                $ev = $self->{event_watch} = EPOLLIN|EPOLLERR|EPOLLHUP;
+                goto retry;
+            }
+            die "couldn't add epoll watch for $fd: $!\n";
+        }
     }
     elsif ($HaveKQueue) {
         # Add them to the queue but disabled for now
diff --git a/lib/PublicInbox/Listener.pm b/lib/PublicInbox/Listener.pm
index d1f0d2e..a75a6fd 100644
--- a/lib/PublicInbox/Listener.pm
+++ b/lib/PublicInbox/Listener.pm
@@ -17,7 +17,7 @@ sub new ($$$) {
 	listen($s, 1024);
 	IO::Handle::blocking($s, 0);
 	my $self = fields::new($class);
-	$self->SUPER::new($s); # calls epoll_create for the first socket
+	$self->SUPER::new($s, 1); # calls epoll_create for the first socket
 	$self->watch_read(1);
 	$self->{post_accept} = $cb;
 	$self
diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm
index cf70045..9194364 100644
--- a/lib/PublicInbox/Syscall.pm
+++ b/lib/PublicInbox/Syscall.pm
@@ -23,10 +23,12 @@ $VERSION     = "0.25";
 @ISA         = qw(Exporter);
 @EXPORT_OK   = qw(sendfile epoll_ctl epoll_create epoll_wait
                   EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND
-                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD);
+                  EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
+                  EPOLLEXCLUSIVE);
 %EXPORT_TAGS = (epoll => [qw(epoll_ctl epoll_create epoll_wait
                              EPOLLIN EPOLLOUT EPOLLERR EPOLLHUP EPOLLRDBAND
-                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD)],
+                             EPOLL_CTL_ADD EPOLL_CTL_DEL EPOLL_CTL_MOD
+                             EPOLLEXCLUSIVE)],
                 sendfile => [qw(sendfile)],
                 );
 
@@ -35,6 +37,7 @@ use constant EPOLLOUT      => 4;
 use constant EPOLLERR      => 8;
 use constant EPOLLHUP      => 16;
 use constant EPOLLRDBAND   => 128;
+use constant EPOLLEXCLUSIVE => (1 << 28);
 use constant EPOLL_CTL_ADD => 1;
 use constant EPOLL_CTL_DEL => 2;
 use constant EPOLL_CTL_MOD => 3;
-- 
EW


  parent reply index

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-05  0:52 [PATCH 0/4] bundle Danga::Socket and Sys::Syscall Eric Wong
2019-05-05  0:52 ` [PATCH 1/4] " Eric Wong
2019-05-05  4:56   ` [PATCH 5/4] DS: workaround IO::Kqueue EINTR (mis-)handling Eric Wong
2019-05-08  9:07   ` [PATCH 6/4] DS: handle EINTR in IO::Poll path, too Eric Wong
2019-05-05  0:52 ` Eric Wong [this message]
2019-05-05  0:52 ` [PATCH 3/4] DS: remove unused fields and functions Eric Wong
2019-05-05  0:52 ` [PATCH 4/4] DS: drop profiling support Eric Wong
2019-05-08 19:18 ` [PATCH 0/4] Danga::Socket bundling cleanups Eric Wong
2019-05-08 19:18   ` [PATCH 1/4] build: do not manify DS and Syscall pods Eric Wong
2019-05-08 19:18   ` [PATCH 2/4] syscall: drop readahead wrapper Eric Wong
2019-05-08 19:18   ` [PATCH 3/4] DS: drop unused "_undef" sub Eric Wong
2019-05-08 19:18   ` [PATCH 4/4] DS: epoll: fix misordered EPOLL_CTL_DEL call Eric Wong

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190505005219.31772-3-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox