From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.9 required=3.0 tests=ALL_TRUSTED,BAYES_00, RP_MATCHES_RCVD,URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 9F73C633823 for ; Sat, 30 Apr 2016 08:36:36 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 2/2] daemon: graceful shutdown warning and limit removal Date: Sat, 30 Apr 2016 08:36:34 +0000 Message-Id: <20160430083634.28543-3-e@80x24.org> In-Reply-To: <20160430083634.28543-1-e@80x24.org> References: <20160430083634.28543-1-e@80x24.org> List-Id: git clones may take longer than 30s, much longer... So prepare to wait almost indefinitely for sockets to timeout and document the second signal behavior for immediate shutdown. While we're at it, move parent death handling to a separate class to avoid Danga::Socket->AddOtherFds, since that does not allow proper handling the parent pipe being closed and would actually misterminate a worker prematurely. t/nntpd.t is update to illustrate the failure with workers enabled. We will work to keep memory usage low and let clients take their time without interrupting them. --- lib/PublicInbox/Daemon.pm | 41 ++++++++++++++++++++++++++++++----------- lib/PublicInbox/ParentPipe.pm | 21 +++++++++++++++++++++ t/nntpd.t | 12 ++++++++---- 3 files changed, 59 insertions(+), 15 deletions(-) create mode 100644 lib/PublicInbox/ParentPipe.pm diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index c9594a3..8de7ff2 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -14,6 +14,7 @@ STDERR->autoflush(1); require Danga::Socket; require POSIX; require PublicInbox::Listener; +require PublicInbox::ParentPipe; my @CMD; my $set_user; my (@cfg_listen, $stdout, $stderr, $group, $user, $pid_file, $daemonize); @@ -161,16 +162,18 @@ sub daemonize () { } } -sub worker_quit () { + +sub worker_quit { + my ($reason) = @_; # killing again terminates immediately: exit unless @listeners; $_->close foreach @listeners; # call Danga::Socket::close @listeners = (); + $reason->close if ref($reason) eq 'PublicInbox::ParentPipe'; - # give slow clients 30s to finish reading/writing whatever - Danga::Socket->AddTimer(30, sub { exit }); - + my $proc_name; + my $warn = 0; # drop idle connections and try to quit gracefully Danga::Socket->SetPostLoopCallback(sub { my ($dmap, undef) = @_; @@ -178,12 +181,23 @@ sub worker_quit () { foreach my $s (values %$dmap) { if ($s->can('busy') && $s->busy) { - $n = 1; + ++$n; } else { # close as much as possible, early as possible $s->close; } } + if ($n) { + if (($warn + 5) < time) { + warn "$$ quitting, $n client(s) left\n"; + $warn = time; + } + unless (defined $proc_name) { + $proc_name = (split(/\s+/, $0))[0]; + $proc_name =~ s!\A.*?([^/]+)\z!$1!; + } + $0 = "$proc_name quitting, $n client(s) left"; + } $n; # true: loop continues, false: loop breaks }); } @@ -359,6 +373,7 @@ sub master_loop { } reopen_logs(); # main loop + my $quit = 0; while (1) { while (my $s = shift @caught) { if ($s eq 'USR1') { @@ -367,8 +382,8 @@ sub master_loop { } elsif ($s eq 'USR2') { upgrade(); } elsif ($s =~ /\A(?:QUIT|TERM|INT)\z/) { - # drops pipes and causes children to die - exit + exit if $quit++; + kill_workers($s); } elsif ($s eq 'WINCH') { $worker_processes = 0; } elsif ($s eq 'HUP') { @@ -390,6 +405,11 @@ sub master_loop { } my $n = scalar keys %pids; + if ($quit) { + exit if $n == 0; + $set_workers = $worker_processes = $n = 0; + } + if ($n > $worker_processes) { while (my ($k, $v) = each %pids) { kill('TERM', $k) if $v >= $worker_processes; @@ -419,13 +439,12 @@ sub daemon_loop ($$) { my $parent_pipe; if ($worker_processes > 0) { $refresh->(); # preload by default - $parent_pipe = master_loop(); # returns if in child process - my $fd = fileno($parent_pipe); - Danga::Socket->AddOtherFds($fd => *worker_quit); + my $fh = master_loop(); # returns if in child process + $parent_pipe = PublicInbox::ParentPipe->new($fh, *worker_quit); } else { reopen_logs(); $set_user->() if $set_user; - $SIG{USR2} = sub { worker_quit() if upgrade() }; + $SIG{USR2} = sub { worker_quit('USR2') if upgrade() }; $refresh->(); } $uid = $gid = undef; diff --git a/lib/PublicInbox/ParentPipe.pm b/lib/PublicInbox/ParentPipe.pm new file mode 100644 index 0000000..d2d054c --- /dev/null +++ b/lib/PublicInbox/ParentPipe.pm @@ -0,0 +1,21 @@ +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +# only for PublicInbox::Daemon +package PublicInbox::ParentPipe; +use strict; +use warnings; +use base qw(Danga::Socket); +use fields qw(cb); + +sub new ($$$) { + my ($class, $pipe, $cb) = @_; + my $self = fields::new($class); + $self->SUPER::new($pipe); + $self->{cb} = $cb; + $self->watch_read(1); + $self; +} + +sub event_read { $_[0]->{cb}->($_[0]) } + +1; diff --git a/t/nntpd.t b/t/nntpd.t index d597855..d033221 100644 --- a/t/nntpd.t +++ b/t/nntpd.t @@ -185,7 +185,12 @@ EOF 'XHDR on invalid header returns empty'); { - syswrite($s, "HDR List-id 1-\r\n"); + setsockopt($s, IPPROTO_TCP, TCP_NODELAY, 1); + syswrite($s, 'HDR List-id 1-'); + select(undef, undef, undef, 0.15); + ok(kill('TERM', $pid), 'killed nntpd'); + select(undef, undef, undef, 0.15); + syswrite($s, "\r\n"); $buf = ''; do { sysread($s, $buf, 4096, length($buf)); @@ -196,9 +201,8 @@ EOF is(scalar @r, 1, 'only one response line'); } - ok(kill('TERM', $pid), 'killed nntpd'); - $pid = undef; - waitpid(-1, 0); + is($pid, waitpid($pid, 0), 'nntpd exited successfully'); + is($?, 0, 'no error in exited process'); } done_testing(); -- EW