From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id E24AD1F461 for ; Wed, 26 Jun 2019 08:32:23 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] ds: share lazy rbuf handling between HTTP and NNTP Date: Wed, 26 Jun 2019 08:32:23 +0000 Message-Id: <20190626083223.10652-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Doing this for HTTP cuts the memory usage of 10K idle-after-one-request HTTP clients from 92 MB to 47 MB. The savings over the equivalent NNTP change in commit 6f173864f5acac89769a67739b8c377510711d49, ("nntp: lazily allocate and stash rbuf") seem to be down to the size of HTTP requests and the fact that HTTP is a client-sends-first protocol whereas NNTP is server-sends-first. 
--- lib/PublicInbox/DS.pm | 16 +++++++-- lib/PublicInbox/HTTP.pm | 79 ++++++++++++++++++++--------------------- lib/PublicInbox/NNTP.pm | 8 ++--- 3 files changed, 55 insertions(+), 48 deletions(-) diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index a8700bc5..28240843 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -28,6 +28,7 @@ use 5.010_001; use PublicInbox::Syscall qw(:epoll); use fields ('sock', # underlying socket + 'rbuf', # scalarref, usually undef 'wbuf', # arrayref of coderefs or GLOB refs 'wbuf_off', # offset into first element of wbuf to start writing at ); @@ -412,16 +413,27 @@ next_buf: 1; # all done } -sub do_read ($$$$) { +sub rbuf_idle ($$) { + my ($self, $rbuf) = @_; + if ($$rbuf eq '') { # who knows how long till we can read again + delete $self->{rbuf}; + } else { + $self->{rbuf} = $rbuf; + } +} + +sub do_read ($$$;$) { my ($self, $rbuf, $len, $off) = @_; - my $r = sysread($self->{sock}, $$rbuf, $len, $off); + my $r = sysread($self->{sock}, $$rbuf, $len, $off // 0); return ($r == 0 ? $self->close : $r) if defined $r; # common for clients to break connections without warning, # would be too noisy to log here: if (ref($self) eq 'IO::Socket::SSL') { my $ev = PublicInbox::TLS::epollbit() or return $self->close; + rbuf_idle($self, $rbuf); watch($self, $ev | EPOLLONESHOT); } elsif ($! 
== EAGAIN) { + rbuf_idle($self, $rbuf); watch($self, EPOLLIN | EPOLLONESHOT); } else { $self->close; diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index a1cb4aca..1153ef98 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -11,7 +11,7 @@ package PublicInbox::HTTP; use strict; use warnings; use base qw(PublicInbox::DS); -use fields qw(httpd env rbuf input_left remote_addr remote_port forward pull); +use fields qw(httpd env input_left remote_addr remote_port forward pull); use bytes (); # only for bytes::length use Fcntl qw(:seek); use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl @@ -60,7 +60,6 @@ sub new ($$$) { my $self = fields::new($class); $self->SUPER::new($sock, EPOLLIN | EPOLLONESHOT); $self->{httpd} = $httpd; - $self->{rbuf} = ''; ($self->{remote_addr}, $self->{remote_port}) = PublicInbox::Daemon::host_with_port($addr); $self; @@ -75,31 +74,34 @@ sub event_step { # called by PublicInbox::DS # otherwise we can be buffering infinitely w/o backpressure return read_input($self) if defined $self->{env}; - my $rbuf = \($self->{rbuf}); - my $off = bytes::length($$rbuf); - $self->do_read($rbuf, 8192, $off) and rbuf_process($self); + my $rbuf = $self->{rbuf} // (\(my $x = '')); + $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return; + rbuf_process($self, $rbuf); } sub rbuf_process { - my ($self) = @_; + my ($self, $rbuf) = @_; + $rbuf //= $self->{rbuf} // (\(my $x = '')); my %env = %{$self->{httpd}->{env}}; # full hash copy - my $r = parse_http_request($self->{rbuf}, \%env); + my $r = parse_http_request($$rbuf, \%env); # We do not support Trailers in chunked requests, for now # (they are rarely-used and git (as of 2.7.2) does not use them) if ($r == -1 || $env{HTTP_TRAILER} || # this length-check is necessary for PURE_PERL=1: - ($r == -2 && bytes::length($self->{rbuf}) > 0x4000)) { + ($r == -2 && bytes::length($$rbuf) > 0x4000)) { return quit($self, 400); } - return $self->watch_in1 if $r < 0; # incomplete - 
$self->{rbuf} = substr($self->{rbuf}, $r); - + if ($r < 0) { # incomplete + $self->rbuf_idle($rbuf); + return $self->watch_in1; + } + $$rbuf = substr($$rbuf, $r); my $len = input_prepare($self, \%env); defined $len or return write_err($self, undef); # EMFILE/ENFILE - $len ? read_input($self) : app_dispatch($self); + $len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf); } # IO::Handle::write returns boolean, this returns bytes written: @@ -111,16 +113,15 @@ sub xwrite ($$$) { $w; } -sub read_input ($) { - my ($self) = @_; +sub read_input ($;$) { + my ($self, $rbuf) = @_; + $rbuf //= $self->{rbuf} // (\(my $x = '')); my $env = $self->{env}; return if $env->{REMOTE_ADDR}; # in app dispatch - return read_input_chunked($self) if env_chunked($env); + return read_input_chunked($self, $rbuf) if env_chunked($env); # env->{CONTENT_LENGTH} (identity) - my $sock = $self->{sock}; my $len = delete $self->{input_left}; - my $rbuf = \($self->{rbuf}); my $input = $env->{'psgi.input'}; while ($len > 0) { @@ -135,15 +136,15 @@ sub read_input ($) { } $$rbuf = ''; } - my $r = sysread($sock, $$rbuf, 8192); - return recv_err($self, $r, $len) unless $r; + $self->do_read($rbuf, 8192) or return recv_err($self, $len); # continue looping if $r > 0; } - app_dispatch($self, $input); + app_dispatch($self, $input, $rbuf); } sub app_dispatch { - my ($self, $input) = @_; + my ($self, $input, $rbuf) = @_; + $self->rbuf_idle($rbuf); my $env = $self->{env}; $env->{REMOTE_ADDR} = $self->{remote_addr}; $env->{REMOTE_PORT} = $self->{remote_port}; @@ -235,11 +236,12 @@ sub identity_wcb ($) { sub next_request ($) { my ($self) = @_; - if ($self->{rbuf} eq '') { # wait for next request - $self->watch_in1; - } else { # avoid recursion for pipelined requests + if ($self->{rbuf}) { + # avoid recursion for pipelined requests push @$pipelineq, $self; $pipet ||= PublicInbox::EvCleanup::asap(*process_pipelineq); + } else { # wait for next request + $self->watch_in1; } } @@ -360,27 +362,25 @@ sub 
write_err { } sub recv_err { - my ($self, $r, $len) = @_; - return $self->close if (defined $r && $r == 0); - if ($! == EAGAIN) { + my ($self, $len) = @_; + if ($! == EAGAIN) { # epoll/kevent watch already set by do_read $self->{input_left} = $len; - return $self->watch_in1; + } else { + err($self, "error reading input: $! ($len bytes remaining)"); } - err($self, "error reading for input: $! ($len bytes remaining)"); - quit($self, 500); } sub read_input_chunked { # unlikely... - my ($self) = @_; + my ($self, $rbuf) = @_; + $rbuf //= $self->{rbuf} // (\(my $x = '')); my $input = $self->{env}->{'psgi.input'}; - my $sock = $self->{sock}; my $len = delete $self->{input_left}; - my $rbuf = \($self->{rbuf}); while (1) { # chunk start if ($len == CHUNK_ZEND) { $$rbuf =~ s/\A\r\n//s and - return app_dispatch($self, $input); + return app_dispatch($self, $input, $rbuf); + return quit($self, 400) if bytes::length($$rbuf) > 2; } if ($len == CHUNK_END) { @@ -403,9 +403,8 @@ sub read_input_chunked { # unlikely... } if ($len < 0) { # chunk header is trickled, read more - my $off = bytes::length($$rbuf); - my $r = sysread($sock, $$rbuf, 8192, $off); - return recv_err($self, $r, $len) unless $r; + $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or + return recv_err($self, $len); # (implicit) goto chunk_start if $r > 0; } $len = CHUNK_ZEND if $len == 0; @@ -429,8 +428,8 @@ sub read_input_chunked { # unlikely... 
} if ($$rbuf eq '') { # read more of current chunk - my $r = sysread($sock, $$rbuf, 8192); - return recv_err($self, $r, $len) unless $r; + $self->do_read($rbuf, 8192) or + return recv_err($self, $len); } } } @@ -459,7 +458,7 @@ sub close { # for graceful shutdown in PublicInbox::Daemon: sub busy () { my ($self) = @_; - ($self->{rbuf} ne '' || $self->{env} || $self->{wbuf}); + ($self->{rbuf} || $self->{env} || $self->{wbuf}); } # fires after pending writes are complete: diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm index 53e18281..0a053627 100644 --- a/lib/PublicInbox/NNTP.pm +++ b/lib/PublicInbox/NNTP.pm @@ -6,7 +6,7 @@ package PublicInbox::NNTP; use strict; use warnings; use base qw(PublicInbox::DS); -use fields qw(nntpd article rbuf ng); +use fields qw(nntpd article ng); use PublicInbox::Search; use PublicInbox::Msgmap; use PublicInbox::MID qw(mid_escape); @@ -985,11 +985,7 @@ sub event_step { return $self->close if $r < 0; my $len = bytes::length($$rbuf); return $self->close if ($len >= LINE_MAX); - if ($len) { - $self->{rbuf} = $rbuf; - } else { - delete $self->{rbuf}; - } + $self->rbuf_idle($rbuf); update_idle_time($self); # maybe there's more pipelined data, or we'll have -- EW