From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 01/11] ds: share lazy rbuf handling between HTTP and NNTP
Date: Sat, 29 Jun 2019 19:59:41 +0000 [thread overview]
Message-ID: <20190629195951.32160-2-e@80x24.org> (raw)
In-Reply-To: <20190629195951.32160-1-e@80x24.org>
Doing this for HTTP cuts the memory usage of 10K
idle-after-one-request HTTP clients from 92 MB to 47 MB.
The savings over the equivalent NNTP change in commit
6f173864f5acac89769a67739b8c377510711d49,
("nntp: lazily allocate and stash rbuf") seems down to the
size of HTTP requests and the fact HTTP is a client-sends-first
protocol where as NNTP is server-sends-first.
---
lib/PublicInbox/DS.pm | 16 ++++++++--
lib/PublicInbox/HTTP.pm | 79 ++++++++++++++++++++++++-------------------------
lib/PublicInbox/NNTP.pm | 8 ++---
3 files changed, 55 insertions(+), 48 deletions(-)
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index a8700bc5..28240843 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -28,6 +28,7 @@ use 5.010_001;
use PublicInbox::Syscall qw(:epoll);
use fields ('sock', # underlying socket
+ 'rbuf', # scalarref, usually undef
'wbuf', # arrayref of coderefs or GLOB refs
'wbuf_off', # offset into first element of wbuf to start writing at
);
@@ -412,16 +413,27 @@ next_buf:
1; # all done
}
-sub do_read ($$$$) {
+sub rbuf_idle ($$) {
+ my ($self, $rbuf) = @_;
+ if ($$rbuf eq '') { # who knows how long till we can read again
+ delete $self->{rbuf};
+ } else {
+ $self->{rbuf} = $rbuf;
+ }
+}
+
+sub do_read ($$$;$) {
my ($self, $rbuf, $len, $off) = @_;
- my $r = sysread($self->{sock}, $$rbuf, $len, $off);
+ my $r = sysread($self->{sock}, $$rbuf, $len, $off // 0);
return ($r == 0 ? $self->close : $r) if defined $r;
# common for clients to break connections without warning,
# would be too noisy to log here:
if (ref($self) eq 'IO::Socket::SSL') {
my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+ rbuf_idle($self, $rbuf);
watch($self, $ev | EPOLLONESHOT);
} elsif ($! == EAGAIN) {
+ rbuf_idle($self, $rbuf);
watch($self, EPOLLIN | EPOLLONESHOT);
} else {
$self->close;
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index a1cb4aca..1153ef98 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -11,7 +11,7 @@ package PublicInbox::HTTP;
use strict;
use warnings;
use base qw(PublicInbox::DS);
-use fields qw(httpd env rbuf input_left remote_addr remote_port forward pull);
+use fields qw(httpd env input_left remote_addr remote_port forward pull);
use bytes (); # only for bytes::length
use Fcntl qw(:seek);
use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
@@ -60,7 +60,6 @@ sub new ($$$) {
my $self = fields::new($class);
$self->SUPER::new($sock, EPOLLIN | EPOLLONESHOT);
$self->{httpd} = $httpd;
- $self->{rbuf} = '';
($self->{remote_addr}, $self->{remote_port}) =
PublicInbox::Daemon::host_with_port($addr);
$self;
@@ -75,31 +74,34 @@ sub event_step { # called by PublicInbox::DS
# otherwise we can be buffering infinitely w/o backpressure
return read_input($self) if defined $self->{env};
- my $rbuf = \($self->{rbuf});
- my $off = bytes::length($$rbuf);
- $self->do_read($rbuf, 8192, $off) and rbuf_process($self);
+ my $rbuf = $self->{rbuf} // (\(my $x = ''));
+ $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return;
+ rbuf_process($self, $rbuf);
}
sub rbuf_process {
- my ($self) = @_;
+ my ($self, $rbuf) = @_;
+ $rbuf //= $self->{rbuf} // (\(my $x = ''));
my %env = %{$self->{httpd}->{env}}; # full hash copy
- my $r = parse_http_request($self->{rbuf}, \%env);
+ my $r = parse_http_request($$rbuf, \%env);
# We do not support Trailers in chunked requests, for now
# (they are rarely-used and git (as of 2.7.2) does not use them)
if ($r == -1 || $env{HTTP_TRAILER} ||
# this length-check is necessary for PURE_PERL=1:
- ($r == -2 && bytes::length($self->{rbuf}) > 0x4000)) {
+ ($r == -2 && bytes::length($$rbuf) > 0x4000)) {
return quit($self, 400);
}
- return $self->watch_in1 if $r < 0; # incomplete
- $self->{rbuf} = substr($self->{rbuf}, $r);
-
+ if ($r < 0) { # incomplete
+ $self->rbuf_idle($rbuf);
+ return $self->watch_in1;
+ }
+ $$rbuf = substr($$rbuf, $r);
my $len = input_prepare($self, \%env);
defined $len or return write_err($self, undef); # EMFILE/ENFILE
- $len ? read_input($self) : app_dispatch($self);
+ $len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf);
}
# IO::Handle::write returns boolean, this returns bytes written:
@@ -111,16 +113,15 @@ sub xwrite ($$$) {
$w;
}
-sub read_input ($) {
- my ($self) = @_;
+sub read_input ($;$) {
+ my ($self, $rbuf) = @_;
+ $rbuf //= $self->{rbuf} // (\(my $x = ''));
my $env = $self->{env};
return if $env->{REMOTE_ADDR}; # in app dispatch
- return read_input_chunked($self) if env_chunked($env);
+ return read_input_chunked($self, $rbuf) if env_chunked($env);
# env->{CONTENT_LENGTH} (identity)
- my $sock = $self->{sock};
my $len = delete $self->{input_left};
- my $rbuf = \($self->{rbuf});
my $input = $env->{'psgi.input'};
while ($len > 0) {
@@ -135,15 +136,15 @@ sub read_input ($) {
}
$$rbuf = '';
}
- my $r = sysread($sock, $$rbuf, 8192);
- return recv_err($self, $r, $len) unless $r;
+ $self->do_read($rbuf, 8192) or return recv_err($self, $len);
# continue looping if $r > 0;
}
- app_dispatch($self, $input);
+ app_dispatch($self, $input, $rbuf);
}
sub app_dispatch {
- my ($self, $input) = @_;
+ my ($self, $input, $rbuf) = @_;
+ $self->rbuf_idle($rbuf);
my $env = $self->{env};
$env->{REMOTE_ADDR} = $self->{remote_addr};
$env->{REMOTE_PORT} = $self->{remote_port};
@@ -235,11 +236,12 @@ sub identity_wcb ($) {
sub next_request ($) {
my ($self) = @_;
- if ($self->{rbuf} eq '') { # wait for next request
- $self->watch_in1;
- } else { # avoid recursion for pipelined requests
+ if ($self->{rbuf}) {
+ # avoid recursion for pipelined requests
push @$pipelineq, $self;
$pipet ||= PublicInbox::EvCleanup::asap(*process_pipelineq);
+ } else { # wait for next request
+ $self->watch_in1;
}
}
@@ -360,27 +362,25 @@ sub write_err {
}
sub recv_err {
- my ($self, $r, $len) = @_;
- return $self->close if (defined $r && $r == 0);
- if ($! == EAGAIN) {
+ my ($self, $len) = @_;
+ if ($! == EAGAIN) { # epoll/kevent watch already set by do_read
$self->{input_left} = $len;
- return $self->watch_in1;
+ } else {
+ err($self, "error reading input: $! ($len bytes remaining)");
}
- err($self, "error reading for input: $! ($len bytes remaining)");
- quit($self, 500);
}
sub read_input_chunked { # unlikely...
- my ($self) = @_;
+ my ($self, $rbuf) = @_;
+ $rbuf //= $self->{rbuf} // (\(my $x = ''));
my $input = $self->{env}->{'psgi.input'};
- my $sock = $self->{sock};
my $len = delete $self->{input_left};
- my $rbuf = \($self->{rbuf});
while (1) { # chunk start
if ($len == CHUNK_ZEND) {
$$rbuf =~ s/\A\r\n//s and
- return app_dispatch($self, $input);
+ return app_dispatch($self, $input, $rbuf);
+
return quit($self, 400) if bytes::length($$rbuf) > 2;
}
if ($len == CHUNK_END) {
@@ -403,9 +403,8 @@ sub read_input_chunked { # unlikely...
}
if ($len < 0) { # chunk header is trickled, read more
- my $off = bytes::length($$rbuf);
- my $r = sysread($sock, $$rbuf, 8192, $off);
- return recv_err($self, $r, $len) unless $r;
+ $self->do_read($rbuf, 8192, bytes::length($$rbuf)) or
+ return recv_err($self, $len);
# (implicit) goto chunk_start if $r > 0;
}
$len = CHUNK_ZEND if $len == 0;
@@ -429,8 +428,8 @@ sub read_input_chunked { # unlikely...
}
if ($$rbuf eq '') {
# read more of current chunk
- my $r = sysread($sock, $$rbuf, 8192);
- return recv_err($self, $r, $len) unless $r;
+ $self->do_read($rbuf, 8192) or
+ return recv_err($self, $len);
}
}
}
@@ -459,7 +458,7 @@ sub close {
# for graceful shutdown in PublicInbox::Daemon:
sub busy () {
my ($self) = @_;
- ($self->{rbuf} ne '' || $self->{env} || $self->{wbuf});
+ ($self->{rbuf} || $self->{env} || $self->{wbuf});
}
# fires after pending writes are complete:
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 53e18281..0a053627 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -6,7 +6,7 @@ package PublicInbox::NNTP;
use strict;
use warnings;
use base qw(PublicInbox::DS);
-use fields qw(nntpd article rbuf ng);
+use fields qw(nntpd article ng);
use PublicInbox::Search;
use PublicInbox::Msgmap;
use PublicInbox::MID qw(mid_escape);
@@ -985,11 +985,7 @@ sub event_step {
return $self->close if $r < 0;
my $len = bytes::length($$rbuf);
return $self->close if ($len >= LINE_MAX);
- if ($len) {
- $self->{rbuf} = $rbuf;
- } else {
- delete $self->{rbuf};
- }
+ $self->rbuf_idle($rbuf);
update_idle_time($self);
# maybe there's more pipelined data, or we'll have
--
EW
next prev parent reply other threads:[~2019-06-29 19:59 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-29 19:59 [PATCH 00/11] ds: more updates Eric Wong
2019-06-29 19:59 ` Eric Wong [this message]
2019-06-29 19:59 ` [PATCH 02/11] ds: move requeue logic over from NNTP Eric Wong
2019-06-29 19:59 ` [PATCH 03/11] http: use requeue instead of watch_in1 Eric Wong
2019-06-29 19:59 ` [PATCH 04/11] listener: use edge-triggered notifications Eric Wong
2019-06-29 19:59 ` [PATCH 05/11] ds: handle deferred DS->close after timers Eric Wong
2019-06-29 19:59 ` [PATCH 06/11] ds: consolidate IO::Socket::SSL checks Eric Wong
2019-06-29 19:59 ` [PATCH 07/11] http: support HTTPS (kinda) Eric Wong
2019-06-29 19:59 ` [PATCH 08/11] parentpipe: document and use one-shot wakeups Eric Wong
2019-06-29 19:59 ` [PATCH 09/11] parentpipe: make the ->close call more obvious Eric Wong
2019-06-29 19:59 ` [PATCH 10/11] httpd/async: switch to buffering-as-fast-as-possible Eric Wong
2019-06-29 19:59 ` [PATCH 11/11] http: use bigger, but shorter-lived buffers for pipes Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190629195951.32160-2-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).