From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 063851FB07 for ; Sat, 9 Oct 2021 12:03:37 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/4] http: avoid Perl target cache for psgi.input Date: Sat, 9 Oct 2021 12:03:35 +0000 Message-Id: <20211009120336.2057-4-e@80x24.org> In-Reply-To: <20211009120336.2057-1-e@80x24.org> References: <20211009120336.2057-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: By using syswrite to populate env->{psgi.input}. The substr() call in IO::Handle->write will trigger Perl's target/scratchpad and result in a permanent allocation. Since this is a cold path, that allocation is pointless, and syswrite() can already write a substring. Allowing Perl to cache a large allocation in a cold path only results in fragmentation and wasted RAM. write(2) on a regular file won't result in short writes unless the FS quotas or free space limits are hit, or the buffer is close to overflowing (e.g. the 0x7ffff000-byte Linux limit). Since our HTTP server will never buffer that much in RAM, there's no need to retry syswrite nor rely on the retrying implicit in IO::Handle->write and the "print" perlop. 
--- lib/PublicInbox/HTTP.pm | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm index b2c74cf3..82c2b200 100644 --- a/lib/PublicInbox/HTTP.pm +++ b/lib/PublicInbox/HTTP.pm @@ -26,7 +26,6 @@ use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl use Plack::Util; use HTTP::Status qw(status_message); use HTTP::Date qw(time2str); -use IO::Handle; # ->write use PublicInbox::DS qw(msg_more); use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT); use PublicInbox::Tmpfile; @@ -117,15 +116,6 @@ sub rbuf_process { $len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf); } -# IO::Handle::write returns boolean, this returns bytes written: -sub xwrite ($$$) { - my ($fh, $rbuf, $max) = @_; - my $w = length($$rbuf); - $w = $max if $w > $max; - $fh->write($$rbuf, $w) or return; - $w; -} - sub read_input ($;$) { my ($self, $rbuf) = @_; $rbuf //= $self->{rbuf} // (\(my $x = '')); @@ -138,7 +128,7 @@ sub read_input ($;$) { while ($len > 0) { if ($$rbuf ne '') { - my $w = xwrite($input, $rbuf, $len); + my $w = syswrite($input, $$rbuf, $len); return write_err($self, $len) unless $w; $len -= $w; die "BUG: $len < 0 (w=$w)" if $len < 0; @@ -333,12 +323,6 @@ sub response_write { } } -sub input_tmpfile ($) { - my $input = tmpfile('http.input', $_[0]->{sock}) or return; - $input->autoflush(1); - $input; -} - sub input_prepare { my ($self, $env) = @_; my ($input, $len); @@ -354,24 +338,22 @@ sub input_prepare { return quit($self, 400) if $hte !~ /\Achunked\z/i; $len = CHUNK_START; - $input = input_tmpfile($self); + $input = tmpfile('http.input', $self->{sock}); } else { $len = $env->{CONTENT_LENGTH}; if (defined $len) { # rfc7230 3.3.3.4 return quit($self, 400) if $len !~ /\A[0-9]+\z/; - return quit($self, 413) if $len > $MAX_REQUEST_BUFFER; - $input = $len ? input_tmpfile($self) : $null_io; + $input = $len ? 
tmpfile('http.input', $self->{sock}) + : $null_io; } else { $input = $null_io; } } # TODO: expire idle clients on ENFILE / EMFILE - return unless $input; - - $env->{'psgi.input'} = $input; + $env->{'psgi.input'} = $input // return; $self->{env} = $env; $self->{input_left} = $len || 0; } @@ -441,7 +423,7 @@ sub read_input_chunked { # unlikely... # drain the current chunk until ($len <= 0) { if ($$rbuf ne '') { - my $w = xwrite($input, $rbuf, $len); + my $w = syswrite($input, $$rbuf, $len); return write_err($self, "$len chunk") if !$w; $len -= $w; if ($len == 0) {