* Re: [PATCH 3/4] ds: tmpio: store offsets per-buffer
2020-01-24 9:43 5% ` [PATCH 3/4] ds: tmpio: store offsets per-buffer Eric Wong
@ 2020-01-24 19:07 7% ` Eric Wong
0 siblings, 0 replies; 3+ results
From: Eric Wong @ 2020-01-24 19:07 UTC (permalink / raw)
To: meta
Eric Wong <e@yhbt.net> wrote:
> diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
> index 970061fd..a9ac7fcd 100644
> --- a/lib/PublicInbox/DS.pm
> +++ b/lib/PublicInbox/DS.pm
> @@ -490,12 +491,13 @@ sub drop {
> # PerlIO::mmap or PerlIO::scalar if needed
> sub tmpio ($$$) {
> my ($self, $bref, $off) = @_;
> - my $fh = tmpfile('wbuf', $self->{sock}, 1) or
> + my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
> return drop($self, "tmpfile $!");
> $fh->autoflush(1);
> + binmode $fh, ':unix'; # reduce syscalls for read() >8192 bytes
That binmode :unix call triggers a leak in Perl[1], so I'm going
to have to squash the patch below to work around it:
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index a9ac7fcd..c76a5038 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -494,7 +494,6 @@ sub tmpio ($$$) {
my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
return drop($self, "tmpfile $!");
$fh->autoflush(1);
- binmode $fh, ':unix'; # reduce syscalls for read() >8192 bytes
my $len = bytes::length($$bref) - $off;
$fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!");
[ $fh, 0 ] # [1] = offset, [2] = length, not set by us
... And perhaps just use sysread() instead of read(), since
I'm not seeing a good reason to keep compatibility with
PerlIO::scalar for buffering, after all.
[1] http://nntp.perl.org/group/perl.perl5.porters/256918
^ permalink raw reply related [relevance 7%]
* [PATCH 0/4] -httpd static file improvements
@ 2020-01-24 9:43 6% Eric Wong
2020-01-24 9:43 5% ` [PATCH 3/4] ds: tmpio: store offsets per-buffer Eric Wong
0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2020-01-24 9:43 UTC (permalink / raw)
To: meta
Serving large static files to slow clients could lead to
public-inbox-httpd re-buffering data that already exists in
static files into a temporary file.
This was inefficient, and solving it in a generic way could
actually break other PSGI servers since their sendfile
optimizations don't handle 206 (partial content) responses
correctly.
So we make minor changes to the way PublicInbox::DS handles
write buffers and inject static files, offsets, and length
limits directly into the {wbuf} queue.
Eric Wong (4):
http: eliminate short-lived cyclic ref for psgix.io
wwwstatic: offload error handling to PSGI server
ds: tmpio: store offsets per-buffer
wwwstatic: wire up buffer bypass for -httpd
lib/PublicInbox/DS.pm | 36 ++++++++++----------
lib/PublicInbox/HTTP.pm | 7 ++--
lib/PublicInbox/WwwStatic.pm | 65 +++++++++++++++++++++---------------
3 files changed, 61 insertions(+), 47 deletions(-)
^ permalink raw reply [relevance 6%]
* [PATCH 3/4] ds: tmpio: store offsets per-buffer
2020-01-24 9:43 6% [PATCH 0/4] -httpd static file improvements Eric Wong
@ 2020-01-24 9:43 5% ` Eric Wong
2020-01-24 19:07 7% ` Eric Wong
0 siblings, 1 reply; 3+ results
From: Eric Wong @ 2020-01-24 9:43 UTC (permalink / raw)
To: meta
We want to be able to inject existing file handles + offsets and
even lengths into the {wbuf} queue in the future, without going
through the ->getline interface[1].
We also switch to using a 64K buffer size since we can safely
discard whatever got truncated on write and full writes can help
negotiate a larger TCP window for high-latency, high-bandwidth
links.
While we're at it, make it obvious that we're using O_APPEND for
our tmpfile() interface so we can seek freely for reading while
the writer always prints to the end of the file.
[1] the getline interface for serving static files may result
in us buffering on-FS data into another temporary file,
which is a waste.
---
lib/PublicInbox/DS.pm | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 970061fd..a9ac7fcd 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -18,7 +18,7 @@ use strict;
use bytes;
use POSIX qw(WNOHANG);
use IO::Handle qw();
-use Fcntl qw(SEEK_SET :DEFAULT);
+use Fcntl qw(SEEK_SET :DEFAULT O_APPEND);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
use parent qw(Exporter);
our @EXPORT_OK = qw(now msg_more);
@@ -31,8 +31,8 @@ use PublicInbox::Tmpfile;
use fields ('sock', # underlying socket
'rbuf', # scalarref, usually undef
- 'wbuf', # arrayref of coderefs or GLOB refs (autovivified)
- 'wbuf_off', # offset into first element of wbuf to start writing at
+ 'wbuf', # arrayref of coderefs or tmpio (autovivified))
+ # (tmpio = [ GLOB, offset, [ length ] ])
);
use Errno qw(EAGAIN EINVAL);
@@ -392,11 +392,13 @@ sub close {
}
# portable, non-thread-safe sendfile emulation (no pread, yet)
-sub psendfile ($$$) {
- my ($sock, $fh, $off) = @_;
+sub send_tmpio ($$) {
+ my ($sock, $tmpio) = @_;
- seek($fh, $$off, SEEK_SET) or return;
- defined(my $to_write = read($fh, my $buf, 16384)) or return;
+ seek($tmpio->[0], $tmpio->[1], SEEK_SET) or return;
+ my $n = $tmpio->[2] // 65536;
+ $n = 65536 if $n > 65536;
+ defined(my $to_write = read($tmpio->[0], my $buf, $n)) or return;
my $written = 0;
while ($to_write > 0) {
if (defined(my $w = syswrite($sock, $buf, $to_write, $written))) {
@@ -407,7 +409,8 @@ sub psendfile ($$$) {
last;
}
}
- $$off += $written;
+ $tmpio->[1] += $written; # offset
+ $tmpio->[2] -= $written if defined($tmpio->[2]); # length
$written;
}
@@ -424,9 +427,8 @@ sub flush_write ($) {
next_buf:
while (my $bref = $wbuf->[0]) {
if (ref($bref) ne 'CODE') {
- my $off = delete($self->{wbuf_off}) // 0;
while ($sock) {
- my $w = psendfile($sock, $bref, \$off);
+ my $w = send_tmpio($sock, $bref); # bref is tmpio
if (defined $w) {
if ($w == 0) {
shift @$wbuf;
@@ -434,13 +436,12 @@ next_buf:
}
} elsif ($! == EAGAIN) {
epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
- $self->{wbuf_off} = $off;
return 0;
} else {
return $self->close;
}
}
- } else { #($ref eq 'CODE') {
+ } else { #(ref($bref) eq 'CODE') {
shift @$wbuf;
my $before = scalar(@$wbuf);
$bref->($self);
@@ -490,12 +491,13 @@ sub drop {
# PerlIO::mmap or PerlIO::scalar if needed
sub tmpio ($$$) {
my ($self, $bref, $off) = @_;
- my $fh = tmpfile('wbuf', $self->{sock}, 1) or
+ my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
return drop($self, "tmpfile $!");
$fh->autoflush(1);
+ binmode $fh, ':unix'; # reduce syscalls for read() >8192 bytes
my $len = bytes::length($$bref) - $off;
$fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!");
- $fh
+ [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
}
=head2 C<< $obj->write( $data ) >>
@@ -524,9 +526,9 @@ sub write {
if ($ref eq 'CODE') {
push @$wbuf, $bref;
} else {
- my $last = $wbuf->[-1];
- if (ref($last) eq 'GLOB') { # append to tmp file buffer
- $last->print($$bref) or return drop($self, "print: $!");
+ my $tmpio = $wbuf->[-1];
+ if ($tmpio && !defined($tmpio->[2])) { # append to tmp file buffer
+ $tmpio->[0]->print($$bref) or return drop($self, "print: $!");
} else {
my $tmpio = tmpio($self, $bref, 0) or return 0;
push @$wbuf, $tmpio;
^ permalink raw reply related [relevance 5%]
Results 1-3 of 3 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-01-24 9:43 6% [PATCH 0/4] -httpd static file improvements Eric Wong
2020-01-24 9:43 5% ` [PATCH 3/4] ds: tmpio: store offsets per-buffer Eric Wong
2020-01-24 19:07 7% ` Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).