From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 3/4] ds: tmpio: store offsets per-buffer
Date: Fri, 24 Jan 2020 09:43:51 +0000 [thread overview]
Message-ID: <20200124094352.19437-4-e@yhbt.net> (raw)
In-Reply-To: <20200124094352.19437-1-e@yhbt.net>
We want to be able to inject existing file handles + offsets and
even lengths into this in the future, without going through the
->getline interface[1].
We also switch to using a 64K buffer size since we can safely
discard whatever got truncated on write, and full writes can help
negotiate a larger TCP window for high-latency, high-bandwidth
links.
While we're at it, make it obvious that we're using O_APPEND for
our tmpfile() interface so we can seek freely for reading while
the writer always prints to the end of the file.
[1] the getline interface for serving static files may result
in us buffering on-FS data into another temporary file,
which is a waste.
---
lib/PublicInbox/DS.pm | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 970061fd..a9ac7fcd 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -18,7 +18,7 @@ use strict;
use bytes;
use POSIX qw(WNOHANG);
use IO::Handle qw();
-use Fcntl qw(SEEK_SET :DEFAULT);
+use Fcntl qw(SEEK_SET :DEFAULT O_APPEND);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
use parent qw(Exporter);
our @EXPORT_OK = qw(now msg_more);
@@ -31,8 +31,8 @@ use PublicInbox::Tmpfile;
use fields ('sock', # underlying socket
'rbuf', # scalarref, usually undef
- 'wbuf', # arrayref of coderefs or GLOB refs (autovivified)
- 'wbuf_off', # offset into first element of wbuf to start writing at
+ 'wbuf', # arrayref of coderefs or tmpio (autovivified)
+ # (tmpio = [ GLOB, offset, [ length ] ])
);
use Errno qw(EAGAIN EINVAL);
@@ -392,11 +392,13 @@ sub close {
}
# portable, non-thread-safe sendfile emulation (no pread, yet)
-sub psendfile ($$$) {
- my ($sock, $fh, $off) = @_;
+sub send_tmpio ($$) {
+ my ($sock, $tmpio) = @_;
- seek($fh, $$off, SEEK_SET) or return;
- defined(my $to_write = read($fh, my $buf, 16384)) or return;
+ seek($tmpio->[0], $tmpio->[1], SEEK_SET) or return;
+ my $n = $tmpio->[2] // 65536;
+ $n = 65536 if $n > 65536;
+ defined(my $to_write = read($tmpio->[0], my $buf, $n)) or return;
my $written = 0;
while ($to_write > 0) {
if (defined(my $w = syswrite($sock, $buf, $to_write, $written))) {
@@ -407,7 +409,8 @@ sub psendfile ($$$) {
last;
}
}
- $$off += $written;
+ $tmpio->[1] += $written; # offset
+ $tmpio->[2] -= $written if defined($tmpio->[2]); # length
$written;
}
@@ -424,9 +427,8 @@ sub flush_write ($) {
next_buf:
while (my $bref = $wbuf->[0]) {
if (ref($bref) ne 'CODE') {
- my $off = delete($self->{wbuf_off}) // 0;
while ($sock) {
- my $w = psendfile($sock, $bref, \$off);
+ my $w = send_tmpio($sock, $bref); # bref is tmpio
if (defined $w) {
if ($w == 0) {
shift @$wbuf;
@@ -434,13 +436,12 @@ next_buf:
}
} elsif ($! == EAGAIN) {
epwait($sock, epbit($sock, EPOLLOUT) | EPOLLONESHOT);
- $self->{wbuf_off} = $off;
return 0;
} else {
return $self->close;
}
}
- } else { #($ref eq 'CODE') {
+ } else { #(ref($bref) eq 'CODE') {
shift @$wbuf;
my $before = scalar(@$wbuf);
$bref->($self);
@@ -490,12 +491,13 @@ sub drop {
# PerlIO::mmap or PerlIO::scalar if needed
sub tmpio ($$$) {
my ($self, $bref, $off) = @_;
- my $fh = tmpfile('wbuf', $self->{sock}, 1) or
+ my $fh = tmpfile('wbuf', $self->{sock}, O_APPEND) or
return drop($self, "tmpfile $!");
$fh->autoflush(1);
+ binmode $fh, ':unix'; # reduce syscalls for read() >8192 bytes
my $len = bytes::length($$bref) - $off;
$fh->write($$bref, $len, $off) or return drop($self, "write ($len): $!");
- $fh
+ [ $fh, 0 ] # [1] = offset, [2] = length, not set by us
}
=head2 C<< $obj->write( $data ) >>
@@ -524,9 +526,9 @@ sub write {
if ($ref eq 'CODE') {
push @$wbuf, $bref;
} else {
- my $last = $wbuf->[-1];
- if (ref($last) eq 'GLOB') { # append to tmp file buffer
- $last->print($$bref) or return drop($self, "print: $!");
+ my $tmpio = $wbuf->[-1];
+ if ($tmpio && !defined($tmpio->[2])) { # append to tmp file buffer
+ $tmpio->[0]->print($$bref) or return drop($self, "print: $!");
} else {
my $tmpio = tmpio($self, $bref, 0) or return 0;
push @$wbuf, $tmpio;
next prev parent reply other threads:[~2020-01-24 9:43 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-24 9:43 [PATCH 0/4] -httpd static file improvements Eric Wong
2020-01-24 9:43 ` [PATCH 1/4] http: eliminate short-lived cyclic ref for psgix.io Eric Wong
2020-01-24 9:43 ` [PATCH 2/4] wwwstatic: offload error handling to PSGI server Eric Wong
2020-01-24 9:43 ` Eric Wong [this message]
2020-01-24 19:07 ` [PATCH 3/4] ds: tmpio: store offsets per-buffer Eric Wong
2020-01-24 9:43 ` [PATCH 4/4] wwwstatic: wire up buffer bypass for -httpd Eric Wong
2020-01-25 19:27 ` Eric Wong
2020-01-25 19:34 ` Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200124094352.19437-4-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).