user/dev discussion of public-inbox itself
 help / color / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 34/57] allow use of PerlIO layers for filesystem writes
Date: Mon, 24 Jun 2019 02:52:35 +0000
Message-ID: <20190624025258.25592-35-e@80x24.org> (raw)
In-Reply-To: <20190624025258.25592-1-e@80x24.org>

It may make sense to use PerlIO::mmap or PerlIO::scalar for
DS write buffering with IO::Socket::SSL or similar (since we can't
use MSG_MORE), so that means we need to go through buffering
in userspace for the common case; while still being easily
compatible with slow clients.

And it also simplifies GitHTTPBackend slightly.

Maybe it can make sense for HTTP input buffering, too...
---
 lib/PublicInbox/DS.pm             | 32 ++++++++++++-------------------
 lib/PublicInbox/GitHTTPBackend.pm | 18 ++++++++---------
 lib/PublicInbox/HTTP.pm           | 24 ++++++++++++++++++-----
 3 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index 8735e888..486af40e 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -21,7 +21,7 @@ use IO::Handle qw();
 use Fcntl qw(FD_CLOEXEC F_SETFD F_GETFD SEEK_SET);
 use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
 use parent qw(Exporter);
-our @EXPORT_OK = qw(now msg_more write_in_full);
+our @EXPORT_OK = qw(now msg_more);
 use warnings;
 use 5.010_001;
 
@@ -422,8 +422,8 @@ sub close {
 sub psendfile ($$$) {
     my ($sock, $fh, $off) = @_;
 
-    sysseek($fh, $$off, SEEK_SET) or return;
-    defined(my $to_write = sysread($fh, my $buf, 16384)) or return;
+    seek($fh, $$off, SEEK_SET) or return;
+    defined(my $to_write = read($fh, my $buf, 16384)) or return;
     my $written = 0;
     while ($to_write > 0) {
         if (defined(my $w = syswrite($sock, $buf, $to_write, $written))) {
@@ -482,29 +482,18 @@ sub do_read ($$$$) {
     $! == EAGAIN ? $self->watch_in1 : $self->close;
 }
 
-sub write_in_full ($$$$) {
-    my ($fh, $bref, $len, $off) = @_;
-    my $rv = 0;
-    while ($len > 0) {
-        my $w = syswrite($fh, $$bref, $len, $off);
-        return ($rv ? $rv : $w) unless $w; # undef or 0
-        $rv += $w;
-        $len -= $w;
-        $off += $w;
-    }
-    $rv
-}
-
+# n.b.: use ->write/->read for this buffer to allow compatibility with
+# PerlIO::mmap or PerlIO::scalar if needed
 sub tmpbuf ($$) {
     my ($bref, $off) = @_;
     # open(my $fh, '+>>', undef) doesn't set O_APPEND
     my ($fh, $path) = tempfile('wbuf-XXXXXXX', TMPDIR => 1);
     open $fh, '+>>', $path or die "open: $!";
+    $fh->autoflush(1);
     unlink $path;
     my $to_write = bytes::length($$bref) - $off;
-    my $w = write_in_full($fh, $bref, $to_write, $off);
-    die "write_in_full ($to_write): $!" unless defined $w;
-    $w == $to_write ? $fh : die("short write $w < $to_write");
+    $fh->write($$bref, $to_write, $off) or die "write ($to_write): $!";
+    $fh;
 }
 
 =head2 C<< $obj->write( $data ) >>
@@ -534,7 +523,10 @@ sub write {
         } else {
             my $last = $wbuf->[-1];
             if (ref($last) eq 'GLOB') { # append to tmp file buffer
-                write_in_full($last, $bref, bytes::length($$bref), 0);
+                unless ($last->print($$bref)) {
+                    warn "error buffering: $!";
+                    return $self->close;
+                }
             } else {
                 push @$wbuf, tmpbuf($bref, 0);
             }
diff --git a/lib/PublicInbox/GitHTTPBackend.pm b/lib/PublicInbox/GitHTTPBackend.pm
index a2a81f8e..303d5073 100644
--- a/lib/PublicInbox/GitHTTPBackend.pm
+++ b/lib/PublicInbox/GitHTTPBackend.pm
@@ -231,18 +231,16 @@ sub input_prepare {
 			return;
 		}
 		last if $r == 0;
-		my $off = 0;
-		while ($r > 0) {
-			my $w = syswrite($in, $buf, $r, $off);
-			if (defined $w) {
-				$r -= $w;
-				$off += $w;
-			} else {
-				err($env, "error writing temporary file: $!");
-				return;
-			}
+		unless (print $in $buf) {
+			err($env, "error writing temporary file: $!");
+			return;
 		}
 	}
+	# ensure it's visible to git-http-backend(1):
+	unless ($in->flush) {
+		err($env, "error writing temporary file: $!");
+		return;
+	}
 	unless (defined(sysseek($in, 0, SEEK_SET))) {
 		err($env, "error seeking temporary file: $!");
 		return;
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 7697ac5c..a1cb4aca 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -19,7 +19,7 @@ use HTTP::Status qw(status_message);
 use HTTP::Date qw(time2str);
 use IO::Handle;
 require PublicInbox::EvCleanup;
-PublicInbox::DS->import(qw(msg_more write_in_full));
+PublicInbox::DS->import(qw(msg_more));
 use PublicInbox::Syscall qw(EPOLLIN EPOLLONESHOT);
 use constant {
 	CHUNK_START => -1,   # [a-f0-9]+\r\n
@@ -102,6 +102,15 @@ sub rbuf_process {
 	$len ? read_input($self) : app_dispatch($self);
 }
 
+# IO::Handle::write returns boolean, this returns bytes written:
+sub xwrite ($$$) {
+	my ($fh, $rbuf, $max) = @_;
+	my $w = bytes::length($$rbuf);
+	$w = $max if $w > $max;
+	$fh->write($$rbuf, $w) or return;
+	$w;
+}
+
 sub read_input ($) {
 	my ($self) = @_;
 	my $env = $self->{env};
@@ -116,7 +125,7 @@ sub read_input ($) {
 
 	while ($len > 0) {
 		if ($$rbuf ne '') {
-			my $w = write_in_full($input, $rbuf, $len, 0);
+			my $w = xwrite($input, $rbuf, $len);
 			return write_err($self, $len) unless $w;
 			$len -= $w;
 			die "BUG: $len < 0 (w=$w)" if $len < 0;
@@ -306,6 +315,11 @@ sub response_write {
 	}
 }
 
+sub input_tmpfile ($) {
+	open($_[0], '+>', undef);
+	$_[0]->autoflush(1);
+}
+
 sub input_prepare {
 	my ($self, $env) = @_;
 	my $input;
@@ -315,10 +329,10 @@ sub input_prepare {
 			quit($self, 413);
 			return;
 		}
-		open($input, '+>', undef);
+		input_tmpfile($input);
 	} elsif (env_chunked($env)) {
 		$len = CHUNK_START;
-		open($input, '+>', undef);
+		input_tmpfile($input);
 	} else {
 		$input = $null_io;
 	}
@@ -399,7 +413,7 @@ sub read_input_chunked { # unlikely...
 		# drain the current chunk
 		until ($len <= 0) {
 			if ($$rbuf ne '') {
-				my $w = write_in_full($input, $rbuf, $len, 0);
+				my $w = xwrite($input, $rbuf, $len);
 				return write_err($self, "$len chunk") if !$w;
 				$len -= $w;
 				if ($len == 0) {
-- 
EW


  parent reply index

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-24  2:52 [PATCH 00/57] ds: shrink, TLS support, buffer writes to FS Eric Wong
2019-06-24  2:52 ` [PATCH 01/57] ds: get rid of {closed} field Eric Wong
2019-06-24  2:52 ` [PATCH 02/57] ds: get rid of more unused debug instance methods Eric Wong
2019-06-24  2:52 ` [PATCH 03/57] ds: use and export monotonic now() Eric Wong
2019-06-24  2:52 ` [PATCH 04/57] AddTimer: avoid clock_gettime for the '0' case Eric Wong
2019-06-24  2:52 ` [PATCH 05/57] ds: get rid of on_incomplete_write wrapper Eric Wong
2019-06-24  2:52 ` [PATCH 06/57] ds: lazy initialize wbuf_off Eric Wong
2019-06-24  2:52 ` [PATCH 07/57] ds: split out from ->flush_write and ->write Eric Wong
2019-06-24  2:52 ` [PATCH 08/57] ds: lazy-initialize wbuf Eric Wong
2019-06-24  2:52 ` [PATCH 09/57] ds: don't pass `events' arg to EPOLL_CTL_DEL Eric Wong
2019-06-24  2:52 ` [PATCH 10/57] ds: remove support for DS->write(undef) Eric Wong
2019-06-24  2:52 ` [PATCH 11/57] http: favor DS->write(strref) when reasonable Eric Wong
2019-06-24  2:52 ` [PATCH 12/57] ds: share send(..., MSG_MORE) logic Eric Wong
2019-06-24  2:52 ` [PATCH 13/57] ds: switch write buffering to use a tempfile Eric Wong
2019-06-24  2:52 ` [PATCH 14/57] ds: get rid of redundant and unnecessary POLL* constants Eric Wong
2019-06-24  2:52 ` [PATCH 15/57] syscall: get rid of unused EPOLL* constants Eric Wong
2019-06-24  2:52 ` [PATCH 16/57] syscall: get rid of unnecessary uname local vars Eric Wong
2019-06-24  2:52 ` [PATCH 17/57] ds: set event flags directly at initialization Eric Wong
2019-06-24  2:52 ` [PATCH 18/57] ds: import IO::KQueue namespace Eric Wong
2019-06-24  2:52 ` [PATCH 19/57] ds: share watch_chg between watch_read/watch_write Eric Wong
2019-06-24  2:52 ` [PATCH 20/57] ds: remove IO::Poll support (for now) Eric Wong
2019-06-24  2:52 ` [PATCH 21/57] ds: get rid of event_watch field Eric Wong
2019-06-24  2:52 ` [PATCH 22/57] httpd/async: remove EINTR check Eric Wong
2019-06-24  2:52 ` [PATCH 23/57] spawn: remove `Blocking' flag handling Eric Wong
2019-06-24  2:52 ` [PATCH 24/57] qspawn: describe where `$rpipe' come from Eric Wong
2019-06-24  2:52 ` [PATCH 25/57] http|nntp: favor "$! == EFOO" over $!{EFOO} checks Eric Wong
2019-06-24  2:52 ` [PATCH 26/57] ds: favor `delete' over assigning fields to `undef' Eric Wong
2019-06-24  2:52 ` [PATCH 27/57] http: don't pass extra args to PublicInbox::DS::close Eric Wong
2019-06-24  2:52 ` [PATCH 28/57] ds: pass $self to code references Eric Wong
2019-06-24  2:52 ` [PATCH 29/57] evcleanup: replace _run_asap with `event_step' callback Eric Wong
2019-06-24  2:52 ` [PATCH 30/57] ds: remove pointless exit calls Eric Wong
2019-06-24  2:52 ` [PATCH 31/57] http|nntp: be explicit about bytes::length on rbuf Eric Wong
2019-06-24  2:52 ` [PATCH 32/57] ds: hoist out do_read from NNTP and HTTP Eric Wong
2019-06-24  2:52 ` [PATCH 33/57] nntp: simplify re-arming/requeue logic Eric Wong
2019-06-24  2:52 ` Eric Wong [this message]
2019-06-24  2:52 ` [PATCH 35/57] ds: deal better with FS-related errors IO buffers Eric Wong
2019-06-24  2:52 ` [PATCH 36/57] nntp: wait for writability before sending greeting Eric Wong
2019-06-24  2:52 ` [PATCH 37/57] nntp: NNTPS and NNTP+STARTTLS working Eric Wong
2019-06-24  2:52 ` [PATCH 38/57] certs/create-certs.perl: fix cert validity on 32-bit Eric Wong
2019-06-24  2:52 ` [PATCH 39/57] daemon: map inherited sockets to well-known schemes Eric Wong
2019-06-24  2:52 ` [PATCH 40/57] ds|nntp: use CORE::close on socket Eric Wong
2019-06-24  2:52 ` [PATCH 41/57] nntp: call SSL_shutdown in normal cases Eric Wong
2019-06-24  2:52 ` [PATCH 42/57] t/nntpd-tls: slow client connection test Eric Wong
2019-06-24  2:52 ` [PATCH 43/57] daemon: use SSL_MODE_RELEASE_BUFFERS Eric Wong
2019-06-24  2:52 ` [PATCH 44/57] ds: allow ->write callbacks to syswrite directly Eric Wong
2019-06-24  2:52 ` [PATCH 45/57] nntp: reduce allocations for greeting Eric Wong
2019-06-24  2:52 ` [PATCH 46/57] ds: always use EV_ADD with EV_SET Eric Wong
2019-06-24  2:52 ` [PATCH 47/57] nntp: simplify long response logic and fix nesting Eric Wong
2019-06-24  2:52 ` [PATCH 48/57] ds: flush_write runs ->write callbacks even if closed Eric Wong
2019-06-24  2:52 ` [PATCH 49/57] nntp: lazily allocate and stash rbuf Eric Wong
2019-06-24  2:52 ` [PATCH 50/57] ci: require IO::KQueue on FreeBSD, for now Eric Wong
2019-06-24  2:52 ` [PATCH 51/57] nntp: send greeting immediately for plain sockets Eric Wong
2019-06-24  2:52 ` [PATCH 52/57] daemon: set TCP_DEFER_ACCEPT on everything but NNTP Eric Wong
2019-06-24  2:52 ` [PATCH 53/57] daemon: use FreeBSD accept filters on non-NNTP Eric Wong
2019-06-24  2:52 ` [PATCH 54/57] ds: split out IO::KQueue-specific code Eric Wong
2019-06-24  5:24   ` Eric Wong
2019-06-24  2:52 ` [PATCH 55/57] ds: reimplement IO::Poll support to look like epoll Eric Wong
2019-06-24  2:52 ` [PATCH 56/57] Revert "ci: require IO::KQueue on FreeBSD, for now" Eric Wong
2019-06-24  2:52 ` [PATCH 57/57] ds: reduce overhead of tempfile creation Eric Wong
2019-06-24  5:25 ` [PATCH 58/57] Makefile: skip DSKQXS in global syntax check Eric Wong
2019-06-24 18:28 ` [PATCH 59/57] ds: ->write must not clobber empty wbuf array Eric Wong

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190624025258.25592-35-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox