From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 05B6E1FD50 for ; Fri, 24 Jan 2020 09:43:53 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 4/4] wwwstatic: wire up buffer bypass for -httpd Date: Fri, 24 Jan 2020 09:43:52 +0000 Message-Id: <20200124094352.19437-5-e@yhbt.net> In-Reply-To: <20200124094352.19437-1-e@yhbt.net> References: <20200124094352.19437-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This prevents public-inbox-httpd from buffering ->getline results from a static file into another temporary file when writing to slow clients. Instead we inject the static file ref with offsets and length directly into the {wbuf} queue. It took me a while to decide to go this route, some rejected ideas: 1. Using Plack::Util::set_io_path and having PublicInbox::HTTP serve the result directly. This is compatible with what some other PSGI servers do using sendfile. However, neither Starman or Twiggy currently use sendfile for partial responses. 2. Parsing the Content-Range response header for offsets and lengths to use with set_io_path for partial responses. These rejected ideas required increasing the complexity of HTTP response writing in PublicInbox::HTTP in the common, non-static file cases. Instead, we made minor changes to the colder write buffering path of PublicInbox::DS and leave the hot paths untouched. We still support generic PSGI servers via ->getline. However, since we don't know the characteristics of other PSGI servers, we no longer do a 64K initial read in an attempt to negotiate a larger TCP window. --- lib/PublicInbox/WwwStatic.pm | 54 +++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm index e1f536f3..174f4752 100644 --- a/lib/PublicInbox/WwwStatic.pm +++ b/lib/PublicInbox/WwwStatic.pm @@ -51,7 +51,24 @@ sub r ($;$) { [ $msg ] ] } -sub prepare_range { +sub getline_response ($$$$$) { + my ($env, $in, $off, $len, $path) = @_; + my $r = bless {}, __PACKAGE__; + if ($env->{'pi-httpd.async'}) { # public-inbox-httpd-only mode + $env->{'psgix.no-compress'} = 1; # do not chunk response + %$r = ( bypass => [$in, $off, $len, $env->{'psgix.io'}] ); + } else { + %$r = ( in => $in, off => $off, len => $len, path => $path ); + } + $r; +} + +sub getline_generic ($$$$) { + my ($in, $off, $len, $p) = @_; + bless { in => $in, off => $off, len => $len, path => $p }, __PACKAGE__; +} + +sub setup_range { my ($env, $in, $h, $beg, $end, $size) = @_; my $code = 200; my $len = $size; @@ -81,9 +98,6 @@ sub prepare_range { if ($len <= 0) { $code = 416; } else { - if ($in) { - sysseek($in, $beg, SEEK_SET) or return r(500); - } push @$h, qw(Accept-Ranges bytes Content-Range); push @$h, "bytes $beg-$end/$size"; @@ -95,7 +109,7 @@ sub prepare_range { push @$h, 'Content-Range', "bytes */$size"; return [ 416, $h, [] ]; } - ($code, $len); + ($code, $beg, $len); } # returns a PSGI arrayref response iff .gz and non-.gz mtimes match @@ -131,6 +145,7 @@ sub response ($$$;$) { return r(500); } return r(404) unless -f $in; + binmode $in, ':unix'; # reduce syscalls for read() >8192 bytes } my $size = -s _; # bare "_" reuses "struct stat" from "-f" above my $mtime = time2str((stat(_))[9]); @@ -142,30 +157,37 @@ sub response ($$$;$) { my $len = $size; my $code = 200; push @$h, 'Content-Type', $type; + my $off = 0; if (($env->{HTTP_RANGE} || '') =~ /\bbytes=([0-9]*)-([0-9]*)\z/) { - ($code, $len) = prepare_range($env, $in, $h, $1, $2, $size); + ($code, $off, $len) = setup_range($env, $in, $h, $1, $2, $size); return $code if ref($code); } push @$h, 'Content-Length', $len, 'Last-Modified', $mtime; - my $body = $in ? bless { - initial_rd => 65536, - len => $len, - in => $in, - path => $path, - env => $env, - }, __PACKAGE__ : []; - [ $code, $h, $body ]; + [ $code, $h, $in ? getline_response($env, $in, $off, $len, $path) : [] ] } # called by PSGI servers on each response chunk: sub getline { my ($self) = @_; + + # avoid buffering, by becoming the buffer! (public-inbox-httpd) + if (my $tmpio = delete $self->{bypass}) { + my $http = pop @$tmpio; # PublicInbox::HTTP + push @{$http->{wbuf}}, $tmpio; # [ $in, $off, $len ] + $http->flush_write; + return; # undef, EOF + } + + # generic PSGI runs this: my $len = $self->{len} or return; # undef, tells server we're done - my $n = delete($self->{initial_rd}) // 8192; + my $n = 8192; $n = $len if $len < $n; - my $r = sysread($self->{in}, my $buf, $n); + seek($self->{in}, $self->{off}, SEEK_SET) or + die "seek ($self->{path}): $!"; + my $r = read($self->{in}, my $buf, $n); if (defined $r && $r > 0) { # success! $self->{len} = $len - $r; + $self->{off} += $r; return $buf; } my $m = defined $r ? "EOF with $len bytes left" : "read error: $!";