From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] http: fix pipelining during long async requests
Date: Sat, 25 Nov 2023 20:54:33 +0000 [thread overview]
Message-ID: <20231125205435.4187895-2-e@80x24.org> (raw)
In-Reply-To: <20231125205435.4187895-1-e@80x24.org>
We must not attempt to read request bodies from the HTTP client
while processing a long request since that drains pipelined
requests. The NNTP/IMAP/POP3 event_step callbacks follow the
same behavior when {long_cb} is present from ->long_response.
This bug has little real-world consequence since HTTP/1.1
pipelining is not widely-used, especially when behind varnish
or other reverse proxies.
I found this bug while randomly strace-ing an active -netd
process to see the kind of traffic it was seeing.
---
lib/PublicInbox/HTTP.pm | 17 +++++------
xt/httpd-async-stream.t | 68 ++++++++++++++++++++++++++++++-----------
2 files changed, 59 insertions(+), 26 deletions(-)
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index 85991ae7..7162732e 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -76,7 +76,7 @@ sub new ($$$) {
sub event_step { # called by PublicInbox::DS
my ($self) = @_;
local $SIG{__WARN__} = $self->{srv_env}->{'pi-httpd.warn_cb'};
- return unless $self->flush_write && $self->{sock};
+ return unless $self->flush_write && $self->{sock} && !$self->{forward};
# only read more requests if we've drained the write buffer,
# otherwise we can be buffering infinitely w/o backpressure
@@ -230,6 +230,13 @@ sub identity_write ($$) {
sub response_done {
my ($self, $alive) = @_;
+ if (my $forward = delete $self->{forward}) { # avoid recursion
+ eval { $forward->close };
+ if ($@) {
+ warn "response forward->close error: $@";
+ return $self->close; # idempotent
+ }
+ }
delete $self->{env}; # we're no longer busy
# HEAD requests set $alive = 3 so we don't send "0\r\n\r\n";
$self->write(\"0\r\n\r\n") if $alive == 2;
@@ -268,14 +275,6 @@ sub getline_pull {
warn "response ->getline error: $@";
$self->close;
}
- # avoid recursion
- if (delete $self->{forward}) {
- eval { $forward->close };
- if ($@) {
- warn "response ->close error: $@";
- $self->close; # idempotent
- }
- }
response_done($self, delete $self->{alive});
}
diff --git a/xt/httpd-async-stream.t b/xt/httpd-async-stream.t
index 099ceb79..21d09331 100644
--- a/xt/httpd-async-stream.t
+++ b/xt/httpd-async-stream.t
@@ -2,8 +2,10 @@
# Copyright (C) all contributors <meta@public-inbox.org>
# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
# Expensive test to validate compression and TLS.
-use strict;
-use v5.10.1;
+use v5.12;
+use autodie;
+use PublicInbox::IO qw(write_file);
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use PublicInbox::TestCommon;
use PublicInbox::DS qw(now);
use PublicInbox::Spawn qw(popen_rd);
@@ -23,20 +25,15 @@ diag "TEST_JOBS=$JOBS TEST_ENDPOINT=$endpoint TEST_CURL_OPT=$curl_opt";
my @CURL_OPT = (qw(-HHost:example.com -sSf), split(' ', $curl_opt));
my $make_local_server = sub {
+ my ($http) = @_;
my $pi_config = "$tmpdir/config";
- open my $fh, '>', $pi_config or die "open($pi_config): $!";
- print $fh <<"" or die "print $pi_config: $!";
+ write_file '>', $pi_config, <<"";
[publicinbox "test"]
inboxdir = $inboxdir
address = test\@example.com
- close $fh or die "close($pi_config): $!";
my ($out, $err) = ("$tmpdir/out", "$tmpdir/err");
- for ($out, $err) {
- open my $fh, '>', $_ or die "truncate: $!";
- }
- my $http = tcp_server();
- my $rdr = { 3 => $http };
+ for ($out, $err) { open my $fh, '>', $_ }
# not using multiple workers, here, since we want to increase
# the chance of tripping concurrency bugs within PublicInbox/HTTP*.pm
@@ -46,10 +43,22 @@ address = test\@example.com
my $url = "$host_port/test/$endpoint";
print STDERR "# CMD ". join(' ', @$cmd). "\n";
my $env = { PI_CONFIG => $pi_config };
- (start_script($cmd, $env, $rdr), $url);
+ (start_script($cmd, $env, { 3 => $http }), $url)
};
-my ($td, $url) = $make_local_server->();
+my ($td, $url) = $make_local_server->(my $http = tcp_server());
+
+my $s1 = tcp_connect($http);
+my $rbuf = do { # pipeline while reading long response
+ my $req = <<EOM;
+GET /test/$endpoint HTTP/1.1\r
+Host: example.com\r
+\r
+EOM
+ is syswrite($s1, $req), length($req), 'initial long req';
+ <$s1>;
+};
+like $rbuf, qr!\AHTTP/1\.1 200\b!, 'started reading 200 response';
my $do_get_all = sub {
my ($job) = @_;
@@ -74,16 +83,16 @@ my $do_get_all = sub {
my (%pids, %res);
for my $job (1..$JOBS) {
- pipe(my ($r, $w)) or die;
+ pipe(my $r, my $w);
my $pid = fork;
if ($pid == 0) {
- close $r or die;
+ close $r;
my $res = $do_get_all->($job);
- print $w $res or die;
- close $w or die;
+ print $w $res;
+ close $w;
_exit(0);
}
- close $w or die;
+ close $w;
$pids{$pid} = [ $job, $r ];
}
@@ -96,6 +105,31 @@ while (scalar keys %pids) {
push @{$res{$sum}}, $job;
}
is(scalar keys %res, 1, 'all got the same result');
+{
+ my $req = <<EOM;
+GET /test/manifest.js.gz HTTP/1.1\r
+Host: example.com\r
+Connection: close\r
+\r
+EOM
+ is syswrite($s1, $req), length($req),
+ 'pipeline another request while reading long response';
+ diag 'reading remainder of slow response';
+ my $res = do { local $/ = "\r\n\r\n"; <$s1> };
+ like $res, qr/^Transfer-Encoding: chunked\r\n/sm, 'chunked response';
+ {
+ local $/ = "\r\n"; # get to final chunk
+ while (defined(my $l = <$s1>)) { last if $l eq "0\r\n" }
+ };
+ is scalar(readline($s1)), "\r\n", 'got final CRLF from 1st response';
+ diag "second response:";
+ $res = do { local $/ = "\r\n\r\n"; <$s1> };
+ like $res, qr!\AHTTP/1\.1 200 !, 'response for pipelined req';
+ gunzip($s1 => \my $json) or xbail "gunzip $GunzipError";
+ my $m = PublicInbox::Config::json()->decode($json);
+ like $m->{'/test'}->{fingerprint}, qr/\A[0-9a-f]{40,}\z/,
+ 'acceptable fingerprint in response';
+}
$td->kill;
$td->join;
is($?, 0, 'no error on -httpd exit');
next prev parent reply other threads:[~2023-11-25 20:54 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-25 20:54 [PATCH 0/3] ds: event loop-related fixes Eric Wong
2023-11-25 20:54 ` Eric Wong [this message]
2023-11-25 20:54 ` [PATCH 2/3] select+poll: have caller retry on EINTR Eric Wong
2023-11-25 20:54 ` [PATCH 3/3] ds: long_step: eliminate redundant fileno call Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231125205435.4187895-2-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).