user/dev discussion of public-inbox itself
 help / color / Atom feed
* [PATCH] ds: share lazy rbuf handling between HTTP and NNTP
@ 2019-06-26  8:32 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2019-06-26  8:32 UTC (permalink / raw)
  To: meta

Doing this for HTTP cuts the memory usage of 10K
idle-after-one-request HTTP clients from 92 MB to 47 MB.

The savings over the equivalent NNTP change in commit
6f173864f5acac89769a67739b8c377510711d49,
("nntp: lazily allocate and stash rbuf") seems down to the
size of HTTP requests and the fact HTTP is a client-sends-first
protocol where as NNTP is server-sends-first.
---
 lib/PublicInbox/DS.pm   | 16 +++++++--
 lib/PublicInbox/HTTP.pm | 79 ++++++++++++++++++++---------------------
 lib/PublicInbox/NNTP.pm |  8 ++---
 3 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm
index a8700bc5..28240843 100644
--- a/lib/PublicInbox/DS.pm
+++ b/lib/PublicInbox/DS.pm
@@ -28,6 +28,7 @@ use 5.010_001;
 use PublicInbox::Syscall qw(:epoll);
 
 use fields ('sock',              # underlying socket
+            'rbuf',              # scalarref, usually undef
             'wbuf',              # arrayref of coderefs or GLOB refs
             'wbuf_off',  # offset into first element of wbuf to start writing at
             );
@@ -412,16 +413,27 @@ next_buf:
     1; # all done
 }
 
-sub do_read ($$$$) {
+sub rbuf_idle ($$) {
+    my ($self, $rbuf) = @_;
+    if ($$rbuf eq '') { # who knows how long till we can read again
+        delete $self->{rbuf};
+    } else {
+        $self->{rbuf} = $rbuf;
+    }
+}
+
+sub do_read ($$$;$) {
     my ($self, $rbuf, $len, $off) = @_;
-    my $r = sysread($self->{sock}, $$rbuf, $len, $off);
+    my $r = sysread($self->{sock}, $$rbuf, $len, $off // 0);
     return ($r == 0 ? $self->close : $r) if defined $r;
     # common for clients to break connections without warning,
     # would be too noisy to log here:
     if (ref($self) eq 'IO::Socket::SSL') {
         my $ev = PublicInbox::TLS::epollbit() or return $self->close;
+        rbuf_idle($self, $rbuf);
         watch($self, $ev | EPOLLONESHOT);
     } elsif ($! == EAGAIN) {
+        rbuf_idle($self, $rbuf);
         watch($self, EPOLLIN | EPOLLONESHOT);
     } else {
         $self->close;
diff --git a/lib/PublicInbox/HTTP.pm b/lib/PublicInbox/HTTP.pm
index a1cb4aca..1153ef98 100644
--- a/lib/PublicInbox/HTTP.pm
+++ b/lib/PublicInbox/HTTP.pm
@@ -11,7 +11,7 @@ package PublicInbox::HTTP;
 use strict;
 use warnings;
 use base qw(PublicInbox::DS);
-use fields qw(httpd env rbuf input_left remote_addr remote_port forward pull);
+use fields qw(httpd env input_left remote_addr remote_port forward pull);
 use bytes (); # only for bytes::length
 use Fcntl qw(:seek);
 use Plack::HTTPParser qw(parse_http_request); # XS or pure Perl
@@ -60,7 +60,6 @@ sub new ($$$) {
 	my $self = fields::new($class);
 	$self->SUPER::new($sock, EPOLLIN | EPOLLONESHOT);
 	$self->{httpd} = $httpd;
-	$self->{rbuf} = '';
 	($self->{remote_addr}, $self->{remote_port}) =
 		PublicInbox::Daemon::host_with_port($addr);
 	$self;
@@ -75,31 +74,34 @@ sub event_step { # called by PublicInbox::DS
 	# otherwise we can be buffering infinitely w/o backpressure
 
 	return read_input($self) if defined $self->{env};
-	my $rbuf = \($self->{rbuf});
-	my $off = bytes::length($$rbuf);
-	$self->do_read($rbuf, 8192, $off) and rbuf_process($self);
+	my $rbuf = $self->{rbuf} // (\(my $x = ''));
+	$self->do_read($rbuf, 8192, bytes::length($$rbuf)) or return;
+	rbuf_process($self, $rbuf);
 }
 
 sub rbuf_process {
-	my ($self) = @_;
+	my ($self, $rbuf) = @_;
+	$rbuf //= $self->{rbuf} // (\(my $x = ''));
 
 	my %env = %{$self->{httpd}->{env}}; # full hash copy
-	my $r = parse_http_request($self->{rbuf}, \%env);
+	my $r = parse_http_request($$rbuf, \%env);
 
 	# We do not support Trailers in chunked requests, for now
 	# (they are rarely-used and git (as of 2.7.2) does not use them)
 	if ($r == -1 || $env{HTTP_TRAILER} ||
 			# this length-check is necessary for PURE_PERL=1:
-			($r == -2 && bytes::length($self->{rbuf}) > 0x4000)) {
+			($r == -2 && bytes::length($$rbuf) > 0x4000)) {
 		return quit($self, 400);
 	}
-	return $self->watch_in1 if $r < 0; # incomplete
-	$self->{rbuf} = substr($self->{rbuf}, $r);
-
+	if ($r < 0) { # incomplete
+		$self->rbuf_idle($rbuf);
+		return $self->watch_in1;
+	}
+	$$rbuf = substr($$rbuf, $r);
 	my $len = input_prepare($self, \%env);
 	defined $len or return write_err($self, undef); # EMFILE/ENFILE
 
-	$len ? read_input($self) : app_dispatch($self);
+	$len ? read_input($self, $rbuf) : app_dispatch($self, undef, $rbuf);
 }
 
 # IO::Handle::write returns boolean, this returns bytes written:
@@ -111,16 +113,15 @@ sub xwrite ($$$) {
 	$w;
 }
 
-sub read_input ($) {
-	my ($self) = @_;
+sub read_input ($;$) {
+	my ($self, $rbuf) = @_;
+	$rbuf //= $self->{rbuf} // (\(my $x = ''));
 	my $env = $self->{env};
 	return if $env->{REMOTE_ADDR}; # in app dispatch
-	return read_input_chunked($self) if env_chunked($env);
+	return read_input_chunked($self, $rbuf) if env_chunked($env);
 
 	# env->{CONTENT_LENGTH} (identity)
-	my $sock = $self->{sock};
 	my $len = delete $self->{input_left};
-	my $rbuf = \($self->{rbuf});
 	my $input = $env->{'psgi.input'};
 
 	while ($len > 0) {
@@ -135,15 +136,15 @@ sub read_input ($) {
 			}
 			$$rbuf = '';
 		}
-		my $r = sysread($sock, $$rbuf, 8192);
-		return recv_err($self, $r, $len) unless $r;
+		$self->do_read($rbuf, 8192) or return recv_err($self, $len);
 		# continue looping if $r > 0;
 	}
-	app_dispatch($self, $input);
+	app_dispatch($self, $input, $rbuf);
 }
 
 sub app_dispatch {
-	my ($self, $input) = @_;
+	my ($self, $input, $rbuf) = @_;
+	$self->rbuf_idle($rbuf);
 	my $env = $self->{env};
 	$env->{REMOTE_ADDR} = $self->{remote_addr};
 	$env->{REMOTE_PORT} = $self->{remote_port};
@@ -235,11 +236,12 @@ sub identity_wcb ($) {
 
 sub next_request ($) {
 	my ($self) = @_;
-	if ($self->{rbuf} eq '') { # wait for next request
-		$self->watch_in1;
-	} else { # avoid recursion for pipelined requests
+	if ($self->{rbuf}) {
+		# avoid recursion for pipelined requests
 		push @$pipelineq, $self;
 		$pipet ||= PublicInbox::EvCleanup::asap(*process_pipelineq);
+	} else { # wait for next request
+		$self->watch_in1;
 	}
 }
 
@@ -360,27 +362,25 @@ sub write_err {
 }
 
 sub recv_err {
-	my ($self, $r, $len) = @_;
-	return $self->close if (defined $r && $r == 0);
-	if ($! == EAGAIN) {
+	my ($self, $len) = @_;
+	if ($! == EAGAIN) { # epoll/kevent watch already set by do_read
 		$self->{input_left} = $len;
-		return $self->watch_in1;
+	} else {
+		err($self, "error reading input: $! ($len bytes remaining)");
 	}
-	err($self, "error reading for input: $! ($len bytes remaining)");
-	quit($self, 500);
 }
 
 sub read_input_chunked { # unlikely...
-	my ($self) = @_;
+	my ($self, $rbuf) = @_;
+	$rbuf //= $self->{rbuf} // (\(my $x = ''));
 	my $input = $self->{env}->{'psgi.input'};
-	my $sock = $self->{sock};
 	my $len = delete $self->{input_left};
-	my $rbuf = \($self->{rbuf});
 
 	while (1) { # chunk start
 		if ($len == CHUNK_ZEND) {
 			$$rbuf =~ s/\A\r\n//s and
-				return app_dispatch($self, $input);
+				return app_dispatch($self, $input, $rbuf);
+
 			return quit($self, 400) if bytes::length($$rbuf) > 2;
 		}
 		if ($len == CHUNK_END) {
@@ -403,9 +403,8 @@ sub read_input_chunked { # unlikely...
 		}
 
 		if ($len < 0) { # chunk header is trickled, read more
-			my $off = bytes::length($$rbuf);
-			my $r = sysread($sock, $$rbuf, 8192, $off);
-			return recv_err($self, $r, $len) unless $r;
+			$self->do_read($rbuf, 8192, bytes::length($$rbuf)) or
+				return recv_err($self, $len);
 			# (implicit) goto chunk_start if $r > 0;
 		}
 		$len = CHUNK_ZEND if $len == 0;
@@ -429,8 +428,8 @@ sub read_input_chunked { # unlikely...
 			}
 			if ($$rbuf eq '') {
 				# read more of current chunk
-				my $r = sysread($sock, $$rbuf, 8192);
-				return recv_err($self, $r, $len) unless $r;
+				$self->do_read($rbuf, 8192) or
+					return recv_err($self, $len);
 			}
 		}
 	}
@@ -459,7 +458,7 @@ sub close {
 # for graceful shutdown in PublicInbox::Daemon:
 sub busy () {
 	my ($self) = @_;
-	($self->{rbuf} ne '' || $self->{env} || $self->{wbuf});
+	($self->{rbuf} || $self->{env} || $self->{wbuf});
 }
 
 # fires after pending writes are complete:
diff --git a/lib/PublicInbox/NNTP.pm b/lib/PublicInbox/NNTP.pm
index 53e18281..0a053627 100644
--- a/lib/PublicInbox/NNTP.pm
+++ b/lib/PublicInbox/NNTP.pm
@@ -6,7 +6,7 @@ package PublicInbox::NNTP;
 use strict;
 use warnings;
 use base qw(PublicInbox::DS);
-use fields qw(nntpd article rbuf ng);
+use fields qw(nntpd article ng);
 use PublicInbox::Search;
 use PublicInbox::Msgmap;
 use PublicInbox::MID qw(mid_escape);
@@ -985,11 +985,7 @@ sub event_step {
 	return $self->close if $r < 0;
 	my $len = bytes::length($$rbuf);
 	return $self->close if ($len >= LINE_MAX);
-	if ($len) {
-		$self->{rbuf} = $rbuf;
-	} else {
-		delete $self->{rbuf};
-	}
+	$self->rbuf_idle($rbuf);
 	update_idle_time($self);
 
 	# maybe there's more pipelined data, or we'll have
-- 
EW


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, back to index

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-26  8:32 [PATCH] ds: share lazy rbuf handling between HTTP and NNTP Eric Wong

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.org/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox