user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 24/43] www: start making gzipfilter the parent response class
  2020-07-05 23:27  7% [PATCH 00/43] www: async git cat-file w/ -httpd Eric Wong
@ 2020-07-05 23:27  3% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-07-05 23:27 UTC (permalink / raw)
  To: meta

Virtually all of our responses are going to be gzipped, anyways.
This will allow us to utilize zlib as a buffering layer and
share common code for async blob retrieval responses.

To streamline this and allow GzipFilter to be a parent class,
we'll replace the NoopFilter with a similar CompressNoop class
which emulates the two Compress::Raw::Zlib::Deflate methods we
use.

This drops a bunch of redundant code and will hopefully make
upcoming WwwStream changes easier to reason about.
---
 MANIFEST                         |  2 +-
 lib/PublicInbox/CompressNoop.pm  | 22 +++++++
 lib/PublicInbox/GzipFilter.pm    | 75 +++++++++++++++++++-----
 lib/PublicInbox/Mbox.pm          | 90 ++++++++---------------------
 lib/PublicInbox/MboxGz.pm        | 71 +++++------------------
 lib/PublicInbox/NoopFilter.pm    | 24 --------
 lib/PublicInbox/WwwAtomStream.pm | 98 ++++++++------------------------
 lib/PublicInbox/WwwListing.pm    |  3 +-
 lib/PublicInbox/WwwStatic.pm     |  3 +-
 9 files changed, 148 insertions(+), 240 deletions(-)
 create mode 100644 lib/PublicInbox/CompressNoop.pm
 delete mode 100644 lib/PublicInbox/NoopFilter.pm

diff --git a/MANIFEST b/MANIFEST
index 9b0f50203..963caad02 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -100,6 +100,7 @@ lib/PublicInbox/Admin.pm
 lib/PublicInbox/AdminEdit.pm
 lib/PublicInbox/AltId.pm
 lib/PublicInbox/Cgit.pm
+lib/PublicInbox/CompressNoop.pm
 lib/PublicInbox/Config.pm
 lib/PublicInbox/ContentHash.pm
 lib/PublicInbox/DS.pm
@@ -159,7 +160,6 @@ lib/PublicInbox/NNTP.pm
 lib/PublicInbox/NNTPD.pm
 lib/PublicInbox/NNTPdeflate.pm
 lib/PublicInbox/NewsWWW.pm
-lib/PublicInbox/NoopFilter.pm
 lib/PublicInbox/Over.pm
 lib/PublicInbox/OverIdx.pm
 lib/PublicInbox/ParentPipe.pm
diff --git a/lib/PublicInbox/CompressNoop.pm b/lib/PublicInbox/CompressNoop.pm
new file mode 100644
index 000000000..fe73c2d10
--- /dev/null
+++ b/lib/PublicInbox/CompressNoop.pm
@@ -0,0 +1,22 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Provide the same methods as Compress::Raw::Zlib::Deflate but
+# does no transformation of outgoing data
+package PublicInbox::CompressNoop;
+use strict;
+use Compress::Raw::Zlib qw(Z_OK);
+
+sub new { bless \(my $self), __PACKAGE__ }
+
+sub deflate { # ($self, $input, $output)
+	$_[2] .= $_[1];
+	Z_OK;
+}
+
+sub flush { # ($self, $output, $flags = Z_FINISH)
+	$_[1] //= '';
+	Z_OK;
+}
+
+1;
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 0a6c56a5d..99d43cf04 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -6,6 +6,10 @@ package PublicInbox::GzipFilter;
 use strict;
 use parent qw(Exporter);
 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
+use PublicInbox::CompressNoop;
+use PublicInbox::Eml;
+use PublicInbox::GitAsyncCat;
+
 our @EXPORT_OK = qw(gzf_maybe);
 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
 my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
@@ -14,22 +18,41 @@ sub new { bless {}, shift }
 
 # for Qspawn if using $env->{'pi-httpd.async'}
 sub attach {
-	my ($self, $fh) = @_;
-	$self->{fh} = $fh;
+	my ($self, $http_out) = @_;
+	$self->{http_out} = $http_out;
 	$self
 }
 
-# returns `0' and not `undef' on failure (see Www*Stream)
-sub gzf_maybe ($$) {
+sub gz_or_noop {
 	my ($res_hdr, $env) = @_;
-	return 0 if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/;
-	my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
-	return 0 if $err != Z_OK;
+	if (($env->{HTTP_ACCEPT_ENCODING} // '') =~ /\bgzip\b/) {
+		$env->{'plack.skip-deflater'} = 1;
+		push @$res_hdr, @GZIP_HDRS;
+		gzip_or_die();
+	} else {
+		PublicInbox::CompressNoop::new();
+	}
+}
 
-	# in case Plack::Middleware::Deflater is loaded:
-	$env->{'plack.skip-deflater'} = 1;
-	push @$res_hdr, @GZIP_HDRS;
-	bless { gz => $gz }, __PACKAGE__;
+sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ }
+
+sub psgi_response {
+	my ($self, $code, $res_hdr, $next_cb, $eml_cb) = @_;
+	my $env = $self->{env};
+	$self->{gz} //= gz_or_noop($res_hdr, $env);
+	if ($env->{'pi-httpd.async'}) {
+		$self->{async_next} = $next_cb;
+		$self->{async_eml} = $eml_cb;
+		my $http = $env->{'psgix.io'}; # PublicInbox::HTTP
+		$http->{forward} = $self;
+		sub {
+			my ($wcb) = @_; # -httpd provided write callback
+			$self->{http_out} = $wcb->([$code, $res_hdr]);
+			$next_cb->($http); # start stepping
+		};
+	} else { # generic PSGI code path
+		[ $code, $res_hdr, $self ];
+	}
 }
 
 sub qsp_maybe ($$) {
@@ -80,7 +103,7 @@ sub translate ($$) {
 
 sub write {
 	# my $ret = bytes::length($_[1]); # XXX does anybody care?
-	$_[0]->{fh}->write(translate($_[0], $_[1]));
+	$_[0]->{http_out}->write(translate($_[0], $_[1]));
 }
 
 # similar to ->translate; use this when we're sure we know we have
@@ -109,9 +132,31 @@ sub zflush ($;$) {
 
 sub close {
 	my ($self) = @_;
-	my $fh = delete $self->{fh};
-	$fh->write(zflush($self));
-	$fh->close;
+	if (my $http_out = delete $self->{http_out}) {
+		$http_out->write(zflush($self));
+		$http_out->close;
+	}
+}
+
+# this is public-inbox-httpd-specific
+sub async_blob_cb { # git->cat_async callback
+	my ($bref, $oid, $type, $size, $self) = @_;
+	my $http = $self->{env}->{'psgix.io'} or return; # client abort
+	my $smsg = $self->{smsg} or die 'BUG: no smsg';
+	if (!defined($oid)) {
+		# it's possible to have TOCTOU if an admin runs
+		# public-inbox-(edit|purge), just move onto the next message
+		return $http->next_step($self->{async_next});
+	}
+	$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
+	$self->{async_eml}->($self, PublicInbox::Eml->new($bref));
+	$http->next_step($self->{async_next});
+}
+
+sub smsg_blob {
+	my ($self, $smsg) = @_;
+	git_async_cat($self->{-inbox}->git, $smsg->{blob},
+			\&async_blob_cb, $self);
 }
 
 1;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 895f828c5..abdf43c93 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -9,13 +9,11 @@
 # more common "push" model)
 package PublicInbox::Mbox;
 use strict;
-use warnings;
+use parent 'PublicInbox::GzipFilter';
 use PublicInbox::MID qw/mid_escape/;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Smsg;
 use PublicInbox::Eml;
-use PublicInbox::GitAsyncCat;
-use PublicInbox::GzipFilter qw(gzf_maybe);
 
 # called by PSGI server as body response
 # this gets called twice for every message, once to return the header,
@@ -25,49 +23,35 @@ sub getline {
 	my $smsg = $ctx->{smsg} or return;
 	my $ibx = $ctx->{-inbox};
 	my $eml = $ibx->smsg_eml($smsg) or return;
-	$ctx->{smsg} = $ibx->over->next_by_mid($ctx->{mid}, @{$ctx->{id_prev}});
-	msg_hdr($ctx, $eml, $smsg->{mid}) . msg_body($eml);
-}
-
-sub close { !!delete($_[0]->{http_out}) }
-
-sub mbox_async_step ($) { # public-inbox-httpd-only
-	my ($ctx) = @_;
-	if (my $smsg = $ctx->{smsg}) {
-		git_async_cat($ctx->{-inbox}->git, $smsg->{blob},
-				\&mbox_blob_cb, $ctx);
-	} elsif (my $out = delete $ctx->{http_out}) {
-		$out->close;
+	my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
+	$ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
+	if ($n) {
+		$ctx->translate(msg_body($eml));
+	} else { # last message
+		$ctx->zmore(msg_body($eml));
+		$ctx->zflush;
 	}
 }
 
 # called by PublicInbox::DS::write
-sub mbox_async_next {
+sub async_next {
 	my ($http) = @_; # PublicInbox::HTTP
 	my $ctx = $http->{forward} or return; # client aborted
 	eval {
-		$ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(
-					$ctx->{mid}, @{$ctx->{id_prev}});
-		mbox_async_step($ctx);
+		my $smsg = $ctx->{smsg} or return $ctx->close;
+		$ctx->smsg_blob($smsg);
 	};
+	warn "E: $@" if $@;
 }
 
-# this is public-inbox-httpd-specific
-sub mbox_blob_cb { # git->cat_async callback
-	my ($bref, $oid, $type, $size, $ctx) = @_;
-	my $http = $ctx->{env}->{'psgix.io'} or return; # client abort
-	my $smsg = delete $ctx->{smsg} or die 'BUG: no smsg';
-	if (!defined($oid)) {
-		# it's possible to have TOCTOU if an admin runs
-		# public-inbox-(edit|purge), just move onto the next message
-		return $http->next_step(\&mbox_async_next);
-	} else {
-		$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
-	}
-	my $eml = PublicInbox::Eml->new($bref);
-	$ctx->{http_out}->write(msg_hdr($ctx, $eml, $smsg->{mid}));
-	$ctx->{http_out}->write(msg_body($eml));
-	$http->next_step(\&mbox_async_next);
+sub async_eml { # ->{async_eml} for async_blob_cb
+	my ($ctx, $eml) = @_;
+	my $smsg = delete $ctx->{smsg};
+	# next message
+	$ctx->{smsg} = $ctx->{-inbox}->over->next_by_mid(@{$ctx->{next_arg}});
+
+	$ctx->zmore(msg_hdr($ctx, $eml, $smsg->{mid}));
+	$ctx->{http_out}->write($ctx->translate(msg_body($eml)));
 }
 
 sub res_hdr ($$) {
@@ -97,41 +81,17 @@ sub no_over_raw ($) {
 		[ msg_hdr($ctx, $eml, $ctx->{mid}) . msg_body($eml) ] ]
 }
 
-sub stream_raw { # MboxGz response callback
-	my ($ctx) = @_;
-	delete($ctx->{smsg}) //
-		$ctx->{-inbox}->over->next_by_mid($ctx->{mid},
-						@{$ctx->{id_prev}});
-}
-
 # /$INBOX/$MESSAGE_ID/raw
 sub emit_raw {
 	my ($ctx) = @_;
-	my $env = $ctx->{env};
-	$ctx->{base_url} = $ctx->{-inbox}->base_url($env);
+	$ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
 	my $over = $ctx->{-inbox}->over or return no_over_raw($ctx);
 	my ($id, $prev);
-	my $smsg = $over->next_by_mid($ctx->{mid}, \$id, \$prev) or return;
-	$ctx->{smsg} = $smsg;
+	my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
+	my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
 	my $res_hdr = res_hdr($ctx, $smsg->{subject});
-	$ctx->{id_prev} = [ \$id, \$prev ];
-
-	if (my $gzf = gzf_maybe($res_hdr, $env)) {
-		$ctx->{gz} = delete $gzf->{gz};
-		require PublicInbox::MboxGz;
-		PublicInbox::MboxGz::response($ctx, \&stream_raw, $res_hdr);
-	} elsif ($env->{'pi-httpd.async'}) {
-		sub {
-			my ($wcb) = @_; # -httpd provided write callback
-			$ctx->{http_out} = $wcb->([200, $res_hdr]);
-			$ctx->{env}->{'psgix.io'}->{forward} = $ctx;
-			bless $ctx, __PACKAGE__;
-			mbox_async_step($ctx); # start stepping
-		};
-	} else { # generic PSGI code path
-		bless $ctx, __PACKAGE__; # respond to ->getline
-		[ 200, $res_hdr, $ctx ];
-	}
+	bless $ctx, __PACKAGE__;
+	$ctx->psgi_response(200, $res_hdr, \&async_next, \&async_eml);
 }
 
 sub msg_hdr ($$;$) {
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 716bf7b19..fdd16f68e 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -6,77 +6,32 @@ use parent 'PublicInbox::GzipFilter';
 use PublicInbox::Eml;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Mbox;
-use PublicInbox::GitAsyncCat;
 *msg_hdr = \&PublicInbox::Mbox::msg_hdr;
 *msg_body = \&PublicInbox::Mbox::msg_body;
 
-# this is public-inbox-httpd-specific
-sub mboxgz_blob_cb { # git->cat_async callback
-	my ($bref, $oid, $type, $size, $self) = @_;
-	my $http = $self->{env}->{'psgix.io'} or return; # client abort
-	my $smsg = delete $self->{smsg} or die 'BUG: no smsg';
-	if (!defined($oid)) {
-		# it's possible to have TOCTOU if an admin runs
-		# public-inbox-(edit|purge), just move onto the next message
-		return $http->next_step(\&mboxgz_async_next);
-	} else {
-		$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
-	}
-	my $eml = PublicInbox::Eml->new($bref);
-	$self->zmore(msg_hdr($self, $eml, $smsg->{mid}));
-
-	# PublicInbox::HTTP::{Chunked,Identity}::write
-	$self->{http_out}->write($self->translate(msg_body($eml)));
-
-	$http->next_step(\&mboxgz_async_next);
-}
-
-# this is public-inbox-httpd-specific
-sub mboxgz_async_step ($) {
-	my ($self) = @_;
-	if (my $smsg = $self->{smsg} = $self->{cb}->($self)) {
-		git_async_cat($self->{-inbox}->git, $smsg->{blob},
-				\&mboxgz_blob_cb, $self);
-	} elsif (my $out = delete $self->{http_out}) {
-		$out->write($self->zflush);
-		$out->close;
-	}
-}
-
-# called by PublicInbox::DS::write
-sub mboxgz_async_next {
+sub async_next ($) {
 	my ($http) = @_; # PublicInbox::HTTP
-	mboxgz_async_step($http->{forward});
-}
-
-# called by PublicInbox::HTTP::close, or any other PSGI server
-sub close { !!delete($_[0]->{http_out}) }
-
-sub response {
-	my ($self, $cb, $res_hdr) = @_;
-	$self->{cb} = $cb;
-	bless $self, __PACKAGE__;
-	if ($self->{env}->{'pi-httpd.async'}) {
-		sub {
-			my ($wcb) = @_; # -httpd provided write callback
-			$self->{http_out} = $wcb->([200, $res_hdr]);
-			$self->{env}->{'psgix.io'}->{forward} = $self;
-			mboxgz_async_step($self); # start stepping
-		};
-	} else { # generic PSGI
-		[ 200, $res_hdr, $self ];
-	}
+	my $ctx = $http->{forward} or return;
+	eval {
+		$ctx->{smsg} = $ctx->{cb}->($ctx) or return $ctx->close;
+		$ctx->smsg_blob($ctx->{smsg});
+	};
+	warn "E: $@" if $@;
 }
 
 sub mbox_gz {
 	my ($self, $cb, $fn) = @_;
+	$self->{cb} = $cb;
 	$self->{base_url} = $self->{-inbox}->base_url($self->{env});
 	$self->{gz} = PublicInbox::GzipFilter::gzip_or_die();
 	$fn = to_filename($fn // 'no-subject');
 	$fn = 'no-subject' if $fn eq '';
 	# http://www.iana.org/assignments/media-types/application/gzip
-	response($self, $cb, [ qw(Content-Type application/gzip),
-		'Content-Disposition', "inline; filename=$fn.mbox.gz" ]);
+	bless $self, __PACKAGE__;
+	my $res_hdr = [ 'Content-Type' => 'application/gzip',
+		'Content-Disposition' => "inline; filename=$fn.mbox.gz" ];
+	$self->psgi_response(200, $res_hdr, \&async_next,
+				\&PublicInbox::Mbox::async_eml);
 }
 
 # called by Plack::Util::foreach or similar (generic PSGI)
diff --git a/lib/PublicInbox/NoopFilter.pm b/lib/PublicInbox/NoopFilter.pm
deleted file mode 100644
index a97dbde64..000000000
--- a/lib/PublicInbox/NoopFilter.pm
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (C) 2020 all contributors <meta@public-inbox.org>
-# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
-
-package PublicInbox::NoopFilter;
-use strict;
-
-sub new { bless \(my $self = ''), __PACKAGE__ }
-
-# noop workalike for PublicInbox::GzipFilter methods
-sub translate {
-	my $self = $_[0];
-	my $ret = $$self .= ($_[1] // '');
-	$$self = '';
-	$ret;
-}
-
-sub zmore {
-	${$_[0]} .= $_[1];
-	undef;
-}
-
-sub zflush { translate($_[0], $_[1]) }
-
-1;
diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm
index 583309228..073df1dfa 100644
--- a/lib/PublicInbox/WwwAtomStream.pm
+++ b/lib/PublicInbox/WwwAtomStream.pm
@@ -7,107 +7,59 @@
 # more common "push" model)
 package PublicInbox::WwwAtomStream;
 use strict;
-use warnings;
+use parent 'PublicInbox::GzipFilter';
 
 use POSIX qw(strftime);
 use Digest::SHA qw(sha1_hex);
 use PublicInbox::Address;
 use PublicInbox::Hval qw(ascii_html mid_href);
 use PublicInbox::MsgTime qw(msg_timestamp);
-use PublicInbox::GzipFilter qw(gzf_maybe);
-use PublicInbox::GitAsyncCat;
-
-# called by generic PSGI server after getline,
-# and also by PublicInbox::HTTP::close
-sub close { !!delete($_[0]->{http_out}) }
 
 sub new {
 	my ($class, $ctx, $cb) = @_;
 	$ctx->{feed_base_url} = $ctx->{-inbox}->base_url($ctx->{env});
-	$ctx->{cb} = $cb || \&close;
+	$ctx->{cb} = $cb || \&PublicInbox::GzipFilter::close;
 	$ctx->{emit_header} = 1;
 	bless $ctx, $class;
 }
 
-# called by PublicInbox::DS::write
-sub atom_async_next {
+sub async_next ($) {
 	my ($http) = @_; # PublicInbox::HTTP
-	atom_async_step($http->{forward});
-}
-
-# this is public-inbox-httpd-specific
-sub atom_blob_cb { # git->cat_async callback
-	my ($bref, $oid, $type, $size, $ctx) = @_;
-	my $http = $ctx->{env}->{'psgix.io'} or return; # client abort
-	my $smsg = delete $ctx->{smsg} or die 'BUG: no smsg';
-	if (!defined($oid)) {
-		# it's possible to have TOCTOU if an admin runs
-		# public-inbox-(edit|purge), just move onto the next message
-		return $http->next_step(\&atom_async_next);
-	} else {
-		$smsg->{blob} eq $oid or die "BUG: $smsg->{blob} != $oid";
-	}
-	my $buf = feed_entry($ctx, $smsg, PublicInbox::Eml->new($bref));
-	if (my $gzf = $ctx->{gzf}) {
-		$buf = $gzf->translate($buf);
-	}
-	# PublicInbox::HTTP::{Chunked,Identity}::write
-	$ctx->{http_out}->write($buf);
-
-	$http->next_step(\&atom_async_next);
+	my $ctx = $http->{forward} or return;
+	eval {
+		if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) {
+			$ctx->smsg_blob($smsg);
+		} else {
+			$ctx->{http_out}->write($ctx->translate('</feed>'));
+			$ctx->close;
+		}
+	};
+	warn "E: $@" if $@;
 }
 
-sub atom_async_step { # this is public-inbox-httpd-specific
-	my ($ctx) = @_;
-	if (my $smsg = $ctx->{smsg} = $ctx->{cb}->($ctx)) {
-		git_async_cat($ctx->{-inbox}->git, $smsg->{blob},
-				\&atom_blob_cb, $ctx);
-	} elsif (my $out = delete $ctx->{http_out}) {
-		if (my $gzf = delete $ctx->{gzf}) {
-			$out->write($gzf->zflush);
-		}
-		$out->close;
-	}
+sub async_eml { # ->{async_eml} for async_blob_cb
+	my ($ctx, $eml) = @_;
+	my $smsg = delete $ctx->{smsg};
+	$ctx->{http_out}->write($ctx->translate(feed_entry($ctx, $smsg, $eml)))
 }
 
 sub response {
 	my ($class, $ctx, $code, $cb) = @_;
 	my $res_hdr = [ 'Content-Type' => 'application/atom+xml' ];
 	$class->new($ctx, $cb);
-	$ctx->{gzf} = gzf_maybe($res_hdr, $ctx->{env});
-	if ($ctx->{env}->{'pi-httpd.async'}) {
-		sub {
-			my ($wcb) = @_; # -httpd provided write callback
-			$ctx->{http_out} = $wcb->([200, $res_hdr]);
-			$ctx->{env}->{'psgix.io'}->{forward} = $ctx;
-			atom_async_step($ctx); # start stepping
-		};
-	} else {
-		[ $code, $res_hdr, $ctx ];
-	}
+	$ctx->psgi_response($code, $res_hdr, \&async_next, \&async_eml);
 }
 
 # called once for each message by PSGI server
 sub getline {
 	my ($self) = @_;
-	my $buf = do {
-		if (my $middle = $self->{cb}) {
-			if (my $smsg = $middle->($self)) {
-				my $eml = $self->{-inbox}->smsg_eml($smsg) or
-						return '';
-				feed_entry($self, $smsg, $eml);
-			} else {
-				undef;
-			}
-		}
-	} // (delete($self->{cb}) ? '</feed>' : undef);
-
-	# gzf may be GzipFilter, `undef' or `0'
-	my $gzf = $self->{gzf} or return $buf;
-
-	return $gzf->translate($buf) if defined $buf;
-	$self->{gzf} = 0; # next call to ->getline returns $buf (== undef)
-	$gzf->translate(undef);
+	my $cb = $self->{cb} or return;
+	while (my $smsg = $cb->($self)) {
+		my $eml = $self->{-inbox}->smsg_eml($smsg) or next;
+		return $self->translate(feed_entry($self, $smsg, $eml));
+	}
+	delete $self->{cb};
+	$self->zflush('</feed>');
 }
 
 # private
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index d641e6d5c..5f85e3464 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -10,7 +10,6 @@ use PublicInbox::Hval qw(ascii_html prurl);
 use PublicInbox::Linkify;
 use PublicInbox::View;
 use PublicInbox::Inbox;
-use PublicInbox::NoopFilter;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use bytes (); # bytes::length
 use HTTP::Date qw(time2str);
@@ -108,7 +107,7 @@ sub html ($$) {
 	my ($env, $list) = @_;
 	my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
 			'Content-Length', undef ];
-	my $gzf = gzf_maybe($h, $env) || PublicInbox::NoopFilter::new();
+	my $gzf = gzf_maybe($h, $env);
 	$gzf->zmore('<html><head><title>' .
 				'public-inbox listing</title>' .
 				'</head><body><pre>');
diff --git a/lib/PublicInbox/WwwStatic.pm b/lib/PublicInbox/WwwStatic.pm
index d0611949d..051d2e039 100644
--- a/lib/PublicInbox/WwwStatic.pm
+++ b/lib/PublicInbox/WwwStatic.pm
@@ -17,7 +17,6 @@ use HTTP::Date qw(time2str);
 use HTTP::Status qw(status_message);
 use Errno qw(EACCES ENOTDIR ENOENT);
 use URI::Escape qw(uri_escape_utf8);
-use PublicInbox::NoopFilter;
 use PublicInbox::GzipFilter qw(gzf_maybe);
 use PublicInbox::Hval qw(ascii_html);
 use Plack::MIME;
@@ -313,7 +312,7 @@ sub dir_response ($$$) {
 
 	my $path_info_html = ascii_html($path_info);
 	my $h = [qw(Content-Type text/html Content-Length), undef];
-	my $gzf = gzf_maybe($h, $env) || PublicInbox::NoopFilter::new();
+	my $gzf = gzf_maybe($h, $env);
 	$gzf->zmore("<html><head><title>Index of $path_info_html</title>" .
 		${$self->{style}} .
 		"</head><body><pre>Index of $path_info_html</pre><hr><pre>\n");

^ permalink raw reply related	[relevance 3%]

* [PATCH 00/43] www: async git cat-file w/ -httpd
@ 2020-07-05 23:27  7% Eric Wong
  2020-07-05 23:27  3% ` [PATCH 24/43] www: start making gzipfilter the parent response class Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-07-05 23:27 UTC (permalink / raw)
  To: meta

This allows -httpd to make better use of time it spends waiting
on git-cat-file to respond.  It allows us to deal with
high-latency HDD storage without a client monopolizing the event
loop.  Even on a mid-range consumer-grade SSD, this seems to
give a 10+% speed improvement for HTTP responses requiring many
blobs, including all /T/, /t/, and /t.mbox.gz endpoints.

This only benefits indexed inboxes (both v1 and v2); I'm not
sure if anybody still uses unindexed v1 inboxes nowadays.

A new xt/httpd-async-stream.t maintainer test ensures checksums
for responses before and after this series match exactly as
before.

This builds off a branch I started several months ago (but never
published here) to integrate gzip responses into our codebase
and remove our optional dependency on Plack::Middleware::Deflater.

We already gzip a bunch of things independent of
Plack::Middleware::Deflater: manifest.js.gz, altid SQLite3 dumps
and all the *.mbox.gz endpoints; so being able to use gzip on
all of our responses without an extra dependency seemed logical.

Being able to consistently use our GzipFilter API to perform
buffering via ->zmore made it significantly easier to reason
about small response chunks for ghost messages interspersed with
large ones when streaming /$INBOX/$MSGID/t/ endpoints.

I'm not yet maximizing use of ->zmore for all buffering of HTTP
responses, yet; measurements need to happen, first.  That may
happen in the 1.7 time frame.  In particular, we would need to
ensure the Perl method dispatch and DSO overhead to Zlib.so and
libz.so of making many ->zmore calls doesn't cause performance
regressions compared to the current `.=' use and calling
->zmore/->translate fewer times.

Eric Wong (43):
  gzipfilter: minor cleanups
  wwwstream: oneshot: perform gzip without middleware
  www*stream: gzip ->getline responses
  wwwtext: gzip text/plain responses, as well
  wwwtext: switch to html_oneshot
  www: need: use WwwStream::html_oneshot
  wwwlisting: use GzipFilter for HTML
  gzipfilter: replace Compress::Raw::Deflate usages
  {gzip,noop}filter: ->zmore returns undef, always
  mbox: remove html_oneshot import
  wwwstatic: support gzipped response directory listings
  qspawn: learn to gzip streaming responses
  stop auto-loading Plack::Middleware::Deflater
  mboxgz: do asynchronous git blob retrievals
  mboxgz: reduce object hash depth
  mbox: async blob retrieval for "single message" raw mboxrd
  wwwatomstream: simplify feed_update callers
  wwwatomstream: use PublicInbox::Inbox->modified for feed_updated
  wwwatomstream: reuse $ctx as $self
  xt/httpd-async-stream: allow more options
  wwwatomstream: support asynchronous blob retrievals
  wwwstream: reduce object graph depth
  wwwstream: reduce blob retrieval paths for ->getline
  www: start making gzipfilter the parent response class
  remove unused/redundant zlib-related imports
  wwwstream: use parent.pm and no warnings
  wwwstream: subclass off GzipFilter
  view: wire up /$INBOX/$MESSAGE_ID/ permalink to async
  view: /$INBOX/$MSGID/t/ reads blobs asynchronously
  view: update /$INBOX/$MSGID/T/ to be async
  feed: generate_i: eliminate pointless loop
  feed: /$INBOX/new.html retrieves blobs asynchronously
  ssearchview: /$INBOX/?q=$QUERY&x=t uses async blobs
  view: eml_entry: reduce parameters
  view: /$INBOX/$MSGID/t/: avoid extra hash lookup in eml case
  wwwstream: eliminate ::response, use html_oneshot
  www: update internal docs
  view: simplify eml_entry callers further
  wwwtext: simplify gzf_maybe use
  wwwattach: support async blob retrievals
  gzipfilter: drop HTTP connection on bugs or data corruption
  daemon: warn on missing blobs
  gzipfilter: check http->{forward} for client disconnects

 Documentation/mknews.perl            |  20 +--
 Documentation/public-inbox-httpd.pod |   1 -
 Documentation/technical/ds.txt       |   4 +-
 INSTALL                              |   5 -
 MANIFEST                             |   2 +
 ci/deps.perl                         |   1 -
 examples/cgit.psgi                   |   8 -
 examples/newswww.psgi                |   8 -
 examples/public-inbox.psgi           |   9 --
 examples/unsubscribe.psgi            |   1 -
 lib/PublicInbox/CompressNoop.pm      |  22 +++
 lib/PublicInbox/Feed.pm              |  22 ++-
 lib/PublicInbox/GetlineBody.pm       |   4 +-
 lib/PublicInbox/GzipFilter.pm        | 168 +++++++++++++++++---
 lib/PublicInbox/HTTP.pm              |   7 +
 lib/PublicInbox/HTTPD.pm             |   5 +-
 lib/PublicInbox/IMAP.pm              |   1 +
 lib/PublicInbox/Mbox.pm              | 137 +++++++++--------
 lib/PublicInbox/MboxGz.pm            |  81 ++++------
 lib/PublicInbox/NNTP.pm              |   1 +
 lib/PublicInbox/Qspawn.pm            |   6 +-
 lib/PublicInbox/SearchView.pm        |  40 ++---
 lib/PublicInbox/View.pm              | 219 ++++++++++++++-------------
 lib/PublicInbox/WWW.pm               |   9 +-
 lib/PublicInbox/WwwAtomStream.pm     |  66 ++++----
 lib/PublicInbox/WwwAttach.pm         |  63 ++++++--
 lib/PublicInbox/WwwListing.pm        |  24 +--
 lib/PublicInbox/WwwStatic.pm         |  14 +-
 lib/PublicInbox/WwwStream.pm         | 110 ++++++++------
 lib/PublicInbox/WwwText.pm           |  26 ++--
 script/public-inbox-httpd            |   9 --
 script/public-inbox.cgi              |   7 -
 t/httpd-corner.psgi                  |   7 +
 t/httpd-corner.t                     |   9 +-
 t/plack.t                            |   4 +
 t/psgi_attach.t                      | 162 +++++++++++---------
 t/psgi_text.t                        |  33 +++-
 t/psgi_v2.t                          |  80 ++++++++--
 t/www_listing.t                      |   8 +-
 t/www_static.t                       |  11 +-
 xt/httpd-async-stream.t              | 104 +++++++++++++
 41 files changed, 964 insertions(+), 554 deletions(-)
 create mode 100644 lib/PublicInbox/CompressNoop.pm
 create mode 100644 xt/httpd-async-stream.t

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-07-05 23:27  7% [PATCH 00/43] www: async git cat-file w/ -httpd Eric Wong
2020-07-05 23:27  3% ` [PATCH 24/43] www: start making gzipfilter the parent response class Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).