From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 59D3A1F8EC for ; Sun, 5 Jul 2020 23:28:05 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 07/43] wwwlisting: use GzipFilter for HTML Date: Sun, 5 Jul 2020 23:27:23 +0000 Message-Id: <20200705232759.3161-8-e@yhbt.net> In-Reply-To: <20200705232759.3161-1-e@yhbt.net> References: <20200705232759.3161-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The changes to GzipFilter here may be beneficial for building HTML and XML responses in other places, too. --- MANIFEST | 1 + lib/PublicInbox/GzipFilter.pm | 28 ++++++++++++++++++++++++++-- lib/PublicInbox/NoopFilter.pm | 13 +++++++++++++ lib/PublicInbox/WwwListing.pm | 25 +++++++++++++++---------- t/www_listing.t | 8 +++++++- 5 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 lib/PublicInbox/NoopFilter.pm diff --git a/MANIFEST b/MANIFEST index 6de2c7258..dcd7a7e5f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -159,6 +159,7 @@ lib/PublicInbox/NNTP.pm lib/PublicInbox/NNTPD.pm lib/PublicInbox/NNTPdeflate.pm lib/PublicInbox/NewsWWW.pm +lib/PublicInbox/NoopFilter.pm lib/PublicInbox/Over.pm lib/PublicInbox/OverIdx.pm lib/PublicInbox/ParentPipe.pm diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index 95fced053..8cc5ea00b 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -42,7 +42,7 @@ sub gzf_maybe ($$) { # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'} # Also used for ->getline callbacks sub translate ($$) { - my $self = $_[0]; + my $self = $_[0]; # $_[1] => input # allocate the zlib context lazily here, instead of in ->new. 
# Deflate contexts are memory-intensive and this object may @@ -72,10 +72,34 @@ sub write { $_[0]->{fh}->write(translate($_[0], $_[1])); } +# similar to ->translate; use this when we're sure we know we have +# more data to buffer after this +sub zmore { + my $self = $_[0]; # $_[1] => input + my $err = $self->{gz}->deflate($_[1], $self->{zbuf}); + die "gzip->deflate: $err" if $err != Z_OK; + ''; +} + +# flushes and returns the final bit of gzipped data +sub zflush ($;$) { + my $self = $_[0]; # $_[1] => final input (optional) + my $zbuf = delete $self->{zbuf}; + my $gz = delete $self->{gz}; + my $err; + if (defined $_[1]) { + $err = $gz->deflate($_[1], $zbuf); + die "gzip->deflate: $err" if $err != Z_OK; + } + $err = $gz->flush($zbuf, Z_FINISH); + die "gzip->flush: $err" if $err != Z_OK; + $zbuf; +} + sub close { my ($self) = @_; my $fh = delete $self->{fh}; - $fh->write(translate($self, undef)); + $fh->write(zflush($self)); $fh->close; } diff --git a/lib/PublicInbox/NoopFilter.pm b/lib/PublicInbox/NoopFilter.pm new file mode 100644 index 000000000..b9c00ff7a --- /dev/null +++ b/lib/PublicInbox/NoopFilter.pm @@ -0,0 +1,13 @@ +# Copyright (C) 2020 all contributors +# License: AGPL-3.0+ + +package PublicInbox::NoopFilter; +use strict; + +sub new { bless \(my $ignore), __PACKAGE__ } + +# noop workalike for PublicInbox::GzipFilter methods +sub translate { $_[1] // '' } +sub zmore { $_[1] } +sub zflush { $_[1] // '' } +1; diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index a3d4e2b35..780c97e91 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -10,6 +10,8 @@ use PublicInbox::Hval qw(ascii_html prurl); use PublicInbox::Linkify; use PublicInbox::View; use PublicInbox::Inbox; +use PublicInbox::NoopFilter; +use PublicInbox::GzipFilter qw(gzf_maybe); use bytes (); # bytes::length use HTTP::Date qw(time2str); use Digest::SHA (); @@ -104,13 +106,15 @@ sub ibx_entry { sub html ($$) { my ($env, $list) = @_; - my $title = 
'public-inbox';
-	my $out = '';
+	my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
+			'Content-Length', undef ];
+	my $gzf = gzf_maybe($h, $env) || PublicInbox::NoopFilter::new();
+	my $out = $gzf->zmore('<html><head><title>' .
+				'public-inbox listing</title>' .
+				'</head><body><pre>');
 	my $code = 404;
 	if (@$list) {
-		$title .= ' - listing';
 		$code = 200;
-
 		# Schwartzian transform since Inbox->modified is expensive
 		@$list = sort {
 			$b->[0] <=> $a->[0]
@@ -118,13 +122,14 @@ sub html ($$) {
 
 		my $tmp = join("\n", map { ibx_entry(@$_, $env) } @$list);
 		my $l = PublicInbox::Linkify->new;
-		$out = '<pre>'.$l->to_html($tmp).'</pre><hr>';
+		$out .= $gzf->zmore($l->to_html($tmp));
+	} else {
+		$out .= $gzf->zmore('no inboxes, yet');
 	}
-	$out = "<html><head><title>$title</title></head><body>" . $out;
-	$out .= '<pre>'. PublicInbox::WwwStream::code_footer($env) .
-		'</pre></body></html>';
-
-	my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+	$out .= $gzf->zflush('</pre><hr><pre>'.
+				PublicInbox::WwwStream::code_footer($env) .
+				'</pre></body></html>');
+	$h->[3] = bytes::length($out);
 	[ $code, $h, [ $out ] ];
 }
 
diff --git a/t/www_listing.t b/t/www_listing.t
index 0aededd43..c4511cd1f 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -35,13 +35,19 @@ like(PublicInbox::WwwListing::fingerprint($bare), qr/\A[a-f0-9]{40}\z/,
 
 sub tiny_test {
 	my ($json, $host, $port) = @_;
+	my $tmp;
 	my $http = HTTP::Tiny->new;
 	my $res = $http->get("http://$host:$port/");
 	is($res->{status}, 200, 'got HTML listing');
 	like($res->{content}, qr!</html>!si, 'listing looks like HTML');
+
+	$res = $http->get("http://$host:$port/", {'Accept-Encoding'=>'gzip'});
+	is($res->{status}, 200, 'got gzipped HTML listing');
+	IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
+	like($tmp, qr!</html>!si, 'unzipped listing looks like HTML');
+
 	$res = $http->get("http://$host:$port/manifest.js.gz");
 	is($res->{status}, 200, 'got manifest');
-	my $tmp;
 	IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
 	unlike($tmp, qr/"modified":\s*"/, 'modified is an integer');
 	my $manifest = $json->decode($tmp);