From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 07/43] wwwlisting: use GzipFilter for HTML
Date: Sun, 5 Jul 2020 23:27:23 +0000 [thread overview]
Message-ID: <20200705232759.3161-8-e@yhbt.net> (raw)
In-Reply-To: <20200705232759.3161-1-e@yhbt.net>
The changes to GzipFilter here may be beneficial for building
HTML and XML responses in other places, too.
---
MANIFEST | 1 +
lib/PublicInbox/GzipFilter.pm | 28 ++++++++++++++++++++++++++--
lib/PublicInbox/NoopFilter.pm | 13 +++++++++++++
lib/PublicInbox/WwwListing.pm | 25 +++++++++++++++----------
t/www_listing.t | 8 +++++++-
5 files changed, 62 insertions(+), 13 deletions(-)
create mode 100644 lib/PublicInbox/NoopFilter.pm
diff --git a/MANIFEST b/MANIFEST
index 6de2c7258..dcd7a7e5f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -159,6 +159,7 @@ lib/PublicInbox/NNTP.pm
lib/PublicInbox/NNTPD.pm
lib/PublicInbox/NNTPdeflate.pm
lib/PublicInbox/NewsWWW.pm
+lib/PublicInbox/NoopFilter.pm
lib/PublicInbox/Over.pm
lib/PublicInbox/OverIdx.pm
lib/PublicInbox/ParentPipe.pm
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 95fced053..8cc5ea00b 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -42,7 +42,7 @@ sub gzf_maybe ($$) {
# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
# Also used for ->getline callbacks
sub translate ($$) {
- my $self = $_[0];
+ my $self = $_[0]; # $_[1] => input
# allocate the zlib context lazily here, instead of in ->new.
# Deflate contexts are memory-intensive and this object may
@@ -72,10 +72,34 @@ sub write {
$_[0]->{fh}->write(translate($_[0], $_[1]));
}
+# similar to ->translate; use this when we're sure we have
+# more data to buffer after this
+sub zmore {
+ my $self = $_[0]; # $_[1] => input
+ my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
+ die "gzip->deflate: $err" if $err != Z_OK;
+ '';
+}
+
+# flushes and returns the final bit of gzipped data
+sub zflush ($;$) {
+ my $self = $_[0]; # $_[1] => final input (optional)
+ my $zbuf = delete $self->{zbuf};
+ my $gz = delete $self->{gz};
+ my $err;
+ if (defined $_[1]) {
+ $err = $gz->deflate($_[1], $zbuf);
+ die "gzip->deflate: $err" if $err != Z_OK;
+ }
+ $err = $gz->flush($zbuf, Z_FINISH);
+ die "gzip->flush: $err" if $err != Z_OK;
+ $zbuf;
+}
+
sub close {
my ($self) = @_;
my $fh = delete $self->{fh};
- $fh->write(translate($self, undef));
+ $fh->write(zflush($self));
$fh->close;
}
diff --git a/lib/PublicInbox/NoopFilter.pm b/lib/PublicInbox/NoopFilter.pm
new file mode 100644
index 000000000..b9c00ff7a
--- /dev/null
+++ b/lib/PublicInbox/NoopFilter.pm
@@ -0,0 +1,13 @@
+# Copyright (C) 2020 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+package PublicInbox::NoopFilter;
+use strict;
+
+sub new { bless \(my $ignore), __PACKAGE__ }
+
+# noop workalike for PublicInbox::GzipFilter methods
+sub translate { $_[1] // '' }
+sub zmore { $_[1] }
+sub zflush { $_[1] // '' }
+1;
diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm
index a3d4e2b35..780c97e91 100644
--- a/lib/PublicInbox/WwwListing.pm
+++ b/lib/PublicInbox/WwwListing.pm
@@ -10,6 +10,8 @@ use PublicInbox::Hval qw(ascii_html prurl);
use PublicInbox::Linkify;
use PublicInbox::View;
use PublicInbox::Inbox;
+use PublicInbox::NoopFilter;
+use PublicInbox::GzipFilter qw(gzf_maybe);
use bytes (); # bytes::length
use HTTP::Date qw(time2str);
use Digest::SHA ();
@@ -104,13 +106,15 @@ sub ibx_entry {
sub html ($$) {
my ($env, $list) = @_;
- my $title = 'public-inbox';
- my $out = '';
+ my $h = [ 'Content-Type', 'text/html; charset=UTF-8',
+ 'Content-Length', undef ];
+ my $gzf = gzf_maybe($h, $env) || PublicInbox::NoopFilter::new();
+ my $out = $gzf->zmore('<html><head><title>' .
+ 'public-inbox listing</title>' .
+ '</head><body><pre>');
my $code = 404;
if (@$list) {
- $title .= ' - listing';
$code = 200;
-
# Schwartzian transform since Inbox->modified is expensive
@$list = sort {
$b->[0] <=> $a->[0]
@@ -118,13 +122,14 @@ sub html ($$) {
my $tmp = join("\n", map { ibx_entry(@$_, $env) } @$list);
my $l = PublicInbox::Linkify->new;
- $out = '<pre>'.$l->to_html($tmp).'</pre><hr>';
+ $out .= $gzf->zmore($l->to_html($tmp));
+ } else {
+ $out .= $gzf->zmore('no inboxes, yet');
}
- $out = "<html><head><title>$title</title></head><body>" . $out;
- $out .= '<pre>'. PublicInbox::WwwStream::code_footer($env) .
- '</pre></body></html>';
-
- my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ];
+ $out .= $gzf->zflush('</pre><hr><pre>'.
+ PublicInbox::WwwStream::code_footer($env) .
+ '</pre></body></html>');
+ $h->[3] = bytes::length($out);
[ $code, $h, [ $out ] ];
}
diff --git a/t/www_listing.t b/t/www_listing.t
index 0aededd43..c4511cd1f 100644
--- a/t/www_listing.t
+++ b/t/www_listing.t
@@ -35,13 +35,19 @@ like(PublicInbox::WwwListing::fingerprint($bare), qr/\A[a-f0-9]{40}\z/,
sub tiny_test {
my ($json, $host, $port) = @_;
+ my $tmp;
my $http = HTTP::Tiny->new;
my $res = $http->get("http://$host:$port/");
is($res->{status}, 200, 'got HTML listing');
like($res->{content}, qr!</html>!si, 'listing looks like HTML');
+
+ $res = $http->get("http://$host:$port/", {'Accept-Encoding'=>'gzip'});
+ is($res->{status}, 200, 'got gzipped HTML listing');
+ IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
+ like($tmp, qr!</html>!si, 'unzipped listing looks like HTML');
+
$res = $http->get("http://$host:$port/manifest.js.gz");
is($res->{status}, 200, 'got manifest');
- my $tmp;
IO::Uncompress::Gunzip::gunzip(\(delete $res->{content}) => \$tmp);
unlike($tmp, qr/"modified":\s*"/, 'modified is an integer');
my $manifest = $json->decode($tmp);
next prev parent reply other threads:[~2020-07-05 23:28 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-05 23:27 [PATCH 00/43] www: async git cat-file w/ -httpd Eric Wong
2020-07-05 23:27 ` [PATCH 01/43] gzipfilter: minor cleanups Eric Wong
2020-07-05 23:27 ` [PATCH 02/43] wwwstream: oneshot: perform gzip without middleware Eric Wong
2020-07-05 23:27 ` [PATCH 03/43] www*stream: gzip ->getline responses Eric Wong
2020-07-05 23:27 ` [PATCH 04/43] wwwtext: gzip text/plain responses, as well Eric Wong
2020-07-05 23:27 ` [PATCH 05/43] wwwtext: switch to html_oneshot Eric Wong
2020-07-05 23:27 ` [PATCH 06/43] www: need: use WwwStream::html_oneshot Eric Wong
2020-07-05 23:27 ` Eric Wong [this message]
2020-07-05 23:27 ` [PATCH 08/43] gzipfilter: replace Compress::Raw::Deflate usages Eric Wong
2020-07-05 23:27 ` [PATCH 09/43] {gzip,noop}filter: ->zmore returns undef, always Eric Wong
2020-07-05 23:27 ` [PATCH 10/43] mbox: remove html_oneshot import Eric Wong
2020-07-05 23:27 ` [PATCH 11/43] wwwstatic: support gzipped directory listings Eric Wong
2020-07-05 23:27 ` [PATCH 12/43] qspawn: learn to gzip streaming responses Eric Wong
2020-07-05 23:27 ` [PATCH 13/43] stop auto-loading Plack::Middleware::Deflater Eric Wong
2020-07-05 23:27 ` [PATCH 14/43] mboxgz: do asynchronous git blob retrievals Eric Wong
2020-07-05 23:27 ` [PATCH 15/43] mboxgz: reduce hash depth Eric Wong
2020-07-05 23:27 ` [PATCH 16/43] mbox: async blob fetch for "single message" raw mboxrd Eric Wong
2020-07-05 23:27 ` [PATCH 17/43] wwwatomstream: simplify feed_update callers Eric Wong
2020-07-05 23:27 ` [PATCH 18/43] wwwatomstream: use PublicInbox::Inbox->modified for feed_updated Eric Wong
2020-07-05 23:27 ` [PATCH 19/43] wwwatomstream: reuse $ctx as $self Eric Wong
2020-07-05 23:27 ` [PATCH 20/43] xt/httpd-async-stream: allow more options Eric Wong
2020-07-05 23:27 ` [PATCH 21/43] wwwatomstream: support async blob fetch Eric Wong
2020-07-05 23:27 ` [PATCH 22/43] wwwstream: reduce object graph depth Eric Wong
2020-07-05 23:27 ` [PATCH 23/43] wwwstream: reduce blob fetch paths for ->getline Eric Wong
2020-07-05 23:27 ` [PATCH 24/43] www: start making gzipfilter the parent response class Eric Wong
2020-07-05 23:27 ` [PATCH 25/43] remove unused/redundant zlib-related imports Eric Wong
2020-07-05 23:27 ` [PATCH 26/43] wwwstream: use parent.pm and no warnings Eric Wong
2020-07-05 23:27 ` [PATCH 27/43] wwwstream: subclass off GzipFilter Eric Wong
2020-07-05 23:27 ` [PATCH 28/43] view: make /$INBOX/$MSGID/ permalink async Eric Wong
2020-07-05 23:27 ` [PATCH 29/43] view: /$INBOX/$MSGID/t/ reads blobs asynchronously Eric Wong
2020-07-05 23:27 ` [PATCH 30/43] view: update /$INBOX/$MSGID/T/ to be async Eric Wong
2020-07-05 23:27 ` [PATCH 31/43] feed: generate_i: eliminate pointless loop Eric Wong
2020-07-05 23:27 ` [PATCH 32/43] feed: /$INBOX/new.html fetches blobs asynchronously Eric Wong
2020-07-05 23:27 ` [PATCH 33/43] ssearchview: /$INBOX/?q=$QUERY&x=t uses async blobs Eric Wong
2020-07-05 23:27 ` [PATCH 34/43] view: eml_entry: reduce parameters Eric Wong
2020-07-05 23:27 ` [PATCH 35/43] view: /$INBOX/$MSGID/t/: avoid extra hash lookup in eml case Eric Wong
2020-07-05 23:27 ` [PATCH 36/43] wwwstream: eliminate ::response, use html_oneshot Eric Wong
2020-07-05 23:27 ` [PATCH 37/43] www: update internal docs Eric Wong
2020-07-05 23:27 ` [PATCH 38/43] view: simplify eml_entry callers further Eric Wong
2020-07-05 23:27 ` [PATCH 39/43] wwwtext: simplify gzf_maybe use Eric Wong
2020-07-05 23:27 ` [PATCH 40/43] wwwattach: support async blob retrievals Eric Wong
2020-07-05 23:27 ` [PATCH 41/43] gzipfilter: drop HTTP connection on bugs or data corruption Eric Wong
2020-07-05 23:27 ` [PATCH 42/43] daemon: warn on missing blobs Eric Wong
2020-07-05 23:27 ` [PATCH 43/43] gzipfilter: check http->{forward} for client disconnects Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200705232759.3161-8-e@yhbt.net \
--to=e@yhbt.net \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).