From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 220881F8DB for ; Sun, 5 Jul 2020 23:28:01 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 04/43] wwwtext: gzip text/plain responses, as well Date: Sun, 5 Jul 2020 23:27:20 +0000 Message-Id: <20200705232759.3161-5-e@yhbt.net> In-Reply-To: <20200705232759.3161-1-e@yhbt.net> References: <20200705232759.3161-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Most of our plain-text responses are config files big enough to warrant compression. --- lib/PublicInbox/WwwText.pm | 17 ++++++++++++++--- t/psgi_text.t | 33 ++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/lib/PublicInbox/WwwText.pm b/lib/PublicInbox/WwwText.pm index b23a415e4..508005fba 100644 --- a/lib/PublicInbox/WwwText.pm +++ b/lib/PublicInbox/WwwText.pm @@ -10,6 +10,8 @@ use PublicInbox::Linkify; use PublicInbox::WwwStream; use PublicInbox::Hval qw(ascii_html); use URI::Escape qw(uri_escape_utf8); +use PublicInbox::GzipFilter qw(gzf_maybe); +use Compress::Raw::Zlib qw(Z_FINISH Z_OK); our $QP_URL = 'https://xapian.org/docs/queryparser.html'; our $WIKI_URL = 'https://en.wikipedia.org/wiki'; my $hl = eval { @@ -35,14 +37,23 @@ sub get_text { $code = 404; $txt = "404 Not Found ($key)\n"; } + my $env = $ctx->{env}; if ($raw) { - $hdr->[3] = bytes::length($txt); - return [ $code, $hdr, [ $txt ] ] + my $body; + if (my $gzf = $code == 200 ? gzf_maybe($hdr, $env) : undef) { + my $zbuf = $gzf->translate($txt); + undef $txt; + $body = [ $zbuf .= $gzf->translate(undef) ]; + } else { + $body = [ $txt ]; + } + $hdr->[3] = bytes::length($body->[0]); + return [ $code, $hdr, $body ] } # enforce trailing slash for "wget -r" compatibility if (!$have_tslash && $code == 200) { - my $url = $ctx->{-inbox}->base_url($ctx->{env}); + my $url = $ctx->{-inbox}->base_url($env); $url .= "_/text/$key/"; return [ 302, [ 'Content-Type', 'text/plain', diff --git a/t/psgi_text.t b/t/psgi_text.t index 833bcaba7..9867feaa4 100644 --- a/t/psgi_text.t +++ b/t/psgi_text.t @@ -10,7 +10,7 @@ my $maindir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Plack::Builder); -require_mods(@mods); +require_mods(@mods, 'IO::Uncompress::Gunzip'); use_ok $_ foreach @mods; use PublicInbox::Import; use PublicInbox::Git; @@ -26,17 +26,36 @@ my $www = PublicInbox::WWW->new($config); test_psgi(sub { $www->call(@_) }, sub { my ($cb) = @_; - my $res; - $res = $cb->(GET('/test/_/text/help/')); - like($res->content, qr!public-inbox help.*!, - 'default help'); - $res = $cb->(GET('/test/_/text/config/raw')); + my $gunzipped; + my $req = GET('/test/_/text/help/'); + my $res = $cb->($req); + my $content = $res->content; + like($content, qr!public-inbox help.*!, 'default help'); + $req->header('Accept-Encoding' => 'gzip'); + $res = $cb->($req); + is($res->header('Content-Encoding'), 'gzip', 'got gzip encoding'); + is($res->header('Content-Type'), 'text/html; charset=UTF-8', + 'got gzipped HTML'); + IO::Uncompress::Gunzip::gunzip(\($res->content) => \$gunzipped); + is($gunzipped, $content, 'gzipped content is correct'); + + $req = GET('/test/_/text/config/raw'); + $res = $cb->($req); + $content = $res->content; + my $olen = $res->header('Content-Length'); my $f = "$tmpdir/cfg"; open my $fh, '>', $f or die; - print $fh $res->content or die; + print $fh $content or die; close $fh or die; my $cfg = PublicInbox::Config->new($f); is($cfg->{"$cfgpfx.address"}, $addr, 'got expected address in config'); + + $req->header('Accept-Encoding' => 'gzip'); + $res = $cb->($req); + is($res->header('Content-Encoding'), 'gzip', 'got gzip encoding'); + ok($res->header('Content-Length') < $olen, 'gzipped help is smaller'); + IO::Uncompress::Gunzip::gunzip(\($res->content) => \$gunzipped); + is($gunzipped, $content); }); done_testing();