From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id AEE751F5B7 for ; Sun, 5 Jul 2020 23:28:00 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 03/43] www*stream: gzip ->getline responses Date: Sun, 5 Jul 2020 23:27:19 +0000 Message-Id: <20200705232759.3161-4-e@yhbt.net> In-Reply-To: <20200705232759.3161-1-e@yhbt.net> References: <20200705232759.3161-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Our most common endpoints deserve to be gzipped. --- lib/PublicInbox/GzipFilter.pm | 21 +++++++++++++++----- lib/PublicInbox/WwwAtomStream.pm | 25 ++++++++++++++++------- lib/PublicInbox/WwwStream.pm | 34 ++++++++++++++++++++------------ 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index 115660cb1..95fced053 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -6,8 +6,9 @@ package PublicInbox::GzipFilter; use strict; use parent qw(Exporter); use Compress::Raw::Zlib qw(Z_FINISH Z_OK); -our @EXPORT_OK = qw(gzip_maybe); +our @EXPORT_OK = qw(gzip_maybe gzf_maybe); my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1); +my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip); sub new { bless {}, shift } @@ -18,18 +19,28 @@ sub attach { $self } -sub gzip_maybe ($) { - my ($env) = @_; +sub gzip_maybe ($$) { + my ($res_hdr, $env) = @_; return if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/; + my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); + return if $err != Z_OK; + # in case Plack::Middleware::Deflater is loaded: $env->{'plack.skip-deflater'} = 1; - my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); - $err == Z_OK ? $gz : undef; + push @$res_hdr, @GZIP_HDRS; + $gz; +} + +sub gzf_maybe ($$) { + my ($res_hdr, $env) = @_; + my $gz = gzip_maybe($res_hdr, $env) or return 0; + bless { gz => $gz }, __PACKAGE__; } # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'} +# Also used for ->getline callbacks sub translate ($$) { my $self = $_[0]; diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 9dc24e16e..c407e343f 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -14,6 +14,7 @@ use Digest::SHA qw(sha1_hex); use PublicInbox::Address; use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MsgTime qw(msg_timestamp); +use PublicInbox::GzipFilter qw(gzf_maybe); # called by PSGI server after getline: sub close {} @@ -26,18 +27,28 @@ sub new { sub response { my ($class, $ctx, $code, $cb) = @_; - [ $code, [ 'Content-Type', 'application/atom+xml' ], - $class->new($ctx, $cb) ] + my $h = [ 'Content-Type' => 'application/atom+xml' ]; + my $self = $class->new($ctx, $cb); + $self->{gzf} = gzf_maybe($h, $ctx->{env}); + [ $code, $h, $self ] } # called once for each message by PSGI server sub getline { my ($self) = @_; - if (my $middle = $self->{cb}) { - my $smsg = $middle->($self->{ctx}); - return feed_entry($self, $smsg) if $smsg; - } - delete $self->{cb} ? '' : undef; + my $buf = do { + if (my $middle = $self->{cb}) { + my $smsg = $middle->($self->{ctx}); + feed_entry($self, $smsg) if $smsg; + } + } // (delete($self->{cb}) ? '' : undef); + + # gzf may be GzipFilter, `undef' or `0' + my $gzf = $self->{gzf} or return $buf; + + return $gzf->translate($buf) if defined $buf; + $self->{gzf} = 0; # next call to ->getline returns $buf (== undef) + $gzf->translate(undef); } # private diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 79ed6871e..c964dbd41 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -14,7 +14,7 @@ our @EXPORT_OK = qw(html_oneshot); use bytes (); # length use PublicInbox::Hval qw(ascii_html prurl); use Compress::Raw::Zlib qw(Z_FINISH Z_OK); -use PublicInbox::GzipFilter qw(gzip_maybe); +use PublicInbox::GzipFilter qw(gzip_maybe gzf_maybe); our $TOR_URL = 'https://www.torproject.org/'; our $CODE_URL = 'https://public-inbox.org/public-inbox.git'; @@ -41,8 +41,10 @@ sub new { sub response { my ($class, $ctx, $code, $cb) = @_; - [ $code, [ 'Content-Type', 'text/html; charset=UTF-8' ], - $class->new($ctx, $cb) ] + my $h = [ 'Content-Type', 'text/html; charset=UTF-8' ]; + my $self = $class->new($ctx, $cb); + $self->{gzf} = gzf_maybe($h, $ctx->{env}); + [ $code, $h, $self ] } sub _html_top ($) { @@ -165,13 +167,20 @@ sub getline { my ($self) = @_; my $nr = $self->{nr}++; - return _html_top($self) if $nr == 0; + my $buf = do { + if ($nr == 0) { + _html_top($self); + } elsif (my $middle = $self->{cb}) { + $middle->($nr, $self->{ctx}); + } + } // (delete($self->{cb}) ? _html_end($self) : undef); - if (my $middle = $self->{cb}) { - $middle = $middle->($nr, $self->{ctx}) and return $middle; - } + # gzf may be GzipFilter, `undef' or `0' + my $gzf = $self->{gzf} or return $buf; - delete $self->{cb} ? _html_end($self) : undef; + return $gzf->translate($buf) if defined $buf; + $self->{gzf} = 0; # next call to ->getline returns $buf (== undef) + $gzf->translate(undef); } sub html_oneshot ($$;$) { @@ -181,8 +190,8 @@ sub html_oneshot ($$;$) { base_url => base_url($ctx), }, __PACKAGE__; my @x; - my @h = ('Content-Type' => 'text/html; charset=UTF-8'); - if (my $gz = gzip_maybe($ctx->{env})) { + my $h = [ 'Content-Type' => 'text/html; charset=UTF-8' ]; + if (my $gz = gzip_maybe($h, $ctx->{env})) { my $err = $gz->deflate(_html_top($self), $x[0]); die "gzip->deflate: $err" if $err != Z_OK; if ($sref) { @@ -193,15 +202,14 @@ sub html_oneshot ($$;$) { die "gzip->deflate: $err" if $err != Z_OK; $err = $gz->flush($x[0], Z_FINISH); die "gzip->flush: $err" if $err != Z_OK; - push @h, qw(Vary Accept-Encoding Content-Encoding gzip); } else { @x = (_html_top($self), $sref ? $$sref : (), _html_end($self)); } my $len = 0; $len += bytes::length($_) for @x; - push @h, 'Content-Length', $len; - [ $code, \@h, \@x ] + push @$h, 'Content-Length', $len; + [ $code, $h, \@x ] } 1;