From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 86E351F8ED for ; Sun, 5 Jul 2020 23:28:05 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/43] gzipfilter: replace Compress::Raw::Deflate usages Date: Sun, 5 Jul 2020 23:27:24 +0000 Message-Id: <20200705232759.3161-9-e@yhbt.net> In-Reply-To: <20200705232759.3161-1-e@yhbt.net> References: <20200705232759.3161-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The new ->zmore and ->zflush APIs make it possible to replace existing verbose usages of Compress::Raw::Deflate and simplify buffering logic for streaming large gzipped data. One potentially user visible change is we now break the mbox.gz response on zlib failures, instead of silently continuing onto the next message. zlib only seems to fail on OOM, which should be rare; so it's ideal we drop the connection anyways. --- lib/PublicInbox/GzipFilter.pm | 27 ++++++++++------------- lib/PublicInbox/MboxGz.pm | 41 ++++++++++------------------------- lib/PublicInbox/WwwStream.pm | 27 ++++++++--------------- 3 files changed, 31 insertions(+), 64 deletions(-) diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index 8cc5ea00b..d2eb4e664 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -6,7 +6,7 @@ package PublicInbox::GzipFilter; use strict; use parent qw(Exporter); use Compress::Raw::Zlib qw(Z_FINISH Z_OK); -our @EXPORT_OK = qw(gzip_maybe gzf_maybe); +our @EXPORT_OK = qw(gzf_maybe); my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1); my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip); @@ -19,24 +19,23 @@ sub attach { $self } -sub gzip_maybe ($$) { +# returns `0' and not `undef' on failure (see Www*Stream) +sub gzf_maybe ($$) { my ($res_hdr, $env) = @_; - return if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/; - + return 0 if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/; my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); - return if $err != Z_OK; + return 0 if $err != Z_OK; # in case Plack::Middleware::Deflater is loaded: $env->{'plack.skip-deflater'} = 1; - push @$res_hdr, @GZIP_HDRS; - $gz; + bless { gz => $gz }, __PACKAGE__; } -sub gzf_maybe ($$) { - my ($res_hdr, $env) = @_; - my $gz = gzip_maybe($res_hdr, $env) or return 0; - bless { gz => $gz }, __PACKAGE__; +sub gzip_or_die () { + my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); + $err == Z_OK or die "Deflate->new failed: $err"; + $gz; } # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'} @@ -47,11 +46,7 @@ sub translate ($$) { # allocate the zlib context lazily here, instead of in ->new. # Deflate contexts are memory-intensive and this object may # be sitting in the Qspawn limiter queue for a while. - my $gz = $self->{gz} //= do { - my ($g, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); - $err == Z_OK or die "Deflate->new failed: $err"; - $g; - }; + my $gz = $self->{gz} //= gzip_or_die(); my $zbuf = delete($self->{zbuf}); if (defined $_[1]) { # my $buf = $_[1]; my $err = $gz->deflate($_[1], $zbuf); diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index f7fc4afc1..535ef96c9 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -2,19 +2,19 @@ # License: AGPL-3.0+ package PublicInbox::MboxGz; use strict; -use warnings; +use parent 'PublicInbox::GzipFilter'; use PublicInbox::Eml; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Mbox; -use Compress::Raw::Zlib qw(Z_FINISH Z_OK); -my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1); sub new { my ($class, $ctx, $cb) = @_; $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env}); - my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); - $err == Z_OK or die "Deflate->new failed: $err"; - bless { gz => $gz, cb => $cb, ctx => $ctx }, $class; + bless { + gz => PublicInbox::GzipFilter::gzip_or_die(), + cb => $cb, + ctx => $ctx + }, $class; } sub response { @@ -27,40 +27,21 @@ sub response { [ 200, $h, $body ]; } -sub gzip_fail ($$) { - my ($ctx, $err) = @_; - $ctx->{env}->{'psgi.errors'}->print("deflate failed: $err\n"); - ''; -} - # called by Plack::Util::foreach or similar sub getline { my ($self) = @_; my $ctx = $self->{ctx} or return; - my $gz = $self->{gz}; - my $buf = delete($self->{buf}); while (my $smsg = $self->{cb}->($ctx)) { my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next; my $h = PublicInbox::Eml->new($mref)->header_obj; - - my $err = $gz->deflate( - PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}), - $buf); - return gzip_fail($ctx, $err) if $err != Z_OK; - - $err = $gz->deflate(PublicInbox::Mbox::msg_body($$mref), $buf); - return gzip_fail($ctx, $err) if $err != Z_OK; - - return $buf if length($buf) >= 8192; - - # be fair to other clients on public-inbox-httpd: - $self->{buf} = $buf; - return ''; + $self->zmore( + PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}) + ); + return $self->translate(PublicInbox::Mbox::msg_body($$mref)); } # signal that we're done and can return undef next call: delete $self->{ctx}; - my $err = $gz->flush($buf, Z_FINISH); - ($err == Z_OK) ? $buf : gzip_fail($ctx, $err); + $self->zflush; } sub close {} # noop diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index c964dbd41..8623440b8 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -13,8 +13,7 @@ use base qw(Exporter); our @EXPORT_OK = qw(html_oneshot); use bytes (); # length use PublicInbox::Hval qw(ascii_html prurl); -use Compress::Raw::Zlib qw(Z_FINISH Z_OK); -use PublicInbox::GzipFilter qw(gzip_maybe gzf_maybe); +use PublicInbox::GzipFilter qw(gzf_maybe); our $TOR_URL = 'https://www.torproject.org/'; our $CODE_URL = 'https://public-inbox.org/public-inbox.git'; @@ -190,25 +189,17 @@ sub html_oneshot ($$;$) { base_url => base_url($ctx), }, __PACKAGE__; my @x; - my $h = [ 'Content-Type' => 'text/html; charset=UTF-8' ]; - if (my $gz = gzip_maybe($h, $ctx->{env})) { - my $err = $gz->deflate(_html_top($self), $x[0]); - die "gzip->deflate: $err" if $err != Z_OK; - if ($sref) { - $err = $gz->deflate($sref, $x[0]); - die "gzip->deflate: $err" if $err != Z_OK; - } - $err = $gz->deflate(_html_end($self), $x[0]); - die "gzip->deflate: $err" if $err != Z_OK; - $err = $gz->flush($x[0], Z_FINISH); - die "gzip->flush: $err" if $err != Z_OK; + my $h = [ 'Content-Type' => 'text/html; charset=UTF-8', + 'Content-Length' => undef ]; + if (my $gzf = gzf_maybe($h, $ctx->{env})) { + $gzf->zmore(_html_top($self)); + $gzf->zmore($$sref) if $sref; + $x[0] = $gzf->zflush(_html_end($self)); + $h->[3] = length($x[0]); } else { @x = (_html_top($self), $sref ? $$sref : (), _html_end($self)); + $h->[3] += bytes::length($_) for @x; } - - my $len = 0; - $len += bytes::length($_) for @x; - push @$h, 'Content-Length', $len; [ $code, $h, \@x ] }