about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-07-05 23:27:24 +0000
committerEric Wong <e@yhbt.net>2020-07-06 20:01:15 +0000
commitf26183401e3abfb64ad82537151f2718ac889074 (patch)
treeff5118417538eae8375c9bdc5023ceca1d1e647a /lib
parent967f6d1b1626392ee4340ea356a00651462377b3 (diff)
downloadpublic-inbox-f26183401e3abfb64ad82537151f2718ac889074.tar.gz
The new ->zmore and ->zflush APIs make it possible to replace
existing verbose usages of Compress::Raw::Deflate and simplify
buffering logic for streaming large gzipped data.

One potentially user visible change is we now break the mbox.gz
response on zlib failures, instead of silently continuing onto
the next message.  zlib only seems to fail on OOM, which should
be rare; so it's ideal we drop the connection anyways.
Diffstat (limited to 'lib')
-rw-r--r--lib/PublicInbox/GzipFilter.pm27
-rw-r--r--lib/PublicInbox/MboxGz.pm41
-rw-r--r--lib/PublicInbox/WwwStream.pm27
3 files changed, 31 insertions, 64 deletions
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 8cc5ea00..d2eb4e66 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -6,7 +6,7 @@ package PublicInbox::GzipFilter;
 use strict;
 use parent qw(Exporter);
 use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
-our @EXPORT_OK = qw(gzip_maybe gzf_maybe);
+our @EXPORT_OK = qw(gzf_maybe);
 my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
 my @GZIP_HDRS = qw(Vary Accept-Encoding Content-Encoding gzip);
 
@@ -19,24 +19,23 @@ sub attach {
         $self
 }
 
-sub gzip_maybe ($$) {
+# returns `0' and not `undef' on failure (see Www*Stream)
+sub gzf_maybe ($$) {
         my ($res_hdr, $env) = @_;
-        return if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/;
-
+        return 0 if (($env->{HTTP_ACCEPT_ENCODING}) // '') !~ /\bgzip\b/;
         my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
-        return if $err != Z_OK;
+        return 0 if $err != Z_OK;
 
         # in case Plack::Middleware::Deflater is loaded:
         $env->{'plack.skip-deflater'} = 1;
-
         push @$res_hdr, @GZIP_HDRS;
-        $gz;
+        bless { gz => $gz }, __PACKAGE__;
 }
 
-sub gzf_maybe ($$) {
-        my ($res_hdr, $env) = @_;
-        my $gz = gzip_maybe($res_hdr, $env) or return 0;
-        bless { gz => $gz }, __PACKAGE__;
+sub gzip_or_die () {
+        my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
+        $err == Z_OK or die "Deflate->new failed: $err";
+        $gz;
 }
 
 # for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'}
@@ -47,11 +46,7 @@ sub translate ($$) {
         # allocate the zlib context lazily here, instead of in ->new.
         # Deflate contexts are memory-intensive and this object may
         # be sitting in the Qspawn limiter queue for a while.
-        my $gz = $self->{gz} //= do {
-                my ($g, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
-                $err == Z_OK or die "Deflate->new failed: $err";
-                $g;
-        };
+        my $gz = $self->{gz} //= gzip_or_die();
         my $zbuf = delete($self->{zbuf});
         if (defined $_[1]) { # my $buf = $_[1];
                 my $err = $gz->deflate($_[1], $zbuf);
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index f7fc4afc..535ef96c 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -2,19 +2,19 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 package PublicInbox::MboxGz;
 use strict;
-use warnings;
+use parent 'PublicInbox::GzipFilter';
 use PublicInbox::Eml;
 use PublicInbox::Hval qw/to_filename/;
 use PublicInbox::Mbox;
-use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
-my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
 
 sub new {
         my ($class, $ctx, $cb) = @_;
         $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
-        my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
-        $err == Z_OK or die "Deflate->new failed: $err";
-        bless { gz => $gz, cb => $cb, ctx => $ctx }, $class;
+        bless {
+                gz => PublicInbox::GzipFilter::gzip_or_die(),
+                cb => $cb,
+                ctx => $ctx
+        }, $class;
 }
 
 sub response {
@@ -27,40 +27,21 @@ sub response {
         [ 200, $h, $body ];
 }
 
-sub gzip_fail ($$) {
-        my ($ctx, $err) = @_;
-        $ctx->{env}->{'psgi.errors'}->print("deflate failed: $err\n");
-        '';
-}
-
 # called by Plack::Util::foreach or similar
 sub getline {
         my ($self) = @_;
         my $ctx = $self->{ctx} or return;
-        my $gz = $self->{gz};
-        my $buf = delete($self->{buf});
         while (my $smsg = $self->{cb}->($ctx)) {
                 my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
                 my $h = PublicInbox::Eml->new($mref)->header_obj;
-
-                my $err = $gz->deflate(
-                        PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}),
-                        $buf);
-                return gzip_fail($ctx, $err) if $err != Z_OK;
-
-                $err = $gz->deflate(PublicInbox::Mbox::msg_body($$mref), $buf);
-                return gzip_fail($ctx, $err) if $err != Z_OK;
-
-                return $buf if length($buf) >= 8192;
-
-                # be fair to other clients on public-inbox-httpd:
-                $self->{buf} = $buf;
-                return '';
+                $self->zmore(
+                        PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid})
+                );
+                return $self->translate(PublicInbox::Mbox::msg_body($$mref));
         }
         # signal that we're done and can return undef next call:
         delete $self->{ctx};
-        my $err = $gz->flush($buf, Z_FINISH);
-        ($err == Z_OK) ? $buf : gzip_fail($ctx, $err);
+        $self->zflush;
 }
 
 sub close {} # noop
diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm
index c964dbd4..8623440b 100644
--- a/lib/PublicInbox/WwwStream.pm
+++ b/lib/PublicInbox/WwwStream.pm
@@ -13,8 +13,7 @@ use base qw(Exporter);
 our @EXPORT_OK = qw(html_oneshot);
 use bytes (); # length
 use PublicInbox::Hval qw(ascii_html prurl);
-use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
-use PublicInbox::GzipFilter qw(gzip_maybe gzf_maybe);
+use PublicInbox::GzipFilter qw(gzf_maybe);
 our $TOR_URL = 'https://www.torproject.org/';
 our $CODE_URL = 'https://public-inbox.org/public-inbox.git';
 
@@ -190,25 +189,17 @@ sub html_oneshot ($$;$) {
                 base_url => base_url($ctx),
         }, __PACKAGE__;
         my @x;
-        my $h = [ 'Content-Type' => 'text/html; charset=UTF-8' ];
-        if (my $gz = gzip_maybe($h, $ctx->{env})) {
-                my $err = $gz->deflate(_html_top($self), $x[0]);
-                die "gzip->deflate: $err" if $err != Z_OK;
-                if ($sref) {
-                        $err = $gz->deflate($sref, $x[0]);
-                        die "gzip->deflate: $err" if $err != Z_OK;
-                }
-                $err = $gz->deflate(_html_end($self), $x[0]);
-                die "gzip->deflate: $err" if $err != Z_OK;
-                $err = $gz->flush($x[0], Z_FINISH);
-                die "gzip->flush: $err" if $err != Z_OK;
+        my $h = [ 'Content-Type' => 'text/html; charset=UTF-8',
+                'Content-Length' => undef ];
+        if (my $gzf = gzf_maybe($h, $ctx->{env})) {
+                $gzf->zmore(_html_top($self));
+                $gzf->zmore($$sref) if $sref;
+                $x[0] = $gzf->zflush(_html_end($self));
+                $h->[3] = length($x[0]);
         } else {
                 @x = (_html_top($self), $sref ? $$sref : (), _html_end($self));
+                $h->[3] += bytes::length($_) for @x;
         }
-
-        my $len = 0;
-        $len += bytes::length($_) for @x;
-        push @$h, 'Content-Length', $len;
         [ $code, $h, \@x ]
 }