From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DE9621F5A5 for ; Sat, 16 Nov 2019 02:34:39 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/3] mboxgz: use Compress::Raw::Zlib instead of IO::Compress::Gzip Date: Sat, 16 Nov 2019 02:34:39 +0000 Message-Id: <20191116023439.32410-4-e@80x24.org> In-Reply-To: <20191116023439.32410-1-e@80x24.org> References: <20191116023439.32410-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: IO::Compress::Gzip is a wrapper around Compress::Raw::Zlib, anyways, and being able to easily detach buffers to return them via ->getline is nice. This results in a 1-2% performance improvement when fetching giant mboxes. --- lib/PublicInbox/Mbox.pm | 2 +- lib/PublicInbox/MboxGz.pm | 41 +++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 42ed8c5d..42cedd15 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -231,7 +231,7 @@ sub need_gzip { my $title = 'gzipped mbox not available'; $fh->write(<$title
$title
-The administrator needs to install the IO::Compress::Gzip Perl module
+The administrator needs to install the Compress::Raw::Zlib Perl module
 to support gzipped mboxes.
 Return to index
EOF diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm index 2919ad6a..2a55447f 100644 --- a/lib/PublicInbox/MboxGz.pm +++ b/lib/PublicInbox/MboxGz.pm @@ -7,17 +7,15 @@ use Email::Simple; use PublicInbox::Hval qw/to_filename/; use PublicInbox::Mbox; use IO::Compress::Gzip; +use Compress::Raw::Zlib qw(Z_FINISH Z_OK); +my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1); sub new { my ($class, $ctx, $cb) = @_; - my $buf = ''; $ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env}); - bless { - buf => \$buf, - gz => IO::Compress::Gzip->new(\$buf, Time => 0), - cb => $cb, - ctx => $ctx, - }, $class; + my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT); + $err == Z_OK or die "Deflate->new failed: $err"; + bless { gz => $gz, cb => $cb, ctx => $ctx }, $class; } sub response { @@ -32,31 +30,40 @@ sub response { [ 200, \@h, $body ]; } +sub gzip_fail ($$) { + my ($ctx, $err) = @_; + $ctx->{env}->{'psgi.errors'}->print("deflate failed: $err\n"); + ''; +} + # called by Plack::Util::foreach or similar sub getline { my ($self) = @_; my $ctx = $self->{ctx} or return; my $gz = $self->{gz}; + my $buf = delete($self->{buf}); while (my $smsg = $self->{cb}->()) { my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next; my $h = Email::Simple->new($mref)->header_obj; - $gz->write(PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid})); - $gz->write(PublicInbox::Mbox::msg_body($$mref)); - my $bref = $self->{buf}; - if (length($$bref) >= 8192) { - my $ret = $$bref; # copy :< - ${$self->{buf}} = ''; - return $ret; - } + my $err = $gz->deflate( + PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}), + $buf); + return gzip_fail($ctx, $err) if $err != Z_OK; + + $err = $gz->deflate(PublicInbox::Mbox::msg_body($$mref), $buf); + return gzip_fail($ctx, $err) if $err != Z_OK; + + return $buf if length($buf) >= 8192; # be fair to other clients on public-inbox-httpd: + $self->{buf} = $buf; return ''; } - delete($self->{gz})->close; # signal that we're done and can return undef next call: delete $self->{ctx}; - ${delete $self->{buf}}; + my $err = $gz->flush($buf, Z_FINISH); + $err == Z_OK ? $buf : gzip_fail($ctx, $err); } sub close {} # noop