From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] mboxgz: use Compress::Raw::Zlib instead of IO::Compress::Gzip
Date: Sat, 16 Nov 2019 02:34:39 +0000 [thread overview]
Message-ID: <20191116023439.32410-4-e@80x24.org> (raw)
In-Reply-To: <20191116023439.32410-1-e@80x24.org>
IO::Compress::Gzip is a wrapper around Compress::Raw::Zlib
anyway, and being able to easily detach buffers to return them
via ->getline is nice. This results in a 1-2% performance
improvement when fetching giant mboxes.
---
lib/PublicInbox/Mbox.pm | 2 +-
lib/PublicInbox/MboxGz.pm | 41 +++++++++++++++++++++++----------------
2 files changed, 25 insertions(+), 18 deletions(-)
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 42ed8c5d..42cedd15 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -231,7 +231,7 @@ sub need_gzip {
my $title = 'gzipped mbox not available';
$fh->write(<<EOF);
<html><head><title>$title</title><body><pre>$title
-The administrator needs to install the IO::Compress::Gzip Perl module
+The administrator needs to install the Compress::Raw::Zlib Perl module
to support gzipped mboxes.
<a href="../">Return to index</a></pre></body></html>
EOF
diff --git a/lib/PublicInbox/MboxGz.pm b/lib/PublicInbox/MboxGz.pm
index 2919ad6a..2a55447f 100644
--- a/lib/PublicInbox/MboxGz.pm
+++ b/lib/PublicInbox/MboxGz.pm
@@ -7,17 +7,15 @@ use Email::Simple;
use PublicInbox::Hval qw/to_filename/;
use PublicInbox::Mbox;
use IO::Compress::Gzip;
+use Compress::Raw::Zlib qw(Z_FINISH Z_OK);
+my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1);
sub new {
my ($class, $ctx, $cb) = @_;
- my $buf = '';
$ctx->{base_url} = $ctx->{-inbox}->base_url($ctx->{env});
- bless {
- buf => \$buf,
- gz => IO::Compress::Gzip->new(\$buf, Time => 0),
- cb => $cb,
- ctx => $ctx,
- }, $class;
+ my ($gz, $err) = Compress::Raw::Zlib::Deflate->new(%OPT);
+ $err == Z_OK or die "Deflate->new failed: $err";
+ bless { gz => $gz, cb => $cb, ctx => $ctx }, $class;
}
sub response {
@@ -32,31 +30,40 @@ sub response {
[ 200, \@h, $body ];
}
+sub gzip_fail ($$) {
+ my ($ctx, $err) = @_;
+ $ctx->{env}->{'psgi.errors'}->print("deflate failed: $err\n");
+ '';
+}
+
# called by Plack::Util::foreach or similar
sub getline {
my ($self) = @_;
my $ctx = $self->{ctx} or return;
my $gz = $self->{gz};
+ my $buf = delete($self->{buf});
while (my $smsg = $self->{cb}->()) {
my $mref = $ctx->{-inbox}->msg_by_smsg($smsg) or next;
my $h = Email::Simple->new($mref)->header_obj;
- $gz->write(PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}));
- $gz->write(PublicInbox::Mbox::msg_body($$mref));
- my $bref = $self->{buf};
- if (length($$bref) >= 8192) {
- my $ret = $$bref; # copy :<
- ${$self->{buf}} = '';
- return $ret;
- }
+ my $err = $gz->deflate(
+ PublicInbox::Mbox::msg_hdr($ctx, $h, $smsg->{mid}),
+ $buf);
+ return gzip_fail($ctx, $err) if $err != Z_OK;
+
+ $err = $gz->deflate(PublicInbox::Mbox::msg_body($$mref), $buf);
+ return gzip_fail($ctx, $err) if $err != Z_OK;
+
+ return $buf if length($buf) >= 8192;
# be fair to other clients on public-inbox-httpd:
+ $self->{buf} = $buf;
return '';
}
- delete($self->{gz})->close;
# signal that we're done and can return undef next call:
delete $self->{ctx};
- ${delete $self->{buf}};
+ my $err = $gz->flush($buf, Z_FINISH);
+ $err == Z_OK ? $buf : gzip_fail($ctx, $err);
}
sub close {} # noop
next prev parent reply other threads:[~2019-11-16 2:34 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-16 2:34 [PATCH 0/3] start tidying up gzip-related code Eric Wong
2019-11-16 2:34 ` [PATCH 1/3] mbox: unused mid_clean import Eric Wong
2019-11-16 2:34 ` [PATCH 2/3] mbox: split mboxgz out into a separate file Eric Wong
2019-11-16 2:34 ` Eric Wong [this message]
2019-11-19 13:57 ` [PATCH 3/3] mboxgz: use Compress::Raw::Zlib instead of IO::Compress::Gzip SZEDER Gábor
2019-11-19 20:12 ` Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191116023439.32410-4-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).