From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8DD122018A for ; Sat, 25 Jun 2016 00:45:35 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 3/4] mbox: reduce small packets for gzipped mboxes Date: Sat, 25 Jun 2016 00:45:32 +0000 Message-Id: <20160625004533.5061-4-e@80x24.org> In-Reply-To: <20160625004533.5061-1-e@80x24.org> References: <20160625004533.5061-1-e@80x24.org> List-Id: We want to avoid sending 10 or 20-byte gzip headers as separate TCP packets to reduce syscalls and avoid wasting bandwidth. --- lib/PublicInbox/Mbox.pm | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 63ec605..1c97f95 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -110,7 +110,7 @@ use warnings; sub new { my ($class, $ctx, $cb) = @_; - my $buf; + my $buf = ''; bless { buf => \$buf, gz => IO::Compress::Gzip->new(\$buf, Time => 0), @@ -121,19 +121,11 @@ sub new { }, $class; } -sub _flush_buf { - my ($self) = @_; - my $ret = $self->{buf}; - $ret = $$ret; - ${$self->{buf}} = undef; - $ret; -} - # called by Plack::Util::foreach or similar sub getline { my ($self) = @_; + my $ctx = $self->{ctx} or return; my $res; - my $ctx = $self->{ctx}; my $ibx = $ctx->{-inbox}; my $gz = $self->{gz}; do { @@ -141,8 +133,12 @@ sub getline { my $msg = eval { $ibx->msg_by_mid($smsg->mid) } or next; $msg = Email::Simple->new($msg); $gz->write(PublicInbox::Mbox::msg_str($ctx, $msg)); - my $ret = _flush_buf($self); - return $ret if $ret; + my $bref = $self->{buf}; + if (length($$bref) >= 8192) { + my $ret = $$bref; # copy :< + ${$self->{buf}} = ''; + return $ret; + } } $res = $self->{cb}->($self->{opts}); $self->{msgs} = $res->{msgs}; @@ -150,7 +146,8 @@ sub getline { $self->{opts}->{offset} += $res; } while ($res); $gz->close; - _flush_buf($self); + delete $self->{ctx}; + ${delete $self->{buf}}; } sub close {} # noop