about summary refs log tree commit homepage
path: root/lib/PublicInbox/GzipFilter.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2022-09-10 08:17:19 +0000
committerEric Wong <e@80x24.org>2022-09-10 19:50:48 +0000
commitab9c03ff4aa369b397dc1a8c8936153c8565fd05 (patch)
tree8bc61ae89d8f967ea948918d0478db291011d159 /lib/PublicInbox/GzipFilter.pm
parentf32456e0d0f4a7756fcc17c83ccf5b682cb512d9 (diff)
downloadpublic-inbox-ab9c03ff4aa369b397dc1a8c8936153c8565fd05.tar.gz
Calling Compress::Raw::Zlib::deflate is fairly expensive.
Relying on the `.=' (concat) operator inside the ->zadd method is
faster, but the method dispatch overhead is noticeable compared
to the original code where we had bare `.=' littered throughout.

Fortunately, `print' and `say' with the PerlIO::scalar IO layer
appear to offer better performance without high method dispatch
overhead.  This doesn't allow us to save as much memory as I
originally hoped, but does allow us to rely less on concat
operators in other places and just pass a list of args to
`print' and `say' as appropriate.

This does reduce scratchpad use, however, allowing for large
memory savings, and we still ->deflate every single $eml.
Diffstat (limited to 'lib/PublicInbox/GzipFilter.pm')
-rw-r--r--lib/PublicInbox/GzipFilter.pm41
1 files changed, 21 insertions, 20 deletions
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index 1f11acb8..848370ce 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -127,38 +127,39 @@ sub write {
         http_out($_[0])->write(translate($_[0], $_[1]));
 }
 
-sub zadd {
-        my $self = shift;
-        $self->{pbuf} .= $_ for @_; # perl internal pad memory use here
+sub zfh {
+        $_[0]->{zfh} // do {
+                open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or
+                        die "open: $!";
+                $_[0]->{zfh}
+        };
 }
 
 # similar to ->translate; use this when we're sure we know we have
 # more data to buffer after this
 sub zmore {
-        my $self = shift; # $_[1] => input
+        my $self = shift;
+        my $zfh = delete $self->{zfh};
+        if (@_ > 1 || $zfh) {
+                print { $zfh // zfh($self) } @_;
+                @_ = (delete $self->{pbuf});
+                delete $self->{zfh};
+        };
         http_out($self);
-        my $x;
-        defined($x = delete($self->{pbuf})) and unshift(@_, $x);
-        for (@_) {
-                ($x = $self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or
-                        die "gzip->deflate: $x";
-        }
-        undef;
+        my $err;
+        ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or
+                die "gzip->deflate: $err";
 }
 
 # flushes and returns the final bit of gzipped data
 sub zflush ($;@) {
         my $self = shift; # $_[1..Inf] => final input (optional)
+        zmore($self, @_) if scalar(@_) || $self->{zfh};
+        # not a bug, recursing on DS->write failure
+        my $gz = delete $self->{gz} // return '';
+        my $err;
         my $zbuf = delete $self->{zbuf};
-        my $gz = delete $self->{gz};
-        my $x;
-        defined($x = delete($self->{pbuf})) and unshift(@_, $x);
-        for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here:
-                ($x = $gz->deflate($_, $zbuf)) == Z_OK or
-                        die "gzip->deflate: $x";
-        }
-        $gz // return ''; # not a bug, recursing on DS->write failure
-        ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x";
+        ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err";
         $zbuf;
 }