From ab9c03ff4aa369b397dc1a8c8936153c8565fd05 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 10 Sep 2022 08:17:19 +0000 Subject: www: use PerlIO::scalar (zfh) for buffering Calling Compress::Raw::Zlib::deflate is fairly expensive. Relying on the `.=' (concat) operator inside the ->zadd method is faster, but the method dispatch overhead is noticeable compared to the original code where we had bare `.=' littered throughout. Fortunately, `print' and `say' with the PerlIO::scalar IO layer appear to offer better performance without high method dispatch overhead. This doesn't allow us to save as much memory as I originally hoped, but does allow us to rely less on concat operators in other places and just pass a list of args to `print' and `say' as appropriate. This does reduce scratchpad use, however, allowing for large memory savings, and we still ->deflate every single $eml. --- lib/PublicInbox/GzipFilter.pm | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'lib/PublicInbox/GzipFilter.pm') diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index 1f11acb8..848370ce 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -127,38 +127,39 @@ sub write { http_out($_[0])->write(translate($_[0], $_[1])); } -sub zadd { - my $self = shift; - $self->{pbuf} .= $_ for @_; # perl internal pad memory use here +sub zfh { + $_[0]->{zfh} // do { + open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or + die "open: $!"; + $_[0]->{zfh} + }; } # similar to ->translate; use this when we're sure we know we have # more data to buffer after this sub zmore { - my $self = shift; # $_[1] => input + my $self = shift; + my $zfh = delete $self->{zfh}; + if (@_ > 1 || $zfh) { + print { $zfh // zfh($self) } @_; + @_ = (delete $self->{pbuf}); + delete $self->{zfh}; + }; http_out($self); - my $x; - defined($x = delete($self->{pbuf})) and unshift(@_, $x); - for (@_) { - ($x = 
$self->{gz}->deflate($_, $self->{zbuf})) == Z_OK or - die "gzip->deflate: $x"; - } - undef; + my $err; + ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or + die "gzip->deflate: $err"; } # flushes and returns the final bit of gzipped data sub zflush ($;@) { my $self = shift; # $_[1..Inf] => final input (optional) + zmore($self, @_) if scalar(@_) || $self->{zfh}; + # not a bug, recursing on DS->write failure + my $gz = delete $self->{gz} // return ''; + my $err; my $zbuf = delete $self->{zbuf}; - my $gz = delete $self->{gz}; - my $x; - defined($x = delete($self->{pbuf})) and unshift(@_, $x); - for (@_) { # it's a bug iff $gz is undef if @_ isn't empty, here: - ($x = $gz->deflate($_, $zbuf)) == Z_OK or - die "gzip->deflate: $x"; - } - $gz // return ''; # not a bug, recursing on DS->write failure - ($x = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $x"; + ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err"; $zbuf; } -- cgit v1.2.3-24-ge0c7