diff options
Diffstat (limited to 'lib/PublicInbox/GzipFilter.pm')
-rw-r--r-- | lib/PublicInbox/GzipFilter.pm | 112 |
1 files changed, 60 insertions, 52 deletions
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index e37f1f76..8b630f25 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # In public-inbox <=1.5.0, public-inbox-httpd favored "getline" @@ -18,6 +18,7 @@ use Compress::Raw::Zlib qw(Z_OK); use PublicInbox::CompressNoop; use PublicInbox::Eml; use PublicInbox::GitAsyncCat; +use Carp qw(carp); our @EXPORT_OK = qw(gzf_maybe); my %OPT = (-WindowBits => 15 + 16, -AppendOutput => 1); @@ -92,30 +93,24 @@ sub gone { # what: search/over/mm undef; } -# for GetlineBody (via Qspawn) when NOT using $env->{'pi-httpd.async'} +# for GetlineResponse (via Qspawn) when NOT using $env->{'pi-httpd.async'} # Also used for ->getline callbacks -sub translate ($$) { - my $self = $_[0]; # $_[1] => input +sub translate { + my $self = shift; # $_[1] => input # allocate the zlib context lazily here, instead of in ->new. # Deflate contexts are memory-intensive and this object may # be sitting in the Qspawn limiter queue for a while. - my $gz = $self->{gz} //= gzip_or_die(); - my $zbuf = delete($self->{zbuf}); - if (defined $_[1]) { # my $buf = $_[1]; - my $err = $gz->deflate($_[1], $zbuf); - die "gzip->deflate: $err" if $err != Z_OK; - return $zbuf if length($zbuf) >= 8192; - - $self->{zbuf} = $zbuf; - ''; + $self->{gz} //= gzip_or_die(); + if (defined $_[0]) { # my $buf = $_[1]; + zmore($self, @_); + length($self->{zbuf}) >= 8192 ? delete($self->{zbuf}) : ''; } else { # undef == EOF - my $err = $gz->flush($zbuf); - die "gzip->flush: $err" if $err != Z_OK; - $zbuf; + $self->zflush; } } +# returns PublicInbox::HTTP::{Chunked,Identity} sub http_out ($) { my ($self) = @_; $self->{http_out} // do { @@ -128,73 +123,86 @@ sub http_out ($) { }; } +# returns undef if HTTP client disconnected, may return 0 +# because ->translate can return '' sub write { - # my $ret = bytes::length($_[1]); # XXX does anybody care? - http_out($_[0])->write(translate($_[0], $_[1])); + my $self = shift; + http_out($self)->write($self->translate(@_)); +} + +sub zfh { + $_[0]->{zfh} // do { + open($_[0]->{zfh}, '>>', \($_[0]->{pbuf} //= '')) or + die "open: $!"; + $_[0]->{zfh} + }; } # similar to ->translate; use this when we're sure we know we have # more data to buffer after this sub zmore { - my $self = $_[0]; # $_[1] => input + my $self = shift; + my $zfh = delete $self->{zfh}; + if (@_ > 1 || $zfh) { + print { $zfh // zfh($self) } @_; + @_ = (delete $self->{pbuf}); + delete $self->{zfh}; + }; http_out($self); - my $err = $self->{gz}->deflate($_[1], $self->{zbuf}); - die "gzip->deflate: $err" if $err != Z_OK; - undef; + my $err; + ($err = $self->{gz}->deflate($_[0], $self->{zbuf})) == Z_OK or + die "gzip->deflate: $err"; } # flushes and returns the final bit of gzipped data -sub zflush ($;$) { - my $self = $_[0]; # $_[1] => final input (optional) - my $zbuf = delete $self->{zbuf}; - my $gz = delete $self->{gz}; +sub zflush ($;@) { + my $self = shift; # $_[1..Inf] => final input (optional) + zmore($self, @_) if scalar(@_) || $self->{zfh}; + # not a bug, recursing on DS->write failure + my $gz = delete $self->{gz} // return ''; my $err; - if (defined $_[1]) { - $err = $gz->deflate($_[1], $zbuf); - die "gzip->deflate: $err" if $err != Z_OK; - } - $err = $gz->flush($zbuf); - die "gzip->flush: $err" if $err != Z_OK; + my $zbuf = delete $self->{zbuf}; + ($err = $gz->flush($zbuf)) == Z_OK or die "gzip->flush: $err"; $zbuf; } sub close { my ($self) = @_; my $http_out = http_out($self) // return; - $http_out->write(zflush($self)); - delete($self->{http_out})->close; + $http_out->write($self->zflush); + (delete($self->{http_out}) // return)->close; } -sub bail { +sub bail { my $self = shift; - if (my $env = $self->{env}) { - warn @_, "\n"; - my $http = $env->{'psgix.io'} or return; # client abort - eval { $http->close }; # should hit our close - warn "E: error in http->close: $@" if $@; - eval { $self->close }; # just in case... - warn "E: error in self->close: $@" if $@; - } else { - warn @_, "\n"; - } + carp @_; + my $env = $self->{env} or return; + my $http = $env->{'psgix.io'} or return; # client abort + eval { $http->close }; # should hit our close + carp "E: error in http->close: $@" if $@; + eval { $self->close }; # just in case... + carp "E: error in self->close: $@" if $@; } # this is public-inbox-httpd-specific sub async_blob_cb { # git->cat_async callback my ($bref, $oid, $type, $size, $self) = @_; - my $http = $self->{env}->{'psgix.io'}; + my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP $http->{forward} or return; # client aborted - my $smsg = $self->{smsg} or bail($self, 'BUG: no smsg'); - if (!defined($oid)) { + my $smsg = $self->{smsg} or return bail($self, 'BUG: no smsg'); + $type // return + bail($self, "abort: $smsg->{blob} $self->{ibx}->{inboxdir}"); + if ($type ne 'blob') { # it's possible to have TOCTOU if an admin runs # public-inbox-(edit|purge), just move onto the next message - warn "E: $smsg->{blob} missing in $self->{ibx}->{inboxdir}\n"; + warn "E: $smsg->{blob} $type in $self->{ibx}->{inboxdir}\n"; return $http->next_step($self->can('async_next')); } - $smsg->{blob} eq $oid or bail($self, "BUG: $smsg->{blob} != $oid"); + $smsg->{blob} eq $oid or return + bail($self, "BUG: $smsg->{blob} != $oid"); eval { $self->async_eml(PublicInbox::Eml->new($bref)) }; - bail($self, "E: async_eml: $@") if $@; - if ($self->{-low_prio}) { + return bail($self, "E: async_eml: $@") if $@; + if ($self->{-low_prio}) { # run via PublicInbox::WWW::event_step push(@{$self->{www}->{-low_prio_q}}, $self) == 1 and PublicInbox::DS::requeue($self->{www}); } else { |