about summary refs log tree commit homepage
path: root/lib/PublicInbox/WwwAttach.pm
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-07-05 23:27:56 +0000
committer Eric Wong <e@yhbt.net> 2020-07-06 20:01:15 +0000
commit 6df377a693070bcbfa63b681f329a353457dbe7f (patch)
tree c16cae63a78fd1da70cc0d82f70d52e572d137ae /lib/PublicInbox/WwwAttach.pm
parent 6bcab55b2594368e5f8aad0badb8d51d5d8ba20f (diff)
download public-inbox-6df377a693070bcbfa63b681f329a353457dbe7f.tar.gz
We can reuse some of the GzipFilter infrastructure used by other
WWW components to handle slow blob retrieval here.  The
difference from previous changes is that we don't decide on the
200 status code until we've retrieved the blob and found the
attachment.

While we're at it, ensure we can compress text attachment
responses once again, since all text attachments are served
as text/plain.
Diffstat (limited to 'lib/PublicInbox/WwwAttach.pm')
-rw-r--r-- lib/PublicInbox/WwwAttach.pm 63
1 files changed, 52 insertions, 11 deletions
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 7e8496d7..20417295 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -4,15 +4,16 @@
 # For retrieving attachments from messages in the WWW interface
 package PublicInbox::WwwAttach; # internal package
 use strict;
-use warnings;
+use parent qw(PublicInbox::GzipFilter);
 use bytes (); # only for bytes::length
 use PublicInbox::EmlContentFoo qw(parse_content_type);
 use PublicInbox::Eml;
 
 sub get_attach_i { # ->each_part callback
         my ($part, $depth, $idx) = @{$_[0]};
-        my $res = $_[1];
-        return if $idx ne $res->[3]; # [0-9]+(?:\.[0-9]+)+
+        my $ctx = $_[1];
+        return if $idx ne $ctx->{idx}; # [0-9]+(?:\.[0-9]+)+
+        my $res = $ctx->{res};
         $res->[0] = 200;
         my $ct = $part->content_type;
         $ct = parse_content_type($ct) if $ct;
@@ -23,24 +24,64 @@ sub get_attach_i { # ->each_part callback
                 if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) {
                         $res->[1]->[1] .= qq(; charset=$cset);
                 }
+                $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res->[1],
+                                                                $ctx->{env});
+                $part = $ctx->zflush($part->body);
         } else { # TODO: allow user to configure safe types
                 $res->[1]->[1] = 'application/octet-stream';
+                $part = $part->body;
         }
-        $part = $part->body;
         push @{$res->[1]}, 'Content-Length', bytes::length($part);
         $res->[2]->[0] = $part;
 }
 
+sub async_eml { # ->{async_eml} for async_blob_cb
+        my ($ctx, $eml) = @_;
+        eval { $eml->each_part(\&get_attach_i, $ctx, 1) };
+        if ($@) {
+                $ctx->{res}->[0] = 500;
+                warn "E: $@";
+        }
+}
+
+sub async_next {
+        my ($http) = @_;
+        my $ctx = $http->{forward} or return; # client aborted
+        # finally, we call the user-supplied callback
+        eval { $ctx->{wcb}->($ctx->{res}) };
+        warn "E: $@" if $@;
+}
+
+sub scan_attach ($) { # public-inbox-httpd only
+        my ($ctx) = @_;
+        $ctx->{env}->{'psgix.io'}->{forward} = $ctx;
+        $ctx->{async_eml} = \&async_eml;
+        $ctx->{async_next} = \&async_next;
+        $ctx->smsg_blob($ctx->{smsg});
+}
+
 # /$LISTNAME/$MESSAGE_ID/$IDX-$FILENAME
 sub get_attach ($$$) {
         my ($ctx, $idx, $fn) = @_;
-        my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
-        my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
-        $mime = PublicInbox::Eml->new($mime);
-        $res->[3] = $idx;
-        $mime->each_part(\&get_attach_i, $res, 1);
-        pop @$res; # cleanup before letting PSGI server see it
-        $res
+        $ctx->{res} = [ 404, [ 'Content-Type' => 'text/plain' ],
+                                [ "Not found\n" ] ];
+        $ctx->{idx} = $idx;
+        bless $ctx, __PACKAGE__;
+        my $eml;
+        if ($ctx->{smsg} = $ctx->{-inbox}->smsg_by_mid($ctx->{mid})) {
+                return sub { # public-inbox-httpd-only
+                        $ctx->{wcb} = $_[0];
+                        scan_attach($ctx);
+                } if $ctx->{env}->{'pi-httpd.async'};
+                # generic PSGI:
+                $eml = $ctx->{-inbox}->smsg_eml($ctx->{smsg});
+        } elsif (!$ctx->{-inbox}->over) {
+                if (my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid})) {
+                        $eml = PublicInbox::Eml->new($bref);
+                }
+        }
+        $eml->each_part(\&get_attach_i, $ctx, 1) if $eml;
+        $ctx->{res}
 }
 
 1;