From 6df377a693070bcbfa63b681f329a353457dbe7f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Jul 2020 23:27:56 +0000 Subject: wwwattach: support async blob retrievals We can reuse some of the GzipFilter infrastructure used by other WWW components to handle slow blob retrieval, here. The difference from previous changes is we don't decide on the 200 status code until we've retrieved the blob and found the attachment. While we're at it, ensure we can compress text attachment responses once again, since all text attachments are served as text/plain. --- lib/PublicInbox/WwwAttach.pm | 63 ++++++++++++++--- t/psgi_attach.t | 162 ++++++++++++++++++++++++------------------- 2 files changed, 144 insertions(+), 81 deletions(-) diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm index 7e8496d7..20417295 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -4,15 +4,16 @@ # For retrieving attachments from messages in the WWW interface package PublicInbox::WwwAttach; # internal package use strict; -use warnings; +use parent qw(PublicInbox::GzipFilter); use bytes (); # only for bytes::length use PublicInbox::EmlContentFoo qw(parse_content_type); use PublicInbox::Eml; sub get_attach_i { # ->each_part callback my ($part, $depth, $idx) = @{$_[0]}; - my $res = $_[1]; - return if $idx ne $res->[3]; # [0-9]+(?:\.[0-9]+)+ + my $ctx = $_[1]; + return if $idx ne $ctx->{idx}; # [0-9]+(?:\.[0-9]+)+ + my $res = $ctx->{res}; $res->[0] = 200; my $ct = $part->content_type; $ct = parse_content_type($ct) if $ct; @@ -23,24 +24,64 @@ sub get_attach_i { # ->each_part callback if ($cset && ($cset =~ /\A[a-zA-Z0-9_\-]+\z/)) { $res->[1]->[1] .= qq(; charset=$cset); } + $ctx->{gz} = PublicInbox::GzipFilter::gz_or_noop($res->[1], + $ctx->{env}); + $part = $ctx->zflush($part->body); } else { # TODO: allow user to configure safe types $res->[1]->[1] = 'application/octet-stream'; + $part = $part->body; } - $part = $part->body; push @{$res->[1]}, 'Content-Length', bytes::length($part); $res->[2]->[0] = $part; } +sub async_eml { # ->{async_eml} for async_blob_cb + my ($ctx, $eml) = @_; + eval { $eml->each_part(\&get_attach_i, $ctx, 1) }; + if ($@) { + $ctx->{res}->[0] = 500; + warn "E: $@"; + } +} + +sub async_next { + my ($http) = @_; + my $ctx = $http->{forward} or return; # client aborted + # finally, we call the user-supplied callback + eval { $ctx->{wcb}->($ctx->{res}) }; + warn "E: $@" if $@; +} + +sub scan_attach ($) { # public-inbox-httpd only + my ($ctx) = @_; + $ctx->{env}->{'psgix.io'}->{forward} = $ctx; + $ctx->{async_eml} = \&async_eml; + $ctx->{async_next} = \&async_next; + $ctx->smsg_blob($ctx->{smsg}); +} + # /$LISTNAME/$MESSAGE_ID/$IDX-$FILENAME sub get_attach ($$$) { my ($ctx, $idx, $fn) = @_; - my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ]; - my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res; - $mime = PublicInbox::Eml->new($mime); - $res->[3] = $idx; - $mime->each_part(\&get_attach_i, $res, 1); - pop @$res; # cleanup before letting PSGI server see it - $res + $ctx->{res} = [ 404, [ 'Content-Type' => 'text/plain' ], + [ "Not found\n" ] ]; + $ctx->{idx} = $idx; + bless $ctx, __PACKAGE__; + my $eml; + if ($ctx->{smsg} = $ctx->{-inbox}->smsg_by_mid($ctx->{mid})) { + return sub { # public-inbox-httpd-only + $ctx->{wcb} = $_[0]; + scan_attach($ctx); + } if $ctx->{env}->{'pi-httpd.async'}; + # generic PSGI: + $eml = $ctx->{-inbox}->smsg_eml($ctx->{smsg}); + } elsif (!$ctx->{-inbox}->over) { + if (my $bref = $ctx->{-inbox}->msg_by_mid($ctx->{mid})) { + $eml = PublicInbox::Eml->new($bref); + } + } + $eml->each_part(\&get_attach_i, $ctx, 1) if $eml; + $ctx->{res} } 1; diff --git a/t/psgi_attach.t b/t/psgi_attach.t index 9a734f81..14d20adb 100644 --- a/t/psgi_attach.t +++ b/t/psgi_attach.t @@ -5,9 +5,8 @@ use warnings; use Test::More; use PublicInbox::TestCommon; my ($tmpdir, $for_destroy) = tmpdir(); -my $maindir = "$tmpdir/main.git"; +my $inboxdir = "$tmpdir/main.git"; my $addr = 'test-public@example.com'; -my $cfgpfx = "publicinbox.test"; my @mods = qw(HTTP::Request::Common Plack::Builder Plack::Test URI::Escape); require_mods(@mods); use_ok $_ foreach @mods; @@ -17,85 +16,108 @@ use PublicInbox::Git; use PublicInbox::Config; use PublicInbox::Eml; use_ok 'PublicInbox::WwwAttach'; -my $config = PublicInbox::Config->new(\<', $cfgpath or BAIL_OUT $!; +print $fh <new($maindir); +close $fh or BAIL_OUT $!; +my $config = PublicInbox::Config->new($cfgpath); +my $git = PublicInbox::Git->new($inboxdir); my $im = PublicInbox::Import->new($git, 'test', $addr); $im->init_bare; -{ - my $qp = "abcdef=g\n==blah\n"; - my $b64 = "b64\xde\xad\xbe\xef\n"; - my $txt = "plain\ntext\npass\nthrough\n"; - my $dot = "dotfile\n"; - $im->add(eml_load('t/psgi_attach.eml')); - $im->add(eml_load('t/data/message_embed.eml')); - $im->done; +my $qp = "abcdef=g\n==blah\n"; +my $b64 = "b64\xde\xad\xbe\xef\n"; +my $txt = "plain\ntext\npass\nthrough\n"; +my $dot = "dotfile\n"; +$im->add(eml_load('t/psgi_attach.eml')); +$im->add(eml_load('t/data/message_embed.eml')); +$im->done; + +my $www = PublicInbox::WWW->new($config); +my $client = sub { + my ($cb) = @_; + my $res; + $res = $cb->(GET('/test/Z%40B/')); + my @href = ($res->content =~ /^href="([^"]+)"/gms); + @href = grep(/\A[\d\.]+-/, @href); + is_deeply([qw(1-queue-pee 2-bayce-sixty-four 3-noop.txt + 4-a.txt)], + \@href, 'attachment links generated'); + + $res = $cb->(GET('/test/Z%40B/1-queue-pee')); + my $qp_res = $res->content; + ok(length($qp_res) >= length($qp), 'QP length is close'); + like($qp_res, qr/\n\z/s, 'trailing newline exists'); + # is(index($qp_res, $qp), 0, 'QP trailing newline is there'); + $qp_res =~ s/\r\n/\n/g; + is(index($qp_res, $qp), 0, 'QP trailing newline is there'); + + $res = $cb->(GET('/test/Z%40B/2-base-sixty-four')); + is(quotemeta($res->content), quotemeta($b64), + 'Base64 matches exactly'); - my $www = PublicInbox::WWW->new($config); - test_psgi(sub { $www->call(@_) }, sub { - my ($cb) = @_; - my $res; - $res = $cb->(GET('/test/Z%40B/')); - my @href = ($res->content =~ /^href="([^"]+)"/gms); - @href = grep(/\A[\d\.]+-/, @href); - is_deeply([qw(1-queue-pee 2-bayce-sixty-four 3-noop.txt - 4-a.txt)], - \@href, 'attachment links generated'); + $res = $cb->(GET('/test/Z%40B/3-noop.txt')); + my $txt_res = $res->content; + ok(length($txt_res) >= length($txt), + 'plain text almost matches'); + like($txt_res, qr/\n\z/s, 'trailing newline exists in text'); + is(index($txt_res, $txt), 0, 'plain text not truncated'); - $res = $cb->(GET('/test/Z%40B/1-queue-pee')); - my $qp_res = $res->content; - ok(length($qp_res) >= length($qp), 'QP length is close'); - like($qp_res, qr/\n\z/s, 'trailing newline exists'); - # is(index($qp_res, $qp), 0, 'QP trailing newline is there'); - $qp_res =~ s/\r\n/\n/g; - is(index($qp_res, $qp), 0, 'QP trailing newline is there'); + $res = $cb->(GET('/test/Z%40B/4-a.txt')); + my $dot_res = $res->content; + ok(length($dot_res) >= length($dot), 'dot almost matches'); + $res = $cb->(GET('/test/Z%40B/4-any-filename.txt')); + is($res->content, $dot_res, 'user-specified filename is OK'); - $res = $cb->(GET('/test/Z%40B/2-base-sixty-four')); - is(quotemeta($res->content), quotemeta($b64), - 'Base64 matches exactly'); + my $mid = '20200418222508.GA13918@dcvr'; + my $irt = '20200418222020.GA2745@dcvr'; + $res = $cb->(GET("/test/$mid/")); + unlike($res->content, qr! multipart/mixed, Size: 0 bytes!, + '0-byte download not offered'); + like($res->content, qr/\bhref="2-embed2x\.eml"/s, + 'href to message/rfc822 attachment visible'); + like($res->content, qr/\bhref="2\.1\.2-test\.eml"/s, + 'href to nested message/rfc822 attachment visible'); - $res = $cb->(GET('/test/Z%40B/3-noop.txt')); - my $txt_res = $res->content; - ok(length($txt_res) >= length($txt), - 'plain text almost matches'); - like($txt_res, qr/\n\z/s, 'trailing newline exists in text'); - is(index($txt_res, $txt), 0, 'plain text not truncated'); + $res = $cb->(GET("/test/$mid/2-embed2x.eml")); + my $eml = PublicInbox::Eml->new(\($res->content)); + is_deeply([ $eml->header_raw('Message-ID') ], [ "<$irt>" ], + 'got attached eml'); + my @subs = $eml->subparts; + is(scalar(@subs), 2, 'attachment had 2 subparts'); + like($subs[0]->body_str, qr/^testing embedded message\n*\z/sm, + '1st attachment is as expected'); + is($subs[1]->header('Content-Type'), 'message/rfc822', + '2nd attachment is as expected'); - $res = $cb->(GET('/test/Z%40B/4-a.txt')); - my $dot_res = $res->content; - ok(length($dot_res) >= length($dot), 'dot almost matches'); - $res = $cb->(GET('/test/Z%40B/4-any-filename.txt')); - is($res->content, $dot_res, 'user-specified filename is OK'); + $res = $cb->(GET("/test/$mid/2.1.2-test.eml")); + $eml = PublicInbox::Eml->new(\($res->content)); + is_deeply([ $eml->header_raw('Message-ID') ], + [ '<20200418214114.7575-1-e@yhbt.net>' ], + 'nested eml retrieved'); +}; - my $mid = '20200418222508.GA13918@dcvr'; - my $irt = '20200418222020.GA2745@dcvr'; - $res = $cb->(GET("/test/$mid/")); - unlike($res->content, qr! multipart/mixed, Size: 0 bytes!, - '0-byte download not offered'); - like($res->content, qr/\bhref="2-embed2x\.eml"/s, - 'href to message/rfc822 attachment visible'); - like($res->content, qr/\bhref="2\.1\.2-test\.eml"/s, - 'href to nested message/rfc822 attachment visible'); +test_psgi(sub { $www->call(@_) }, $client); +SKIP: { + diag 'testing with index indexed'; + require_mods('DBD::SQLite', 19); + my $env = { PI_CONFIG => $cfgpath }; + ok(run_script(['-index', $inboxdir], $env), 'indexed'); - $res = $cb->(GET("/test/$mid/2-embed2x.eml")); - my $eml = PublicInbox::Eml->new(\($res->content)); - is_deeply([ $eml->header_raw('Message-ID') ], [ "<$irt>" ], - 'got attached eml'); - my @subs = $eml->subparts; - is(scalar(@subs), 2, 'attachment had 2 subparts'); - like($subs[0]->body_str, qr/^testing embedded message\n*\z/sm, - '1st attachment is as expected'); - is($subs[1]->header('Content-Type'), 'message/rfc822', - '2nd attachment is as expected'); + test_psgi(sub { $www->call(@_) }, $client); - $res = $cb->(GET("/test/$mid/2.1.2-test.eml")); - $eml = PublicInbox::Eml->new(\($res->content)); - is_deeply([ $eml->header_raw('Message-ID') ], - [ '<20200418214114.7575-1-e@yhbt.net>' ], - 'nested eml retrieved'); - }); + require_mods(qw(Plack::Test::ExternalServer), 18); + my $sock = tcp_server() or die; + my ($out, $err) = map { "$inboxdir/std$_.log" } qw(out err); + my $cmd = [ qw(-httpd -W0), "--stdout=$out", "--stderr=$err" ]; + my $td = start_script($cmd, $env, { 3 => $sock }); + my ($h, $p) = ($sock->sockhost, $sock->sockport); + local $ENV{PLACK_TEST_EXTERNALSERVER_URI} = "http://$h:$p"; + Plack::Test::ExternalServer::test_psgi(client => $client); } done_testing(); -- cgit v1.2.3-24-ge0c7