From ec24b4af88d7922e4bf9846e76b9455fb6a5bda4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 4 Oct 2017 22:54:23 +0000 Subject: mbox: support inline filename via Content-Disposition header This is hopefully more sensical than "raw" files from resulting downloads. --- lib/PublicInbox/Hval.pm | 12 +++++++++- lib/PublicInbox/Mbox.pm | 64 ++++++++++++++++++++++++++++++++++++++++--------- t/hval.t | 10 ++++++++ 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm index 8d36fc2b..00a923ea 100644 --- a/lib/PublicInbox/Hval.pm +++ b/lib/PublicInbox/Hval.pm @@ -9,7 +9,7 @@ use warnings; use Encode qw(find_encoding); use PublicInbox::MID qw/mid_clean mid_escape/; use base qw/Exporter/; -our @EXPORT_OK = qw/ascii_html obfuscate_addrs/; +our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename/; # for user-generated content (UGC) which may have excessively long lines # and screw up rendering on some browsers. This is the only CSS style @@ -106,4 +106,14 @@ sub obfuscate_addrs ($$) { /sge; } +# like format_sanitized_subject in git.git pretty.c with '%f' format string +sub to_filename ($) { + my ($s, undef) = split(/\n/, $_[0]); + $s =~ s/[^A-Za-z0-9_\.]+/-/g; + $s =~ tr/././s; + $s =~ s/[\.\-]+\z//; + $s =~ s/\A[\.\-]+//; + $s +} + 1; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 88daba75..2ea326a5 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -7,17 +7,43 @@ package PublicInbox::Mbox; use strict; use warnings; use PublicInbox::MID qw/mid_clean mid_escape/; -require Email::Simple; +use PublicInbox::Hval qw/to_filename/; +use Email::Simple; +use Email::MIME::Encode; + +sub subject_fn ($) { + my ($simple) = @_; + my $fn = $simple->header('Subject'); + return 'no-subject' unless defined($fn); + + # no need for full Email::MIME, here + if ($fn =~ /=\?/) { + eval { $fn = Encode::decode('MIME-Header', $fn) }; + $fn = 'no-subject' if $@; + } + $fn =~ s/^re:\s+//i; + $fn = to_filename($fn); + $fn eq '' ? 'no-subject' : $fn; +} sub emit1 { my ($ctx, $msg) = @_; $msg = Email::Simple->new($msg); + my $fn = subject_fn($msg); + my @hdr = ('Content-Type'); + if ($ctx->{-inbox}->{obfuscate}) { + # obfuscation is stupid, but maybe scrapers are, too... + push @hdr, 'application/mbox'; + $fn .= '.mbox'; + } else { + push @hdr, 'text/plain'; + $fn .= '.txt'; + } + push @hdr, 'Content-Disposition', "inline; filename=$fn"; # single message should be easily renderable in browsers, # unless obfuscation is enabled :< - [ 200, [ 'Content-Type', - $ctx->{-inbox}->{obfuscate} ? 'application/mbox' : 'text/plain' ], - [ msg_str($ctx, $msg)] ] + [ 200, \@hdr, [ msg_str($ctx, $msg) ] ] } sub msg_str { @@ -69,9 +95,7 @@ sub thread_mbox { return sub { need_gzip(@_) } if $@; my $cb = sub { $srch->get_thread($ctx->{mid}, @_) }; - # http://www.iana.org/assignments/media-types/application/gzip - [200, ['Content-Type' => 'application/gzip'], - PublicInbox::MboxGz->new($ctx, $cb) ]; + PublicInbox::MboxGz->response($ctx, $cb); } sub emit_range { @@ -85,11 +109,9 @@ sub emit_range { } else { return [404, [qw(Content-Type text/plain)], []]; } - my $cb = sub { $ctx->{srch}->query($query, @_) }; - # http://www.iana.org/assignments/media-types/application/gzip - [200, [qw(Content-Type application/gzip)], - PublicInbox::MboxGz->new($ctx, $cb) ]; + my $cb = sub { $ctx->{srch}->query($query, @_) }; + PublicInbox::MboxGz->response($ctx, $cb); } sub need_gzip { @@ -123,6 +145,15 @@ sub new { }, $class; } +sub response { + my ($class, $ctx, $cb) = @_; + my $body = $class->new($ctx, $cb); + # http://www.iana.org/assignments/media-types/application/gzip + $body->{hdr} = [ 'Content-Type', 'application/gzip' ]; + my $hdr = $body->getline; # fill in Content-Disposition filename + [ 200, $hdr, $body ]; +} + # called by Plack::Util::foreach or similar sub getline { my ($self) = @_; @@ -131,10 +162,19 @@ sub getline { my $ibx = $ctx->{-inbox}; my $gz = $self->{gz}; do { + # work on existing result set while (defined(my $smsg = shift @{$self->{msgs}})) { my $msg = eval { $ibx->msg_by_smsg($smsg) } or next; $msg = Email::Simple->new($msg); $gz->write(PublicInbox::Mbox::msg_str($ctx, $msg)); + + # use subject of first message as subject + if (my $hdr = delete $self->{hdr}) { + my $fn = PublicInbox::Mbox::subject_fn($msg); + push @$hdr, 'Content-Disposition', + "inline; filename=$fn.mbox.gz"; + return $hdr; + } my $bref = $self->{buf}; if (length($$bref) >= 8192) { my $ret = $$bref; # copy :< @@ -145,6 +185,8 @@ sub getline { # be fair to other clients on public-inbox-httpd: return ''; } + + # refill result set $res = $self->{cb}->($self->{opts}); $self->{msgs} = $res->{msgs}; $res = scalar @{$self->{msgs}}; diff --git a/t/hval.t b/t/hval.t index a3712666..2af4d2af 100644 --- a/t/hval.t +++ b/t/hval.t @@ -32,4 +32,14 @@ EOF is($html, $exp, 'only obfuscated relevant addresses'); +is('foo-bar', PublicInbox::Hval::to_filename('foo bar '), + 'to_filename has no trailing -'); + +is('foo-bar', PublicInbox::Hval::to_filename("foo bar\nanother line\n"), + 'to_filename has no repeated -, and nothing past LF'); + +is('foo.bar', PublicInbox::Hval::to_filename("foo....bar"), + 'to_filename squeezes -'); + + done_testing(); -- cgit v1.2.3-24-ge0c7