From 7319f5d318a960eeb32a207d226eea7fd9ce2543 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 25 Oct 2021 02:45:53 +0000 Subject: www: $MSGID/raw: set charset in HTTP response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using the charset specified in the message, web browsers are more likely to display the raw text properly for human readers. Inspired by a patch by Thomas Weißschuh: https://public-inbox.org/meta/20211024214337.161779-3-thomas@t-8ch.de/ Cc: Thomas Weißschuh --- lib/PublicInbox/GzipFilter.pm | 19 +++++++++++++------ lib/PublicInbox/Mbox.pm | 24 +++++++++++++----------- 2 files changed, 26 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm index c4858a97..e37f1f76 100644 --- a/lib/PublicInbox/GzipFilter.pm +++ b/lib/PublicInbox/GzipFilter.pm @@ -46,11 +46,10 @@ sub gz_or_noop { sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ } sub psgi_response { + # $code may be an HTTP response code (e.g. 
200) or a CODE ref (mbox_hdr) my ($self, $code, $res_hdr) = @_; - my $env = $self->{env}; - $self->{gz} //= gz_or_noop($res_hdr, $env); - if ($env->{'pi-httpd.async'}) { - my $http = $env->{'psgix.io'}; # PublicInbox::HTTP + if ($self->{env}->{'pi-httpd.async'}) { + my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP $http->{forward} = $self; sub { my ($wcb) = @_; # -httpd provided write callback @@ -58,6 +57,9 @@ sub psgi_response { $self->can('async_next')->($http); # start stepping }; } else { # generic PSGI code path + ref($code) eq 'CODE' and + ($code, $res_hdr) = @{$code->($self)}; + $self->{gz} //= gz_or_noop($res_hdr, $self->{env}); [ $code, $res_hdr, $self ]; } } @@ -116,9 +118,13 @@ sub translate ($$) { sub http_out ($) { my ($self) = @_; - $self->{http_out} //= do { + $self->{http_out} // do { my $args = delete $self->{wcb_args} // return undef; - pop(@$args)->($args); # $wcb->([$code, $hdr_ary]) + my $wcb = pop @$args; # from PublicInbox::HTTP async + # $args->[0] may be \&mbox_hdr or similar + $args = $args->[0]->($self) if ref($args->[0]) eq 'CODE'; + $self->{gz} //= gz_or_noop($args->[1], $self->{env}); + $self->{http_out} = $wcb->($args); # $wcb->([$code, $hdr_ary]) }; } @@ -131,6 +137,7 @@ sub write { # more data to buffer after this sub zmore { my $self = $_[0]; # $_[1] => input + http_out($self); my $err = $self->{gz}->deflate($_[1], $self->{zbuf}); die "gzip->deflate: $err" if $err != Z_OK; undef; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index 4f84eea6..b977308d 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -18,7 +18,7 @@ sub getline { my ($ctx) = @_; # ctx my $smsg = $ctx->{smsg} or return; my $ibx = $ctx->{ibx}; - my $eml = $ibx->smsg_eml($smsg) or return; + my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return; my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}}); $ctx->zmore(msg_hdr($ctx, $eml)); if ($n) { @@ -45,14 +45,15 @@ sub async_eml { # for async_blob_cb my
$smsg = delete $ctx->{smsg}; # next message $ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}}); - + local $ctx->{eml} = $eml; # for mbox_hdr $ctx->zmore(msg_hdr($ctx, $eml)); $ctx->write(msg_body($eml)); } -sub res_hdr ($$) { - my ($ctx, $subject) = @_; - my $fn = $subject // ''; +sub mbox_hdr ($) { + my ($ctx) = @_; + my $eml = $ctx->{eml} //= $ctx->{ibx}->smsg_eml($ctx->{smsg}); + my $fn = $eml->header_str('Subject') // ''; $fn =~ s/^re:\s+//i; $fn = to_filename($fn) // 'no-subject'; my @hdr = ('Content-Type'); @@ -64,17 +65,19 @@ sub res_hdr ($$) { push @hdr, 'text/plain'; $fn .= '.txt'; } + my $cs = $ctx->{eml}->ct->{attributes}->{charset} // 'UTF-8'; + $cs = 'UTF-8' if $cs =~ /[^a-zA-Z0-9\-\_]/; # avoid header injection + $hdr[-1] .= "; charset=$cs"; push @hdr, 'Content-Disposition', "inline; filename=$fn"; - \@hdr; + [ 200, \@hdr ]; } # for rare cases where v1 inboxes aren't indexed w/ ->over at all sub no_over_raw ($) { my ($ctx) = @_; my $mref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return; - my $eml = PublicInbox::Eml->new($mref); - [ 200, res_hdr($ctx, $eml->header_str('Subject')), - [ msg_hdr($ctx, $eml) . msg_body($eml) ] ] + my $eml = $ctx->{eml} = PublicInbox::Eml->new($mref); + [ @{mbox_hdr($ctx)}, [ msg_hdr($ctx, $eml) . msg_body($eml) ] ] } # /$INBOX/$MESSAGE_ID/raw @@ -85,9 +88,8 @@ sub emit_raw { my ($id, $prev); my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ]; my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return; - my $res_hdr = res_hdr($ctx, $smsg->{subject}); bless $ctx, __PACKAGE__; - $ctx->psgi_response(200, $res_hdr); + $ctx->psgi_response(\&mbox_hdr); } sub msg_hdr ($$) { -- cgit v1.2.3-24-ge0c7