about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-10-25 02:45:53 +0000
committer Eric Wong <e@80x24.org> 2021-10-25 08:17:01 +0000
commit 7319f5d318a960eeb32a207d226eea7fd9ce2543 (patch)
tree 5fbdee2fc27a9c9546f96e0fabd846045f9f3e49 /lib/PublicInbox
parent ead71b8c387f0748338a4add37eeb437a14b02d8 (diff)
download public-inbox-7319f5d318a960eeb32a207d226eea7fd9ce2543.tar.gz
By using the charset specified in the message, web browsers are
more likely to display the raw text properly for human readers.

Inspired by a patch by Thomas Weißschuh:
  https://public-inbox.org/meta/20211024214337.161779-3-thomas@t-8ch.de/

Cc: Thomas Weißschuh <thomas@t-8ch.de>
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/GzipFilter.pm 19
-rw-r--r-- lib/PublicInbox/Mbox.pm 24
2 files changed, 26 insertions, 17 deletions
diff --git a/lib/PublicInbox/GzipFilter.pm b/lib/PublicInbox/GzipFilter.pm
index c4858a97..e37f1f76 100644
--- a/lib/PublicInbox/GzipFilter.pm
+++ b/lib/PublicInbox/GzipFilter.pm
@@ -46,11 +46,10 @@ sub gz_or_noop {
 sub gzf_maybe ($$) { bless { gz => gz_or_noop(@_) }, __PACKAGE__ }
 
 sub psgi_response {
+        # $code may be an HTTP response code (e.g. 200) or a CODE ref (mbox_hdr)
         my ($self, $code, $res_hdr) = @_;
-        my $env = $self->{env};
-        $self->{gz} //= gz_or_noop($res_hdr, $env);
-        if ($env->{'pi-httpd.async'}) {
-                my $http = $env->{'psgix.io'}; # PublicInbox::HTTP
+        if ($self->{env}->{'pi-httpd.async'}) {
+                my $http = $self->{env}->{'psgix.io'}; # PublicInbox::HTTP
                 $http->{forward} = $self;
                 sub {
                         my ($wcb) = @_; # -httpd provided write callback
@@ -58,6 +57,9 @@ sub psgi_response {
                         $self->can('async_next')->($http); # start stepping
                 };
         } else { # generic PSGI code path
+                ref($code) eq 'CODE' and
+                        ($code, $res_hdr) = @{$code->($self)};
+                $self->{gz} //= gz_or_noop($res_hdr, $self->{env});
                 [ $code, $res_hdr, $self ];
         }
 }
@@ -116,9 +118,13 @@ sub translate ($$) {
 
 sub http_out ($) {
         my ($self) = @_;
-        $self->{http_out} //= do {
+        $self->{http_out} // do {
                 my $args = delete $self->{wcb_args} // return undef;
-                pop(@$args)->($args); # $wcb->([$code, $hdr_ary])
+                my $wcb = pop @$args; # from PublicInbox::HTTP async
+                # $args->[0] may be \&mbox_hdr or similar
+                $args = $args->[0]->($self) if ref($args->[0]) eq 'CODE';
+                $self->{gz} //= gz_or_noop($args->[1], $self->{env});
+                $self->{http_out} = $wcb->($args); # $wcb->([$code, $hdr_ary])
         };
 }
 
@@ -131,6 +137,7 @@ sub write {
 # more data to buffer after this
 sub zmore {
         my $self = $_[0]; # $_[1] => input
+        http_out($self);
         my $err = $self->{gz}->deflate($_[1], $self->{zbuf});
         die "gzip->deflate: $err" if $err != Z_OK;
         undef;
diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm
index 4f84eea6..b977308d 100644
--- a/lib/PublicInbox/Mbox.pm
+++ b/lib/PublicInbox/Mbox.pm
@@ -18,7 +18,7 @@ sub getline {
         my ($ctx) = @_; # ctx
         my $smsg = $ctx->{smsg} or return;
         my $ibx = $ctx->{ibx};
-        my $eml = $ibx->smsg_eml($smsg) or return;
+        my $eml = delete($ctx->{eml}) // $ibx->smsg_eml($smsg) // return;
         my $n = $ctx->{smsg} = $ibx->over->next_by_mid(@{$ctx->{next_arg}});
         $ctx->zmore(msg_hdr($ctx, $eml));
         if ($n) {
@@ -45,14 +45,15 @@ sub async_eml { # for async_blob_cb
         my $smsg = delete $ctx->{smsg};
         # next message
         $ctx->{smsg} = $ctx->{ibx}->over->next_by_mid(@{$ctx->{next_arg}});
-
+        local $ctx->{eml} = $eml; # for mbox_hdr
         $ctx->zmore(msg_hdr($ctx, $eml));
         $ctx->write(msg_body($eml));
 }
 
-sub res_hdr ($$) {
-        my ($ctx, $subject) = @_;
-        my $fn = $subject // '';
+sub mbox_hdr ($) {
+        my ($ctx) = @_;
+        my $eml = $ctx->{eml} //= $ctx->{ibx}->smsg_eml($ctx->{smsg});
+        my $fn = $eml->header_str('Subject') // '';
         $fn =~ s/^re:\s+//i;
         $fn = to_filename($fn) // 'no-subject';
         my @hdr = ('Content-Type');
@@ -64,17 +65,19 @@ sub res_hdr ($$) {
                 push @hdr, 'text/plain';
                 $fn .= '.txt';
         }
+        my $cs = $ctx->{eml}->ct->{attributes}->{charset} // 'UTF-8';
+        $cs = 'UTF-8' if $cs =~ /[^a-zA-Z0-9\-\_]/; # avoid header injection
+        $hdr[-1] .= "; charset=$cs";
         push @hdr, 'Content-Disposition', "inline; filename=$fn";
-        \@hdr;
+        [ 200, \@hdr ];
 }
 
 # for rare cases where v1 inboxes aren't indexed w/ ->over at all
 sub no_over_raw ($) {
         my ($ctx) = @_;
         my $mref = $ctx->{ibx}->msg_by_mid($ctx->{mid}) or return;
-        my $eml = PublicInbox::Eml->new($mref);
-        [ 200, res_hdr($ctx, $eml->header_str('Subject')),
-                [ msg_hdr($ctx, $eml) . msg_body($eml) ] ]
+        my $eml = $ctx->{eml} = PublicInbox::Eml->new($mref);
+        [ @{mbox_hdr($ctx)}, [ msg_hdr($ctx, $eml) . msg_body($eml) ] ]
 }
 
 # /$INBOX/$MESSAGE_ID/raw
@@ -85,9 +88,8 @@ sub emit_raw {
         my ($id, $prev);
         my $mip = $ctx->{next_arg} = [ $ctx->{mid}, \$id, \$prev ];
         my $smsg = $ctx->{smsg} = $over->next_by_mid(@$mip) or return;
-        my $res_hdr = res_hdr($ctx, $smsg->{subject});
         bless $ctx, __PACKAGE__;
-        $ctx->psgi_response(200, $res_hdr);
+        $ctx->psgi_response(\&mbox_hdr);
 }
 
 sub msg_hdr ($$) {