From 7319f5d318a960eeb32a207d226eea7fd9ce2543 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 25 Oct 2021 02:45:53 +0000 Subject: www: $MSGID/raw: set charset in HTTP response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using the charset specified in the message, web browsers are more likely to display the raw text properly for human readers. Inspired by a patch by Thomas Weißschuh: https://public-inbox.org/meta/20211024214337.161779-3-thomas@t-8ch.de/ Cc: Thomas Weißschuh --- t/plack.t | 26 +++++++++++++++++++++++--- t/psgi_v2.t | 5 ++++- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 't') diff --git a/t/plack.t b/t/plack.t index 40ff2baa..e4dedce6 100644 --- a/t/plack.t +++ b/t/plack.t @@ -10,17 +10,24 @@ require_mods(@mods); foreach my $mod (@mods) { use_ok $mod; } ok(-f $psgi, "psgi example file found"); my $pfx = 'http://example.com/test'; -# ensure successful message delivery -my $ibx = create_inbox('test', sub { +my $eml = eml_load('t/iso-2202-jp.eml'); +# ensure successful message deliveries +my $ibx = create_inbox('test-1', sub { my ($im, $ibx) = @_; my $addr = $ibx->{-primary_address}; - $im->add(PublicInbox::Eml->new(<add'; + $im->add($eml) or xbail '->add'; + $eml->header_set('Content-Type', + "text/plain; charset=\rso\rb\0gus\rithurts"); + $eml->header_set('Message-ID', ''); + $im->add($eml) or xbail '->add'; + $im->add(PublicInbox::Eml->new(<add'; From: Me To: You Cc: $addr Message-Id: Subject: hihi Date: Fri, 02 Oct 1993 00:00:00 +0000 +Content-Type: text/plain; charset=iso-8859-1 > quoted text zzzzzz @@ -195,6 +202,19 @@ test_psgi($app, sub { my $res = $cb->(GET($pfx . '/blah@example.com/raw')); is(200, $res->code, 'success response received for /*/raw'); like($res->content, qr!^From !sm, "mbox returned"); + is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1', + 'charset from message used'); + + $res = $cb->(GET($pfx . 
'/broken@example.com/raw')); + is($res->header('Content-Type'), 'text/plain; charset=UTF-8', + 'broken charset ignored'); + + $res = $cb->(GET($pfx . '/199707281508.AAA24167@hoyogw.example/raw')); + is($res->header('Content-Type'), 'text/plain; charset=ISO-2022-JP', + 'ISO-2022-JP returned'); + chomp(my $body = $res->content); + my $raw = PublicInbox::Eml->new(\$body); + is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified'); $res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz')); is(501, $res->code, '501 when overview missing'); diff --git a/t/psgi_v2.t b/t/psgi_v2.t index 64c1a8d3..7d73b606 100644 --- a/t/psgi_v2.t +++ b/t/psgi_v2.t @@ -20,11 +20,12 @@ To: test@example.com Subject: this is a subject Message-ID: Date: Fri, 02 Oct 1993 00:00:00 +0000 +Content-Type: text/plain; charset=iso-8859-1 hello world EOF my $new_mid; -my $ibx = create_inbox 'v2', version => 2, indexlevel => 'medium', +my $ibx = create_inbox 'v2-1', version => 2, indexlevel => 'medium', tmpdir => "$tmpdir/v2", sub { my ($im, $ibx) = @_; $im->add($eml) or BAIL_OUT; @@ -68,6 +69,8 @@ my $client0 = sub { like($res->content, qr!\$INBOX_DIR/description missing!, 'got v2 description missing message'); $res = $cb->(GET('/v2test/a-mid@b/raw')); + is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1', + 'charset from message used'); $raw = $res->content; unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted'); like($raw, qr/^hello world$/m, 'got first message'); -- cgit v1.2.3-24-ge0c7