about summary refs log tree commit homepage
path: root/t
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-10-25 02:45:53 +0000
committer Eric Wong <e@80x24.org> 2021-10-25 08:17:01 +0000
commit 7319f5d318a960eeb32a207d226eea7fd9ce2543 (patch)
tree 5fbdee2fc27a9c9546f96e0fabd846045f9f3e49 /t
parent ead71b8c387f0748338a4add37eeb437a14b02d8 (diff)
download public-inbox-7319f5d318a960eeb32a207d226eea7fd9ce2543.tar.gz
By using the charset specified in the message, web browsers are
more likely to display the raw text properly for human readers.

Inspired by a patch by Thomas Weißschuh:
  https://public-inbox.org/meta/20211024214337.161779-3-thomas@t-8ch.de/

Cc: Thomas Weißschuh <thomas@t-8ch.de>
Diffstat (limited to 't')
-rw-r--r-- t/plack.t 26
-rw-r--r-- t/psgi_v2.t 5
2 files changed, 27 insertions, 4 deletions
diff --git a/t/plack.t b/t/plack.t
index 40ff2baa..e4dedce6 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -10,17 +10,24 @@ require_mods(@mods);
 foreach my $mod (@mods) { use_ok $mod; }
 ok(-f $psgi, "psgi example file found");
 my $pfx = 'http://example.com/test';
-# ensure successful message delivery
-my $ibx = create_inbox('test', sub {
+my $eml = eml_load('t/iso-2202-jp.eml');
+# ensure successful message deliveries
+my $ibx = create_inbox('test-1', sub {
         my ($im, $ibx) = @_;
         my $addr = $ibx->{-primary_address};
-        $im->add(PublicInbox::Eml->new(<<EOF)) or BAIL_OUT '->add';
+        $im->add($eml) or xbail '->add';
+        $eml->header_set('Content-Type',
+                "text/plain; charset=\rso\rb\0gus\rithurts");
+        $eml->header_set('Message-ID', '<broken@example.com>');
+        $im->add($eml) or xbail '->add';
+        $im->add(PublicInbox::Eml->new(<<EOF)) or xbail '->add';
 From: Me <me\@example.com>
 To: You <you\@example.com>
 Cc: $addr
 Message-Id: <blah\@example.com>
 Subject: hihi
 Date: Fri, 02 Oct 1993 00:00:00 +0000
+Content-Type: text/plain; charset=iso-8859-1
 
 > quoted text
 zzzzzz
@@ -195,6 +202,19 @@ test_psgi($app, sub {
         my $res = $cb->(GET($pfx . '/blah@example.com/raw'));
         is(200, $res->code, 'success response received for /*/raw');
         like($res->content, qr!^From !sm, "mbox returned");
+        is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1',
+                'charset from message used');
+
+        $res = $cb->(GET($pfx . '/broken@example.com/raw'));
+        is($res->header('Content-Type'), 'text/plain; charset=UTF-8',
+                'broken charset ignored');
+
+        $res = $cb->(GET($pfx . '/199707281508.AAA24167@hoyogw.example/raw'));
+        is($res->header('Content-Type'), 'text/plain; charset=ISO-2022-JP',
+                'ISO-2002-JP returned');
+        chomp(my $body = $res->content);
+        my $raw = PublicInbox::Eml->new(\$body);
+        is($raw->body_raw, $eml->body_raw, 'ISO-2022-JP body unmodified');
 
         $res = $cb->(GET($pfx . '/blah@example.com/t.mbox.gz'));
         is(501, $res->code, '501 when overview missing');
diff --git a/t/psgi_v2.t b/t/psgi_v2.t
index 64c1a8d3..7d73b606 100644
--- a/t/psgi_v2.t
+++ b/t/psgi_v2.t
@@ -20,11 +20,12 @@ To: test@example.com
 Subject: this is a subject
 Message-ID: <a-mid@b>
 Date: Fri, 02 Oct 1993 00:00:00 +0000
+Content-Type: text/plain; charset=iso-8859-1
 
 hello world
 EOF
 my $new_mid;
-my $ibx = create_inbox 'v2', version => 2, indexlevel => 'medium',
+my $ibx = create_inbox 'v2-1', version => 2, indexlevel => 'medium',
                         tmpdir => "$tmpdir/v2", sub {
         my ($im, $ibx) = @_;
         $im->add($eml) or BAIL_OUT;
@@ -68,6 +69,8 @@ my $client0 = sub {
         like($res->content, qr!\$INBOX_DIR/description missing!,
                 'got v2 description missing message');
         $res = $cb->(GET('/v2test/a-mid@b/raw'));
+        is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1',
+                'charset from message used');
         $raw = $res->content;
         unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted');
         like($raw, qr/^hello world$/m, 'got first message');