From 130af18f06ae9b91e07985ff56b4dd90cedbd744 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 31 Oct 2019 00:33:04 +0000 Subject: msgiter: do not assume UTF-8 if Email::MIME->body_str succeeds ISO-2022-JP and other non-UTF-8 messages need to be displayed correctly. Fixes: 7d82a8bc04ce ('handle "multipart/mixed" messages which are not multipart') --- MANIFEST | 1 + lib/PublicInbox/MsgIter.pm | 3 ++- t/iso-2202-jp.mbox | 10 ++++++++++ t/msg_iter.t | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 t/iso-2202-jp.mbox diff --git a/MANIFEST b/MANIFEST index d1b6749a..dfabd7f2 100644 --- a/MANIFEST +++ b/MANIFEST @@ -231,6 +231,7 @@ t/inbox.t t/indexlevels-mirror-v1.t t/indexlevels-mirror.t t/init.t +t/iso-2202-jp.mbox t/linkify.t t/main-bin/spamc t/mda.t diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index ce08ff84..f11ba223 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -45,7 +45,8 @@ sub msg_part_text ($$) { # times when it should not have been: # <87llgalspt.fsf@free.fr> # <200308111450.h7BEoOu20077@mail.osdl.org> - if ($ct =~ m!\btext/plain\b!i || $ct =~ m!\bmultipart/mixed\b!i) { + if ($err && ($ct =~ m!\btext/plain\b!i || + $ct =~ m!\bmultipart/mixed\b!i)) { # Try to assume UTF-8 because Alpine seems to # do wacky things and set charset=X-UNKNOWN $part->charset_set('UTF-8'); diff --git a/t/iso-2202-jp.mbox b/t/iso-2202-jp.mbox new file mode 100644 index 00000000..1a8e1974 --- /dev/null +++ b/t/iso-2202-jp.mbox @@ -0,0 +1,10 @@ +From historical@ruby-dev Thu Jan 1 00:00:00 1970 +Message-Id: <199707281508.AAA24167@hoyogw.example> +Date: Tue, 29 Jul 97 00:08:29 +0900 +From: matz@example.com +Subject: [ruby-dev:4] +To: ruby-dev@example +Mime-Version: 1.0 +Content-Type: text/plain; charset=ISO-2022-JP + +|けいじゅ@今はフリー(^^;;;です. 
diff --git a/t/msg_iter.t b/t/msg_iter.t index f6fd3bb0..f9b586f1 100644 --- a/t/msg_iter.t +++ b/t/msg_iter.t @@ -40,5 +40,23 @@ use_ok('PublicInbox::MsgIter'); 'nested part shows up properly'); } +{ + my $f = 't/iso-2202-jp.mbox'; + my $mime = Email::MIME->new(do { + open my $fh, '<', $f or die "open($f): $!"; + local $/; + <$fh>; + }); + my $raw = ''; + msg_iter($mime, sub { + my ($part, $level, @ex) = @{$_[0]}; + my ($s, $err) = msg_part_text($part, 'text/plain'); + ok(!$err, 'no error'); + $raw .= $s; + }); + ok(length($raw) > 0, 'got non-empty message'); + is(index($raw, '$$$'), -1, 'no unescaped $$$'); +} + done_testing(); 1; -- cgit v1.2.3-24-ge0c7