From 7d82a8bc04ce2e686371abc6b438ab121b9fa7d0 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 30 Dec 2018 12:41:25 +0000
Subject: handle "multipart/mixed" messages which are not multipart

I've found two examples on https://lore.kernel.org/lkml/ where
the messages declared themselves to be "multipart/mixed" but were
actually plain text:

  <87llgalspt.fsf@free.fr>
  <200308111450.h7BEoOu20077@mail.osdl.org>

With the mboxrd downloaded, mutt is able to view them without
difficulty.

Note: this change would require reindexing of Xapian to pick up
the changes.  But it's only two ancient messages, the first was
resent by the original sender and the second is too old to be
relevant.
---
 lib/PublicInbox/MsgIter.pm | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

(limited to 'lib/PublicInbox/MsgIter.pm')

diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index a795f617..9e2d797f 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -5,7 +5,7 @@ package PublicInbox::MsgIter;
 use strict;
 use warnings;
 use base qw(Exporter);
-our @EXPORT = qw(msg_iter);
+our @EXPORT = qw(msg_iter msg_part_text);
 use PublicInbox::MIME;
 
 # Like Email::MIME::walk_parts, but this is:
@@ -34,4 +34,34 @@ sub msg_iter ($$) {
 	}
 }
 
+# Decodes the text of a single MIME part.
+# $part: an object providing body_str, charset_set and body
+# $ct: content type string, matched against text/plain and multipart/mixed
+# Returns ($text, $err), where $err is the error (if any) from the
+# first decode attempt using the charset the part declared.
+sub msg_part_text ($$) {
+	my ($part, $ct) = @_;
+
+	my $s = eval { $part->body_str };
+	my $err = $@;
+
+	# Only retry when the declared charset failed to decode;
+	# a part which decoded cleanly must keep its own charset.
+	# text/plain is the default, multipart/mixed happened a few
+	# times when it should not have been:
+	#   <87llgalspt.fsf@free.fr>
+	#   <200308111450.h7BEoOu20077@mail.osdl.org>
+	if ($err && $ct =~ m!\b(?:text/plain|multipart/mixed)\b!i) {
+		# Try to assume UTF-8 because Alpine seems to
+		# do wacky things and set charset=X-UNKNOWN
+		$part->charset_set('UTF-8');
+		$s = eval { $part->body_str };
+
+		# If forcing charset=UTF-8 failed,
+		# caller will warn further down...
+		$s = $part->body if $@;
+	}
+	($s, $err);
+}
+
 1;
-- 
cgit v1.2.3-24-ge0c7