From 0ec3ddaeea0e3eac3f4e686cd4383840414fbc4d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Mar 2021 19:45:39 -0600 Subject: msg_part_text: discover text in application/octet-stream Some poorly-configured MUAs will send application/octet-stream even for text-only attachments. We can't expect all MUAs to be configured with proper MIME types, and there is plenty of historical mail that falls into this unfortunate category. v2: simplify the check and ensure returned text is Perl "utf8" --- lib/PublicInbox/MsgIter.pm | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/PublicInbox/MsgIter.pm') diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index c503eb98..9c6581cc 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -84,6 +84,14 @@ sub msg_part_text ($$) { # If forcing charset=UTF-8 failed, # caller will warn further down... $s = $part->body if $@; + } elsif ($err && $ct =~ m!\bapplication/octet-stream\b!i) { + # Some unconfigured/poorly-configured MUAs will set + # application/octet-stream even for all text attachments. + # Try to see if it's printable text that we can index + # and display: + $s = $part->body; + utf8::decode($s); + undef($s =~ /[^\p{XPosixPrint}\s]/s ? $s : $err); } ($s, $err); } -- cgit v1.2.3-24-ge0c7