From 0ec3ddaeea0e3eac3f4e686cd4383840414fbc4d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 10 Mar 2021 19:45:39 -0600 Subject: msg_part_text: discover text in application/octet-stream Some poorly-configured MUAs will send application/octet-stream even for text-only attachments. We can't expect all MUAs to be configured with proper MIME types, and there is plenty of historical mail that falls into this unfortunate category. v2: simplify the check and ensure returned text is Perl "utf8" --- t/msg_iter.t | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 5 deletions(-) (limited to 't/msg_iter.t') diff --git a/t/msg_iter.t b/t/msg_iter.t index e46d515c..ae3594da 100644 --- a/t/msg_iter.t +++ b/t/msg_iter.t @@ -1,10 +1,8 @@ # Copyright (C) 2016-2021 all contributors # License: AGPL-3.0+ -use strict; -use warnings; -use Test::More; -use PublicInbox::TestCommon; +use strict; use v5.10.1; use PublicInbox::TestCommon; use PublicInbox::Hval qw(ascii_html); +use MIME::QuotedPrint 3.05 qw(encode_qp); use_ok('PublicInbox::MsgIter'); { @@ -88,5 +86,62 @@ use_ok('PublicInbox::MsgIter'); is($check[1], $nq, 'long quoted section matches'); } +{ + open my $fh, '<', 't/utf8.eml' or BAIL_OUT $!; + my $expect = do { local $/; <$fh> }; + my $qp_patch = encode_qp($expect, "\r\n"); + my $common = <new(<each_part(sub { + my ($part, $level, @ex) = @{$_[0]}; + my ($s, $err) = msg_part_text($part, $part->content_type); + push @parts, $s; + }); + $expect =~ s/\n/\r\n/sg; + utf8::decode($expect); # aka "bytes2str" + is_deeply(\@parts, [ "blah\r\n", $expect ], + 'fallback to application/octet-stream as UTF-8 text'); + + my $qp_binary = encode_qp("Binary\0crap", "\r\n"); + $eml = PublicInbox::Eml->new(<each_part(sub { + my ($part, $level, @ex) = @{$_[0]}; + my ($s, $err) = msg_part_text($part, $part->content_type); + push @parts, $s; + push @err, $err; + }); + is_deeply(\@parts, [ "blah\r\n", undef ], + 'non-text ignored in 
octet-stream'); + ok($err[1], 'got error for second element'); +} + done_testing(); -1; -- cgit v1.2.3-24-ge0c7