about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2018-12-30 12:41:25 +0000
committer Eric Wong <e@80x24.org> 2018-12-30 20:15:06 +0000
commit 7d82a8bc04ce2e686371abc6b438ab121b9fa7d0 (patch)
tree 137fd61f2f12c0f781b670ef25bf65efbe60ab1b /lib/PublicInbox
parent c3a8ba378c7d3548a5d3ede110b90f8aa8e2473e (diff)
download public-inbox-7d82a8bc04ce2e686371abc6b438ab121b9fa7d0.tar.gz
I've found two examples on https://lore.kernel.org/lkml/
where the messages declared themselves to be "multipart/mixed"
but were actually plain text:

	<87llgalspt.fsf@free.fr>
	<200308111450.h7BEoOu20077@mail.osdl.org>

With the mboxrd downloaded, mutt is able to view them without
difficulty.

Note: this change requires reindexing Xapian to pick up the
changes.  But it affects only two ancient messages: the first was
resent by the original sender, and the second is too old to be
relevant.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/ContentId.pm 9
-rw-r--r-- lib/PublicInbox/MsgIter.pm 25
-rw-r--r-- lib/PublicInbox/SearchIdx.pm 14
-rw-r--r-- lib/PublicInbox/View.pm 25
4 files changed, 29 insertions, 44 deletions
diff --git a/lib/PublicInbox/ContentId.pm b/lib/PublicInbox/ContentId.pm
index b1d27eb8..dd3155be 100644
--- a/lib/PublicInbox/ContentId.pm
+++ b/lib/PublicInbox/ContentId.pm
@@ -75,14 +75,7 @@ sub content_digest ($) {
                 }
                 $dig->add("b\0");
                 my $ct = $part->content_type || 'text/plain';
-                my $s = eval { $part->body_str };
-                if ($@ && $ct =~ m!\btext/plain\b!i) {
-                        # Try to assume UTF-8 because Alpine
-                        # seems to do wacky things and set
-                        # charset=X-UNKNOWN
-                        $part->charset_set('UTF-8');
-                        $s = eval { $part->body_str };
-                }
+                my ($s, undef) = msg_part_text($part, $ct);
                 if (defined $s) {
                         $s =~ s/\r\n/\n/gs;
                         $s =~ s/\s*\z//s;
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index a795f617..9e2d797f 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -5,7 +5,7 @@ package PublicInbox::MsgIter;
 use strict;
 use warnings;
 use base qw(Exporter);
-our @EXPORT = qw(msg_iter);
+our @EXPORT = qw(msg_iter msg_part_text);
 use PublicInbox::MIME;
 
 # Like Email::MIME::walk_parts, but this is:
@@ -34,4 +34,27 @@ sub msg_iter ($$) {
         }
 }
 
+sub msg_part_text ($$) {
+        my ($part, $ct) = @_;
+
+        my $s = eval { $part->body_str };
+        my $err = $@;
+
+        # text/plain is the default, multipart/mixed happened a few
+        # times when it should not have been:
+        #   <87llgalspt.fsf@free.fr>
+        #   <200308111450.h7BEoOu20077@mail.osdl.org>
+        if ($ct =~ m!\btext/plain\b!i || $ct =~ m!\bmultipart/mixed\b!i) {
+                # Try to assume UTF-8 because Alpine seems to
+                # do wacky things and set charset=X-UNKNOWN
+                $part->charset_set('UTF-8');
+                $s = eval { $part->body_str };
+
+                # If forcing charset=UTF-8 failed,
+                # caller will warn further down...
+                $s = $part->body if $@;
+        }
+        ($s, $err);
+}
+
 1;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index ca832ad3..76f3f33a 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -305,19 +305,7 @@ sub add_xapian ($$$$$) {
                         $self->index_text($fn, 1, 'XFN');
                 }
 
-                return if $ct =~ m!\btext/x?html\b!i;
-
-                my $s = eval { $part->body_str };
-                if ($@) {
-                        if ($ct =~ m!\btext/plain\b!i) {
-                                # Try to assume UTF-8 because Alpine
-                                # seems to do wacky things and set
-                                # charset=X-UNKNOWN
-                                $part->charset_set('UTF-8');
-                                $s = eval { $part->body_str };
-                                $s = $part->body if $@;
-                        }
-                }
+                my ($s, undef) = msg_part_text($part, $ct);
                 defined $s or return;
 
                 my (@orig, @quot);
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 86acd824..bb49c035 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -543,33 +543,14 @@ sub add_text_body {
         my ($part, $depth) = @$p; # attachment @idx is unused
         my $ct = $part->content_type || 'text/plain';
         my $fn = $part->filename;
+        my ($s, $err) = msg_part_text($part, $ct);
 
-        if ($ct =~ m!\btext/x?html\b!i) {
-                return attach_link($upfx, $ct, $p, $fn);
-        }
-
-        my $s = eval { $part->body_str };
-
-        # badly-encoded message? tell the world about it!
-        my $err = $@;
-        if ($err) {
-                if ($ct =~ m!\btext/plain\b!i) {
-                        # Try to assume UTF-8 because Alpine seems to
-                        # do wacky things and set charset=X-UNKNOWN
-                        $part->charset_set('UTF-8');
-                        $s = eval { $part->body_str };
-
-                        # If forcing charset=UTF-8 failed,
-                        # attach_link will warn further down...
-                        $s = $part->body if $@;
-                } else {
-                        return attach_link($upfx, $ct, $p, $fn);
-                }
-        }
+        return attach_link($upfx, $ct, $p, $fn) unless defined $s;
 
         my @lines = split(/^/m, $s);
         $s = '';
         if (defined($fn) || $depth > 0 || $err) {
+                # badly-encoded message with $err? tell the world about it!
                 $s .= attach_link($upfx, $ct, $p, $fn, $err);
                 $s .= "\n";
         }