about summary refs log tree commit homepage
path: root/lib/PublicInbox/MIME.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2017-01-10 21:40:37 +0000
committerEric Wong <e@80x24.org>2017-01-10 22:02:51 +0000
commit3fc59df0d633a17e0c5e43d633d12e8772c06ec3 (patch)
tree2c4f6800777694fa8e2d40b625d2fe766f719adf /lib/PublicInbox/MIME.pm
parentf1a38b18d9a46531e18143a7cd4c7a66fc89adbc (diff)
downloadpublic-inbox-3fc59df0d633a17e0c5e43d633d12e8772c06ec3.tar.gz
This should fix problems with multipart messages where
text/plain parts lack a header.

cf. git clone --mirror https://github.com/rjbs/Email-MIME.git
    refs/pull/28/head

In the future, we may still introduce as streaming
interface to reduce memory usage on large emails.
Diffstat (limited to 'lib/PublicInbox/MIME.pm')
-rw-r--r--lib/PublicInbox/MIME.pm102
1 files changed, 102 insertions, 0 deletions
diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm
new file mode 100644
index 00000000..792fffd6
--- /dev/null
+++ b/lib/PublicInbox/MIME.pm
@@ -0,0 +1,102 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# The license for this file differs from the rest of public-inbox.
+#
+# It monkey patches the "parts_multipart" subroutine with patches
+# from Matthew Horsfall <wolfsage@gmail.com> at:
+#
+# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head
+#
+# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877
+# ("Make sure we don't modify the body of a message when injecting a header.")
+# commit 981d8201a7239b02114489529fd366c4c576a146
+# ("GH #14 - Handle CRLF emails properly.")
+# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e
+# ("GH #14 - Support multipart messages without content-type in subparts.")
+#
+# For Email::MIME >= 1.923 && < 1.935,
+# commit dcef9be66c49ae89c7a5027a789bbbac544499ce
+# ("removing all trailing newlines was too much")
+# is also included
+package PublicInbox::MIME;
+use strict;
+use warnings;
+use base qw(Email::MIME);
+
+if ($Email::MIME::VERSION <= 1.937) {
+sub parts_multipart {
+  my $self     = shift;
+  my $boundary = $self->{ct}->{attributes}->{boundary};
+
+  # Take a message, join all its lines together.  Now try to Email::MIME->new
+  # it with 1.861 or earlier.  Death!  It tries to recurse endlessly on the
+  # body, because every time it splits on boundary it gets itself. Obviously
+  # that means it's a bogus message, but a mangled result (or exception) is
+  # better than endless recursion. -- rjbs, 2008-01-07
+  return $self->parts_single_part
+    unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm;
+
+  $self->{body_raw} = $self->SUPER::body;
+
+  # rfc1521 7.2.1
+  my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2;
+
+  # Split on boundaries, but keep blank lines after them intact
+  my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || '');
+
+  $self->SUPER::body_set(undef);
+
+  # If there are no headers in the potential MIME part, it's just part of the
+  # body.  This is a horrible hack, although it's debatable whether it was
+  # better or worse when it was $self->{body} = shift @bits ... -- rjbs,
+  # 2006-11-27
+  $self->SUPER::body_set(shift @bits) if ($bits[0] || '') !~ /.*:.*/;
+
+  my $bits = @bits;
+
+  my @parts;
+  for my $bit (@bits) {
+    # Parts don't need headers. If they don't have them, they look like this:
+    #
+    #   --90e6ba6e8d06f1723604fc1b809a
+    #
+    #   Part 2
+    #
+    #   Part 2a
+    #
+    # $bit will contain two new lines before Part 2.
+    #
+    # Anything with headers will only have one new line.
+    #
+    # RFC 1341 Section 7.2 says parts without headers are to be considered
+    # plain US-ASCII text. -- alh
+    # 2016-08-01
+    my $added_header;
+
+    if ($bit =~ /^(?:$self->{mycrlf}){2}/) {
+      $bit = "Content-type: text/plain; charset=us-ascii" . $bit;
+
+      $added_header = 1;
+    }
+
+    $bit =~ s/\A[\n\r]+//smg;
+    $bit =~ s/(?<!\x0d)$self->{mycrlf}\Z//sm;
+
+    my $email = (ref $self)->new($bit);
+
+    if ($added_header) {
+      # Remove our changes so we don't change the raw email content
+      $email->header_str_set('Content-Type');
+    }
+
+    push @parts, $email;
+  }
+
+  $self->{parts} = \@parts;
+
+  return @{ $self->{parts} };
+}
+}
+
+1;