about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2017-01-10 21:40:37 +0000
committerEric Wong <e@80x24.org>2017-01-10 22:02:51 +0000
commit3fc59df0d633a17e0c5e43d633d12e8772c06ec3 (patch)
tree2c4f6800777694fa8e2d40b625d2fe766f719adf
parentf1a38b18d9a46531e18143a7cd4c7a66fc89adbc (diff)
downloadpublic-inbox-3fc59df0d633a17e0c5e43d633d12e8772c06ec3.tar.gz
This should fix problems with multipart messages where
text/plain parts lack a header.

cf. git clone --mirror https://github.com/rjbs/Email-MIME.git
    refs/pull/28/head

In the future, we may still introduce a streaming
interface to reduce memory usage on large emails.
-rw-r--r--MANIFEST2
-rw-r--r--lib/PublicInbox/Feed.pm6
-rw-r--r--lib/PublicInbox/Filter/Vger.pm2
-rw-r--r--lib/PublicInbox/Import.pm2
-rw-r--r--lib/PublicInbox/MIME.pm102
-rw-r--r--lib/PublicInbox/MsgIter.pm22
-rw-r--r--lib/PublicInbox/Search.pm2
-rw-r--r--lib/PublicInbox/SearchIdx.pm4
-rw-r--r--lib/PublicInbox/SearchView.pm6
-rw-r--r--lib/PublicInbox/View.pm8
-rw-r--r--lib/PublicInbox/WWW.pm6
-rw-r--r--lib/PublicInbox/WatchMaildir.pm6
-rw-r--r--lib/PublicInbox/WwwAttach.pm4
-rwxr-xr-xscript/public-inbox-learn4
-rwxr-xr-xscript/public-inbox-mda2
-rw-r--r--t/import.t6
-rw-r--r--t/mime.t57
17 files changed, 191 insertions, 50 deletions
diff --git a/MANIFEST b/MANIFEST
index 8f5e487e..76fd1da6 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -65,6 +65,7 @@ lib/PublicInbox/Linkify.pm
 lib/PublicInbox/Listener.pm
 lib/PublicInbox/MDA.pm
 lib/PublicInbox/MID.pm
+lib/PublicInbox/MIME.pm
 lib/PublicInbox/Mbox.pm
 lib/PublicInbox/MsgIter.pm
 lib/PublicInbox/Msgmap.pm
@@ -143,6 +144,7 @@ t/linkify.t
 t/main-bin/spamc
 t/mda.t
 t/mid.t
+t/mime.t
 t/msg_iter.t
 t/msgmap.t
 t/nntp.t
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 2a33fd29..e5d57550 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -5,7 +5,7 @@
 package PublicInbox::Feed;
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
 
@@ -39,7 +39,7 @@ sub generate_thread_atom {
         PublicInbox::WwwAtomStream->response($ctx, 200, sub {
                 while (my $msg = shift @$msgs) {
                         $msg = $ibx->msg_by_smsg($msg) and
-                                        return Email::MIME->new($msg);
+                                return PublicInbox::MIME->new($msg);
                 }
         });
 }
@@ -175,7 +175,7 @@ sub each_recent_blob {
 sub do_cat_mail {
         my ($ibx, $path) = @_;
         my $mime = eval { $ibx->msg_by_path($path) } or return;
-        Email::MIME->new($mime);
+        PublicInbox::MIME->new($mime);
 }
 
 1;
diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm
index 2ffed184..905f28d7 100644
--- a/lib/PublicInbox/Filter/Vger.pm
+++ b/lib/PublicInbox/Filter/Vger.pm
@@ -25,7 +25,7 @@ sub scrub {
         # so in multipart (e.g. GPG-signed) messages, the list trailer
         # becomes invisible to MIME-aware email clients.
         if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) {
-                $mime = Email::MIME->new(\$s);
+                $mime = PublicInbox::MIME->new(\$s);
         }
         $self->ACCEPT($mime);
 }
diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index 1ac112b8..13671a4f 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -119,7 +119,7 @@ sub remove {
         $n = read($r, my $lf, 1);
         defined($n) or die "read final byte of cat-blob failed: $!";
         die "bad read on final byte: <$lf>" if $lf ne "\n";
-        my $cur = Email::MIME->new($buf);
+        my $cur = PublicInbox::MIME->new($buf);
         my $cur_s = $cur->header('Subject');
         $cur_s = '' unless defined $cur_s;
         my $cur_m = $mime->header('Subject');
diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm
new file mode 100644
index 00000000..792fffd6
--- /dev/null
+++ b/lib/PublicInbox/MIME.pm
@@ -0,0 +1,102 @@
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+#
+# The license for this file differs from the rest of public-inbox.
+#
+# It monkey patches the "parts_multipart" subroutine with patches
+# from Matthew Horsfall <wolfsage@gmail.com> at:
+#
+# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head
+#
+# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877
+# ("Make sure we don't modify the body of a message when injecting a header.")
+# commit 981d8201a7239b02114489529fd366c4c576a146
+# ("GH #14 - Handle CRLF emails properly.")
+# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e
+# ("GH #14 - Support multipart messages without content-type in subparts.")
+#
+# For Email::MIME >= 1.923 && < 1.935,
+# commit dcef9be66c49ae89c7a5027a789bbbac544499ce
+# ("removing all trailing newlines was too much")
+# is also included
+package PublicInbox::MIME;
+use strict;
+use warnings;
+use base qw(Email::MIME);
+
+if ($Email::MIME::VERSION <= 1.937) {
+sub parts_multipart {
+  my $self     = shift;
+  my $boundary = $self->{ct}->{attributes}->{boundary};
+
+  # Take a message, join all its lines together.  Now try to Email::MIME->new
+  # it with 1.861 or earlier.  Death!  It tries to recurse endlessly on the
+  # body, because every time it splits on boundary it gets itself. Obviously
+  # that means it's a bogus message, but a mangled result (or exception) is
+  # better than endless recursion. -- rjbs, 2008-01-07
+  return $self->parts_single_part
+    unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm;
+
+  $self->{body_raw} = $self->SUPER::body;
+
+  # rfc1521 7.2.1
+  my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2;
+
+  # Split on boundaries, but keep blank lines after them intact
+  my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || '');
+
+  $self->SUPER::body_set(undef);
+
+  # If there are no headers in the potential MIME part, it's just part of the
+  # body.  This is a horrible hack, although it's debatable whether it was
+  # better or worse when it was $self->{body} = shift @bits ... -- rjbs,
+  # 2006-11-27
+  $self->SUPER::body_set(shift @bits) if ($bits[0] || '') !~ /.*:.*/;
+
+  my $bits = @bits;
+
+  my @parts;
+  for my $bit (@bits) {
+    # Parts don't need headers. If they don't have them, they look like this:
+    #
+    #   --90e6ba6e8d06f1723604fc1b809a
+    #
+    #   Part 2
+    #
+    #   Part 2a
+    #
+    # $bit will contain two new lines before Part 2.
+    #
+    # Anything with headers will only have one new line.
+    #
+    # RFC 1341 Section 7.2 says parts without headers are to be considered
+    # plain US-ASCII text. -- alh
+    # 2016-08-01
+    my $added_header;
+
+    if ($bit =~ /^(?:$self->{mycrlf}){2}/) {
+      $bit = "Content-type: text/plain; charset=us-ascii" . $bit;
+
+      $added_header = 1;
+    }
+
+    $bit =~ s/\A[\n\r]+//smg;
+    $bit =~ s/(?<!\x0d)$self->{mycrlf}\Z//sm;
+
+    my $email = (ref $self)->new($bit);
+
+    if ($added_header) {
+      # Remove our changes so we don't change the raw email content
+      $email->header_str_set('Content-Type');
+    }
+
+    push @parts, $email;
+  }
+
+  $self->{parts} = \@parts;
+
+  return @{ $self->{parts} };
+}
+}
+
+1;
diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm
index ef0d209f..5be06a44 100644
--- a/lib/PublicInbox/MsgIter.pm
+++ b/lib/PublicInbox/MsgIter.pm
@@ -6,17 +6,7 @@ use strict;
 use warnings;
 use base qw(Exporter);
 our @EXPORT = qw(msg_iter);
-use Email::MIME;
-use Scalar::Util qw(readonly);
-
-# Workaround Email::MIME versions without
-# commit dcef9be66c49ae89c7a5027a789bbbac544499ce
-# ("removing all trailing newlines was too much")
-# This is necessary for Debian jessie
-my $bad = 1.923;
-my $good = 1.935;
-my $ver = $Email::MIME::VERSION;
-my $extra_nl = 1 if ($ver >= $bad && $ver < $good);
+use PublicInbox::MIME;
 
 # Like Email::MIME::walk_parts, but this is:
 # * non-recursive
@@ -36,16 +26,6 @@ sub msg_iter ($$) {
                                 @sub = map { [ $_, $depth, @idx, ++$i ] } @sub;
                                 @parts = (@sub, @parts);
                         } else {
-                                if ($extra_nl) {
-                                        my $lf = $part->{mycrlf};
-                                        my $bref = $part->{body};
-                                        if (readonly($$bref)) {
-                                                my $s = $$bref . $lf;
-                                                $part->{body} = \$s;
-                                        } else {
-                                                $$bref .= $lf;
-                                        }
-                                }
                                 $cb->($p);
                         }
                 }
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index a1bae419..c9094245 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -16,7 +16,7 @@ use constant YYYYMMDD => 4; # for searching in the WWW UI
 
 use Search::Xapian qw/:standard/;
 use PublicInbox::SearchMsg;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::MID qw/mid_clean id_compress/;
 
 # This is English-only, everything else is non-standard and may be confused as
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 87ee0d46..d63dd7c7 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -10,7 +10,7 @@ package PublicInbox::SearchIdx;
 use strict;
 use warnings;
 use Fcntl qw(:flock :DEFAULT);
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 use base qw(PublicInbox::Search);
@@ -400,7 +400,7 @@ sub do_cat_mail {
                 my $str = $git->cat_file($blob, $sizeref);
                 # fixup bugs from import:
                 $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
-                Email::MIME->new($str);
+                PublicInbox::MIME->new($str);
         };
         $@ ? undef : $mime;
 }
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index bd634d8d..ccc53abf 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -10,7 +10,7 @@ use PublicInbox::Hval qw/ascii_html/;
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
 use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape);
-use Email::MIME;
+use PublicInbox::MIME;
 require PublicInbox::Git;
 require PublicInbox::SearchThread;
 our $LIM = 50;
@@ -205,7 +205,7 @@ sub mset_thread {
                         $mime = $inbox->msg_by_smsg($mime) and last;
                 }
                 if ($mime) {
-                        $mime = Email::MIME->new($mime);
+                        $mime = PublicInbox::MIME->new($mime);
                         return PublicInbox::View::index_entry($mime, $ctx,
                                 scalar @$msgs);
                 }
@@ -239,7 +239,7 @@ sub adump {
                 while (my $x = shift @items) {
                         $x = load_doc_retry($srch, $x);
                         $x = $ibx->msg_by_smsg($x) and
-                                        return Email::MIME->new($x);
+                                        return PublicInbox::MIME->new($x);
                 }
                 return undef;
         });
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index e4e9d7d2..2c37cd42 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -299,7 +299,7 @@ sub stream_thread ($$) {
         }
         return missing_thread($ctx) unless $mime;
 
-        $mime = Email::MIME->new($mime);
+        $mime = PublicInbox::MIME->new($mime);
         $ctx->{-title_html} = ascii_html($mime->header('Subject'));
         $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime);
         PublicInbox::WwwStream->response($ctx, 200, sub {
@@ -311,7 +311,7 @@ sub stream_thread ($$) {
                         unshift @q, map { ($cl, $_) } @{$node->{children}};
                         my $mid = $node->{id};
                         if ($mime = $inbox->msg_by_smsg($node->{smsg})) {
-                                $mime = Email::MIME->new($mime);
+                                $mime = PublicInbox::MIME->new($mime);
                                 return thread_index_entry($ctx, $level, $mime);
                         } else {
                                 return ghost_index_entry($ctx, $level, $node);
@@ -362,7 +362,7 @@ sub thread_html {
                 $mime = $inbox->msg_by_smsg($mime) and last;
         }
         return missing_thread($ctx) unless $mime;
-        $mime = Email::MIME->new($mime);
+        $mime = PublicInbox::MIME->new($mime);
         $ctx->{-title_html} = ascii_html($mime->header('Subject'));
         $ctx->{-html_tip} = '<pre>'.index_entry($mime, $ctx, scalar @$msgs);
         $mime = undef;
@@ -372,7 +372,7 @@ sub thread_html {
                         $mime = $inbox->msg_by_smsg($mime) and last;
                 }
                 if ($mime) {
-                        $mime = Email::MIME->new($mime);
+                        $mime = PublicInbox::MIME->new($mime);
                         return index_entry($mime, $ctx, scalar @$msgs);
                 }
                 $msgs = undef;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11fc92e9..430e6b19 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -113,7 +113,7 @@ sub preload {
         require PublicInbox::Feed;
         require PublicInbox::View;
         require PublicInbox::SearchThread;
-        require Email::MIME;
+        require PublicInbox::MIME;
         require Digest::SHA;
         require POSIX;
 
@@ -225,8 +225,8 @@ sub get_mid_html {
         my $x = mid2blob($ctx) or return r404($ctx);
 
         require PublicInbox::View;
-        require Email::MIME;
-        my $mime = Email::MIME->new($x);
+        require PublicInbox::MIME;
+        my $mime = PublicInbox::MIME->new($x);
         searcher($ctx);
         PublicInbox::View::msg_html($ctx, $mime);
 }
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index b7c2d17a..d08f2297 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -6,7 +6,7 @@
 package PublicInbox::WatchMaildir;
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Git;
@@ -207,7 +207,7 @@ sub _path_to_mime {
                 local $/;
                 my $str = <$fh>;
                 $str or return;
-                return Email::MIME->new(\$str);
+                return PublicInbox::MIME->new(\$str);
         } elsif ($!{ENOENT}) {
                 return;
         } else {
@@ -247,7 +247,7 @@ sub _spamcheck_cb {
                 my ($mime) = @_;
                 my $tmp = '';
                 if ($sc->spamcheck($mime, \$tmp)) {
-                        return Email::MIME->new(\$tmp);
+                        return PublicInbox::MIME->new(\$tmp);
                 }
                 warn $mime->header('Message-ID')." failed spam check\n";
                 undef;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 33bfce27..a5ba5b2b 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -5,7 +5,7 @@
 package PublicInbox::WwwAttach; # internal package
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType qw(parse_content_type);
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 use PublicInbox::MsgIter;
@@ -15,7 +15,7 @@ sub get_attach ($$$) {
         my ($ctx, $idx, $fn) = @_;
         my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
         my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
-        $mime = Email::MIME->new($mime);
+        $mime = PublicInbox::MIME->new($mime);
         msg_iter($mime, sub {
                 my ($part, $depth, @idx) = @{$_[0]};
                 return if join('.', @idx) ne $idx;
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
index 396ab489..a696d348 100755
--- a/script/public-inbox-learn
+++ b/script/public-inbox-learn
@@ -10,7 +10,7 @@ use warnings;
 use PublicInbox::Config;
 use PublicInbox::Git;
 use PublicInbox::Import;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Address;
@@ -23,7 +23,7 @@ if ($train !~ /\A(?:ham|spam)\z/) {
 my $spamc = PublicInbox::Spamcheck::Spamc->new;
 my $pi_config = PublicInbox::Config->new;
 my $err;
-my $mime = Email::MIME->new(eval {
+my $mime = PublicInbox::MIME->new(eval {
         local $/;
         my $data = scalar <STDIN>;
         $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
index 8b5258f5..60f817dc 100755
--- a/script/public-inbox-mda
+++ b/script/public-inbox-mda
@@ -52,7 +52,7 @@ $simple = undef;
 $emm = PublicInbox::Emergency->new($emergency);
 $emm->prepare(\$str);
 $ems = $ems->abort;
-my $mime = Email::MIME->new(\$str);
+my $mime = PublicInbox::MIME->new(\$str);
 $str = '';
 do_exit(0) unless $spam_ok;
 
diff --git a/t/import.t b/t/import.t
index 73f92adb..29d00b05 100644
--- a/t/import.t
+++ b/t/import.t
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 use Test::More;
-use Email::MIME;
+use PublicInbox::MIME;
 use PublicInbox::Git;
 use PublicInbox::Import;
 use File::Temp qw/tempdir/;
@@ -13,7 +13,7 @@ is(system(qw(git init -q --bare), $dir), 0, 'git init successful');
 my $git = PublicInbox::Git->new($dir);
 
 my $im = PublicInbox::Import->new($git, 'testbox', 'test@example');
-my $mime = Email::MIME->create(
+my $mime = PublicInbox::MIME->create(
         header => [
                 From => 'a@example.com',
                 To => 'b@example.com',
@@ -50,7 +50,7 @@ $im->done;
 is(scalar @revs, 26, '26 revisions exist after mass import');
 my ($mark, $msg) = $im->remove($mime);
 like($mark, qr/\A:\d+\z/, 'got mark');
-is(ref($msg), 'Email::MIME', 'got old message deleted');
+is(ref($msg), 'PublicInbox::MIME', 'got old message deleted');
 
 is(undef, $im->remove($mime), 'remove is idempotent');
 
diff --git a/t/mime.t b/t/mime.t
new file mode 100644
index 00000000..cd3303d1
--- /dev/null
+++ b/t/mime.t
@@ -0,0 +1,57 @@
+# Copyright (C) 2017 all contributors <meta@public-inbox.org>
+# This library is free software; you can redistribute it and/or modify
+# it under the same terms as Perl itself.
+# Artistic or GPL-1+ <https://www.gnu.org/licenses/gpl-1.0.txt>
+use strict;
+use warnings;
+use Test::More;
+use_ok 'PublicInbox::MIME';
+
+my $msg = PublicInbox::MIME->new(
+'From:   Richard Hansen <hansenr@google.com>
+To:     git@vger.kernel.org
+Cc:     Richard Hansen <hansenr@google.com>
+Subject: [PATCH 0/2] minor diff orderfile documentation improvements
+Date:   Mon,  9 Jan 2017 19:40:29 -0500
+Message-Id: <20170110004031.57985-1-hansenr@google.com>
+X-Mailer: git-send-email 2.11.0.390.gc69c2f50cf-goog
+Content-Type: multipart/signed; protocol="application/pkcs7-signature"; micalg=sha-256;
+        boundary="94eb2c0bc864b76ba30545b2bca9"
+
+--94eb2c0bc864b76ba30545b2bca9
+
+Richard Hansen (2):
+  diff: document behavior of relative diff.orderFile
+  diff: document the pattern format for diff.orderFile
+
+ Documentation/diff-config.txt  | 5 ++++-
+ Documentation/diff-options.txt | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+
+--94eb2c0bc864b76ba30545b2bca9
+Content-Type: application/pkcs7-signature; name="smime.p7s"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="smime.p7s"
+Content-Description: (truncated) S/MIME Cryptographic Signature
+
+dkTlB69771K2eXK4LcHSH/2LqX+VYa3K44vrx1ruzjXdNWzIpKBy0weFNiwnJCGofvCysM2RCSI1
+--94eb2c0bc864b76ba30545b2bca9--
+
+');
+
+my @parts = $msg->parts;
+my $exp = 'Richard Hansen (2):
+  diff: document behavior of relative diff.orderFile
+  diff: document the pattern format for diff.orderFile
+
+ Documentation/diff-config.txt  | 5 ++++-
+ Documentation/diff-options.txt | 3 ++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+';
+
+ok($msg->isa('Email::MIME'), 'compatible with Email::MIME');
+is($parts[0]->body, $exp, 'body matches expected');
+
+done_testing();