From 3fc59df0d633a17e0c5e43d633d12e8772c06ec3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 10 Jan 2017 21:40:37 +0000 Subject: introduce PublicInbox::MIME wrapper class This should fix problems with multipart messages where text/plain parts lack a header. cf. git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head In the future, we may still introduce as streaming interface to reduce memory usage on large emails. --- lib/PublicInbox/Feed.pm | 6 +-- lib/PublicInbox/Filter/Vger.pm | 2 +- lib/PublicInbox/Import.pm | 2 +- lib/PublicInbox/MIME.pm | 102 ++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/MsgIter.pm | 22 +-------- lib/PublicInbox/Search.pm | 2 +- lib/PublicInbox/SearchIdx.pm | 4 +- lib/PublicInbox/SearchView.pm | 6 +-- lib/PublicInbox/View.pm | 8 ++-- lib/PublicInbox/WWW.pm | 6 +-- lib/PublicInbox/WatchMaildir.pm | 6 +-- lib/PublicInbox/WwwAttach.pm | 4 +- 12 files changed, 126 insertions(+), 44 deletions(-) create mode 100644 lib/PublicInbox/MIME.pm (limited to 'lib') diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 2a33fd29..e5d57550 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -5,7 +5,7 @@ package PublicInbox::Feed; use strict; use warnings; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::View; use PublicInbox::WwwAtomStream; @@ -39,7 +39,7 @@ sub generate_thread_atom { PublicInbox::WwwAtomStream->response($ctx, 200, sub { while (my $msg = shift @$msgs) { $msg = $ibx->msg_by_smsg($msg) and - return Email::MIME->new($msg); + return PublicInbox::MIME->new($msg); } }); } @@ -175,7 +175,7 @@ sub each_recent_blob { sub do_cat_mail { my ($ibx, $path) = @_; my $mime = eval { $ibx->msg_by_path($path) } or return; - Email::MIME->new($mime); + PublicInbox::MIME->new($mime); } 1; diff --git a/lib/PublicInbox/Filter/Vger.pm b/lib/PublicInbox/Filter/Vger.pm index 2ffed184..905f28d7 100644 --- a/lib/PublicInbox/Filter/Vger.pm +++ b/lib/PublicInbox/Filter/Vger.pm @@ -25,7 +25,7 @@ sub scrub { # so in multipart (e.g. GPG-signed) messages, the list trailer # becomes invisible to MIME-aware email clients. if ($s =~ s/$l0\n$l1\n$l2\n$l3\n($l4\n)?\z//os) { - $mime = Email::MIME->new(\$s); + $mime = PublicInbox::MIME->new(\$s); } $self->ACCEPT($mime); } diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index 1ac112b8..13671a4f 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -119,7 +119,7 @@ sub remove { $n = read($r, my $lf, 1); defined($n) or die "read final byte of cat-blob failed: $!"; die "bad read on final byte: <$lf>" if $lf ne "\n"; - my $cur = Email::MIME->new($buf); + my $cur = PublicInbox::MIME->new($buf); my $cur_s = $cur->header('Subject'); $cur_s = '' unless defined $cur_s; my $cur_m = $mime->header('Subject'); diff --git a/lib/PublicInbox/MIME.pm b/lib/PublicInbox/MIME.pm new file mode 100644 index 00000000..792fffd6 --- /dev/null +++ b/lib/PublicInbox/MIME.pm @@ -0,0 +1,102 @@ +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. +# +# The license for this file differs from the rest of public-inbox. +# +# It monkey patches the "parts_multipart" subroutine with patches +# from Matthew Horsfall at: +# +# git clone --mirror https://github.com/rjbs/Email-MIME.git refs/pull/28/head +# +# commit fe0eb870ab732507aa39a1070a2fd9435c7e4877 +# ("Make sure we don't modify the body of a message when injecting a header.") +# commit 981d8201a7239b02114489529fd366c4c576a146 +# ("GH #14 - Handle CRLF emails properly.") +# commit 2338d93598b5e8432df24bda8dfdc231bdeb666e +# ("GH #14 - Support multipart messages without content-type in subparts.") +# +# For Email::MIME >= 1.923 && < 1.935, +# commit dcef9be66c49ae89c7a5027a789bbbac544499ce +# ("removing all trailing newlines was too much") +# is also included +package PublicInbox::MIME; +use strict; +use warnings; +use base qw(Email::MIME); + +if ($Email::MIME::VERSION <= 1.937) { +sub parts_multipart { + my $self = shift; + my $boundary = $self->{ct}->{attributes}->{boundary}; + + # Take a message, join all its lines together. Now try to Email::MIME->new + # it with 1.861 or earlier. Death! It tries to recurse endlessly on the + # body, because every time it splits on boundary it gets itself. Obviously + # that means it's a bogus message, but a mangled result (or exception) is + # better than endless recursion. -- rjbs, 2008-01-07 + return $self->parts_single_part + unless $boundary and $self->body_raw =~ /^--\Q$boundary\E\s*$/sm; + + $self->{body_raw} = $self->SUPER::body; + + # rfc1521 7.2.1 + my ($body, $epilogue) = split /^--\Q$boundary\E--\s*$/sm, $self->body_raw, 2; + + # Split on boundaries, but keep blank lines after them intact + my @bits = split /^--\Q$boundary\E\s*?(?=$self->{mycrlf})/m, ($body || ''); + + $self->SUPER::body_set(undef); + + # If there are no headers in the potential MIME part, it's just part of the + # body. This is a horrible hack, although it's debatable whether it was + # better or worse when it was $self->{body} = shift @bits ... -- rjbs, + # 2006-11-27 + $self->SUPER::body_set(shift @bits) if ($bits[0] || '') !~ /.*:.*/; + + my $bits = @bits; + + my @parts; + for my $bit (@bits) { + # Parts don't need headers. If they don't have them, they look like this: + # + # --90e6ba6e8d06f1723604fc1b809a + # + # Part 2 + # + # Part 2a + # + # $bit will contain two new lines before Part 2. + # + # Anything with headers will only have one new line. + # + # RFC 1341 Section 7.2 says parts without headers are to be considered + # plain US-ASCII text. -- alh + # 2016-08-01 + my $added_header; + + if ($bit =~ /^(?:$self->{mycrlf}){2}/) { + $bit = "Content-type: text/plain; charset=us-ascii" . $bit; + + $added_header = 1; + } + + $bit =~ s/\A[\n\r]+//smg; + $bit =~ s/(?{mycrlf}\Z//sm; + + my $email = (ref $self)->new($bit); + + if ($added_header) { + # Remove our changes so we don't change the raw email content + $email->header_str_set('Content-Type'); + } + + push @parts, $email; + } + + $self->{parts} = \@parts; + + return @{ $self->{parts} }; +} +} + +1; diff --git a/lib/PublicInbox/MsgIter.pm b/lib/PublicInbox/MsgIter.pm index ef0d209f..5be06a44 100644 --- a/lib/PublicInbox/MsgIter.pm +++ b/lib/PublicInbox/MsgIter.pm @@ -6,17 +6,7 @@ use strict; use warnings; use base qw(Exporter); our @EXPORT = qw(msg_iter); -use Email::MIME; -use Scalar::Util qw(readonly); - -# Workaround Email::MIME versions without -# commit dcef9be66c49ae89c7a5027a789bbbac544499ce -# ("removing all trailing newlines was too much") -# This is necessary for Debian jessie -my $bad = 1.923; -my $good = 1.935; -my $ver = $Email::MIME::VERSION; -my $extra_nl = 1 if ($ver >= $bad && $ver < $good); +use PublicInbox::MIME; # Like Email::MIME::walk_parts, but this is: # * non-recursive @@ -36,16 +26,6 @@ sub msg_iter ($$) { @sub = map { [ $_, $depth, @idx, ++$i ] } @sub; @parts = (@sub, @parts); } else { - if ($extra_nl) { - my $lf = $part->{mycrlf}; - my $bref = $part->{body}; - if (readonly($$bref)) { - my $s = $$bref . $lf; - $part->{body} = \$s; - } else { - $$bref .= $lf; - } - } $cb->($p); } } diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index a1bae419..c9094245 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -16,7 +16,7 @@ use constant YYYYMMDD => 4; # for searching in the WWW UI use Search::Xapian qw/:standard/; use PublicInbox::SearchMsg; -use Email::MIME; +use PublicInbox::MIME; use PublicInbox::MID qw/mid_clean id_compress/; # This is English-only, everything else is non-standard and may be confused as diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 87ee0d46..d63dd7c7 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -10,7 +10,7 @@ package PublicInbox::SearchIdx; use strict; use warnings; use Fcntl qw(:flock :DEFAULT); -use Email::MIME; +use PublicInbox::MIME; use Email::MIME::ContentType; $Email::MIME::ContentType::STRICT_PARAMS = 0; use base qw(PublicInbox::Search); @@ -400,7 +400,7 @@ sub do_cat_mail { my $str = $git->cat_file($blob, $sizeref); # fixup bugs from import: $$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; - Email::MIME->new($str); + PublicInbox::MIME->new($str); }; $@ ? undef : $mime; } diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index bd634d8d..ccc53abf 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -10,7 +10,7 @@ use PublicInbox::Hval qw/ascii_html/; use PublicInbox::View; use PublicInbox::WwwAtomStream; use PublicInbox::MID qw(mid2path mid_mime mid_clean mid_escape); -use Email::MIME; +use PublicInbox::MIME; require PublicInbox::Git; require PublicInbox::SearchThread; our $LIM = 50; @@ -205,7 +205,7 @@ sub mset_thread { $mime = $inbox->msg_by_smsg($mime) and last; } if ($mime) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return PublicInbox::View::index_entry($mime, $ctx, scalar @$msgs); } @@ -239,7 +239,7 @@ sub adump { while (my $x = shift @items) { $x = load_doc_retry($srch, $x); $x = $ibx->msg_by_smsg($x) and - return Email::MIME->new($x); + return PublicInbox::MIME->new($x); } return undef; }); diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index e4e9d7d2..2c37cd42 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -299,7 +299,7 @@ sub stream_thread ($$) { } return missing_thread($ctx) unless $mime; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); $ctx->{-title_html} = ascii_html($mime->header('Subject')); $ctx->{-html_tip} = thread_index_entry($ctx, $level, $mime); PublicInbox::WwwStream->response($ctx, 200, sub { @@ -311,7 +311,7 @@ sub stream_thread ($$) { unshift @q, map { ($cl, $_) } @{$node->{children}}; my $mid = $node->{id}; if ($mime = $inbox->msg_by_smsg($node->{smsg})) { - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); return thread_index_entry($ctx, $level, $mime); } else { return ghost_index_entry($ctx, $level, $node); @@ -362,7 +362,7 @@ sub thread_html { $mime = $inbox->msg_by_smsg($mime) and last; } return missing_thread($ctx) unless $mime; - $mime = Email::MIME->new($mime); + $mime = PublicInbox::MIME->new($mime); $ctx->{-title_html} = ascii_html($mime->header('Subject')); $ctx->{-html_tip} = '
'.index_entry($mime, $ctx, scalar @$msgs);
 	$mime = undef;
@@ -372,7 +372,7 @@ sub thread_html {
 			$mime = $inbox->msg_by_smsg($mime) and last;
 		}
 		if ($mime) {
-			$mime = Email::MIME->new($mime);
+			$mime = PublicInbox::MIME->new($mime);
 			return index_entry($mime, $ctx, scalar @$msgs);
 		}
 		$msgs = undef;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 11fc92e9..430e6b19 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -113,7 +113,7 @@ sub preload {
 	require PublicInbox::Feed;
 	require PublicInbox::View;
 	require PublicInbox::SearchThread;
-	require Email::MIME;
+	require PublicInbox::MIME;
 	require Digest::SHA;
 	require POSIX;
 
@@ -225,8 +225,8 @@ sub get_mid_html {
 	my $x = mid2blob($ctx) or return r404($ctx);
 
 	require PublicInbox::View;
-	require Email::MIME;
-	my $mime = Email::MIME->new($x);
+	require PublicInbox::MIME;
+	my $mime = PublicInbox::MIME->new($x);
 	searcher($ctx);
 	PublicInbox::View::msg_html($ctx, $mime);
 }
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index b7c2d17a..d08f2297 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -6,7 +6,7 @@
 package PublicInbox::WatchMaildir;
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType;
 $Email::MIME::ContentType::STRICT_PARAMS = 0; # user input is imperfect
 use PublicInbox::Git;
@@ -207,7 +207,7 @@ sub _path_to_mime {
 		local $/;
 		my $str = <$fh>;
 		$str or return;
-		return Email::MIME->new(\$str);
+		return PublicInbox::MIME->new(\$str);
 	} elsif ($!{ENOENT}) {
 		return;
 	} else {
@@ -247,7 +247,7 @@ sub _spamcheck_cb {
 		my ($mime) = @_;
 		my $tmp = '';
 		if ($sc->spamcheck($mime, \$tmp)) {
-			return Email::MIME->new(\$tmp);
+			return PublicInbox::MIME->new(\$tmp);
 		}
 		warn $mime->header('Message-ID')." failed spam check\n";
 		undef;
diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm
index 33bfce27..a5ba5b2b 100644
--- a/lib/PublicInbox/WwwAttach.pm
+++ b/lib/PublicInbox/WwwAttach.pm
@@ -5,7 +5,7 @@
 package PublicInbox::WwwAttach; # internal package
 use strict;
 use warnings;
-use Email::MIME;
+use PublicInbox::MIME;
 use Email::MIME::ContentType qw(parse_content_type);
 $Email::MIME::ContentType::STRICT_PARAMS = 0;
 use PublicInbox::MsgIter;
@@ -15,7 +15,7 @@ sub get_attach ($$$) {
 	my ($ctx, $idx, $fn) = @_;
 	my $res = [ 404, [ 'Content-Type', 'text/plain' ], [ "Not found\n" ] ];
 	my $mime = $ctx->{-inbox}->msg_by_mid($ctx->{mid}) or return $res;
-	$mime = Email::MIME->new($mime);
+	$mime = PublicInbox::MIME->new($mime);
 	msg_iter($mime, sub {
 		my ($part, $depth, @idx) = @{$_[0]};
 		return if join('.', @idx) ne $idx;
-- 
cgit v1.2.3-24-ge0c7