From 09acfa0ba87514dac4802ca88b3c95bb53d15c04 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 4 Sep 2021 21:36:58 +0000 Subject: lei_to_mail+mbox_reader: fix handling of empty/bogus emails We may be handling invalid mboxes, so just return no objects in that case. While "lei q" on HTTP(S) externals expects a gzipped mboxrd, there's always a chance something else gzipped can be sent to us. There are also changes to lei_to_mail to better handle emails which lack a body and/or headers (e.g. t/solve/bare.patch) Link: https://public-inbox.org/meta/20210903151500.h72mzcpqixgtytjs@meerkat.local/ --- lib/PublicInbox/Eml.pm | 8 ++++++++ lib/PublicInbox/LeiToMail.pm | 21 +++++++-------------- lib/PublicInbox/MboxReader.pm | 3 ++- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 955d6a96..0867a016 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -480,6 +480,14 @@ sub charset_set { sub crlf { $_[0]->{crlf} // "\n" } +sub raw_size { + my ($self) = @_; + my $len = length(${$self->{hdr}}); + defined($self->{bdy}) and + $len += length(${$self->{bdy}}) + length($self->{crlf}); + $len; +} + # warnings to ignore when handling spam mailboxes and maybe other places sub warn_ignore { my $s = "@_"; diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 6e102a1d..1221d3c7 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -109,32 +109,25 @@ sub _mboxcl_common ($$$) { $$buf .= 'Content-Length: '.length($$bdy).$crlf. 
'Lines: '.$lines.$crlf.$crlf; substr($$bdy, 0, 0, $$buf); # prepend header - $_[0] = $bdy; + $$bdy .= $crlf; + $bdy; } # mboxcl still escapes "From " lines sub eml2mboxcl { my ($eml, $smsg) = @_; my $buf = _mbox_hdr_buf($eml, 'mboxcl', $smsg); - my $crlf = $eml->{crlf}; - if (my $bdy = delete $eml->{bdy}) { - $$bdy =~ s/^From />From /gm; - _mboxcl_common($buf, $bdy, $crlf); - } - $$buf .= $crlf; - $buf; + my $bdy = delete($eml->{bdy}) // \(my $empty = ''); + $$bdy =~ s/^From />From /gm; + _mboxcl_common($buf, $bdy, $eml->{crlf}); } # mboxcl2 has no "From " escaping sub eml2mboxcl2 { my ($eml, $smsg) = @_; my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $smsg); - my $crlf = $eml->{crlf}; - if (my $bdy = delete $eml->{bdy}) { - _mboxcl_common($buf, $bdy, $crlf); - } - $$buf .= $crlf; - $buf; + my $bdy = delete($eml->{bdy}) // \(my $empty = ''); + _mboxcl_common($buf, $bdy, $eml->{crlf}); } sub git_to_mail { # git->cat_async callback diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm index 9291f00b..5a754cb8 100644 --- a/lib/PublicInbox/MboxReader.pm +++ b/lib/PublicInbox/MboxReader.pm @@ -41,7 +41,7 @@ sub _mbox_from { $raw =~ s/^\r?\n\z//ms; $raw =~ s/$from_re/$1/gms; my $eml = PublicInbox::Eml->new(\$raw); - $eml_cb->($eml, @arg); + $eml_cb->($eml, @arg) if $eml->raw_size; } return if $r == 0; # EOF } @@ -96,6 +96,7 @@ sub _mbox_cl ($$$;@) { $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or die "E: no 'From ' line in:\n", Dumper($hdr); my $eml = PublicInbox::Eml->new($hdr); + next unless $eml->raw_size; my @cl = $eml->header_raw('Content-Length'); my $n = scalar(@cl); $n == 0 and die "E: Content-Length missing in:\n", -- cgit v1.2.3-24-ge0c7