about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2021-09-04 21:36:58 +0000
committer Eric Wong <e@80x24.org> 2021-09-04 23:33:06 +0000
commit 09acfa0ba87514dac4802ca88b3c95bb53d15c04 (patch)
tree 60c117b099564f06b54db089d698abbdc4aa6c41
parent 42420897bd81bf5664d5a93c5069dda4cb0459f1 (diff)
download public-inbox-09acfa0ba87514dac4802ca88b3c95bb53d15c04.tar.gz
We may be handling invalid mboxes, so just return no objects in
that case.  While "lei q" on HTTP(S) externals expects a gzipped
mboxrd, there's always a chance something else gzipped can be
sent to us.

There are also changes to lei_to_mail to better handle emails
which lack a body and/or headers (e.g. t/solve/bare.patch).

Link: https://public-inbox.org/meta/20210903151500.h72mzcpqixgtytjs@meerkat.local/
-rw-r--r-- lib/PublicInbox/Eml.pm 8
-rw-r--r-- lib/PublicInbox/LeiToMail.pm 21
-rw-r--r-- lib/PublicInbox/MboxReader.pm 3
-rw-r--r-- t/mbox_reader.t 23
4 files changed, 40 insertions, 15 deletions
diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm
index 955d6a96..0867a016 100644
--- a/lib/PublicInbox/Eml.pm
+++ b/lib/PublicInbox/Eml.pm
@@ -480,6 +480,14 @@ sub charset_set {
 
 sub crlf { $_[0]->{crlf} // "\n" }
 
+sub raw_size {
+        my ($self) = @_;
+        my $len = length(${$self->{hdr}});
+        defined($self->{bdy}) and
+                $len += length(${$self->{bdy}}) + length($self->{crlf});
+        $len;
+}
+
 # warnings to ignore when handling spam mailboxes and maybe other places
 sub warn_ignore {
         my $s = "@_";
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 6e102a1d..1221d3c7 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -109,32 +109,25 @@ sub _mboxcl_common ($$$) {
         $$buf .= 'Content-Length: '.length($$bdy).$crlf.
                 'Lines: '.$lines.$crlf.$crlf;
         substr($$bdy, 0, 0, $$buf); # prepend header
-        $_[0] = $bdy;
+        $$bdy .= $crlf;
+        $bdy;
 }
 
 # mboxcl still escapes "From " lines
 sub eml2mboxcl {
         my ($eml, $smsg) = @_;
         my $buf = _mbox_hdr_buf($eml, 'mboxcl', $smsg);
-        my $crlf = $eml->{crlf};
-        if (my $bdy = delete $eml->{bdy}) {
-                $$bdy =~ s/^From />From /gm;
-                _mboxcl_common($buf, $bdy, $crlf);
-        }
-        $$buf .= $crlf;
-        $buf;
+        my $bdy = delete($eml->{bdy}) // \(my $empty = '');
+        $$bdy =~ s/^From />From /gm;
+        _mboxcl_common($buf, $bdy, $eml->{crlf});
 }
 
 # mboxcl2 has no "From " escaping
 sub eml2mboxcl2 {
         my ($eml, $smsg) = @_;
         my $buf = _mbox_hdr_buf($eml, 'mboxcl2', $smsg);
-        my $crlf = $eml->{crlf};
-        if (my $bdy = delete $eml->{bdy}) {
-                _mboxcl_common($buf, $bdy, $crlf);
-        }
-        $$buf .= $crlf;
-        $buf;
+        my $bdy = delete($eml->{bdy}) // \(my $empty = '');
+        _mboxcl_common($buf, $bdy, $eml->{crlf});
 }
 
 sub git_to_mail { # git->cat_async callback
diff --git a/lib/PublicInbox/MboxReader.pm b/lib/PublicInbox/MboxReader.pm
index 9291f00b..5a754cb8 100644
--- a/lib/PublicInbox/MboxReader.pm
+++ b/lib/PublicInbox/MboxReader.pm
@@ -41,7 +41,7 @@ sub _mbox_from {
                         $raw =~ s/^\r?\n\z//ms;
                         $raw =~ s/$from_re/$1/gms;
                         my $eml = PublicInbox::Eml->new(\$raw);
-                        $eml_cb->($eml, @arg);
+                        $eml_cb->($eml, @arg) if $eml->raw_size;
                 }
                 return if $r == 0; # EOF
         }
@@ -96,6 +96,7 @@ sub _mbox_cl ($$$;@) {
                         $$hdr =~ s/\A[\r\n]*From [^\n]*\n//s or
                                 die "E: no 'From ' line in:\n", Dumper($hdr);
                         my $eml = PublicInbox::Eml->new($hdr);
+                        next unless $eml->raw_size;
                         my @cl = $eml->header_raw('Content-Length');
                         my $n = scalar(@cl);
                         $n == 0 and die "E: Content-Length missing in:\n",
diff --git a/t/mbox_reader.t b/t/mbox_reader.t
index da0ce7f1..e5f57d7b 100644
--- a/t/mbox_reader.t
+++ b/t/mbox_reader.t
@@ -71,6 +71,12 @@ my $check_fmt = sub {
                                 "Content-Length is correct $fmt $cur");
                         # clobber for ->as_string comparison below
                         $eml->header_set('Content-Length');
+
+                        # special case for t/solve/bare.patch, not sure if we
+                        # should even handle it...
+                        if ($cl[0] eq '0' && ${$eml->{hdr}} eq '') {
+                                delete $eml->{bdy};
+                        }
                 } else {
                         is(scalar(@cl), 0, "Content-Length unset $fmt $cur");
                 }
@@ -121,4 +127,21 @@ exit 1
         is(scalar(grep(/Final/, @x)), 0, 'no incomplete bit');
 }
 
+{
+        my $html = <<EOM;
+<html><head><title>hi,</title></head><body>how are you</body></html>
+EOM
+        for my $m (qw(mboxrd mboxcl mboxcl2 mboxo)) {
+                my (@w, @x);
+                local $SIG{__WARN__} = sub { push @w, @_ };
+                open my $fh, '<', \$html or xbail 'PerlIO::scalar';
+                PublicInbox::MboxReader->$m($fh, sub {
+                        push @x, $_[0]->as_string
+                });
+                is_deeply(\@x, [], "messages in invalid $m");
+                is_deeply([grep(!/^W: leftover/, @w)], [],
+                        "no extra warnings besides leftover ($m)");
+        }
+}
+
 done_testing;