about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2015-07-14 21:01:18 +0000
committer Eric Wong <e@80x24.org> 2015-07-14 21:01:18 +0000
commit f66473b1a41606d8b4cda74c551aa85d12ec37ef (patch)
tree d18c2dfa87b74ba9e81a7e108c4af6bcc61448b5
parent 49338dea67712c0106c09d871fca7d680c32298f (diff)
download public-inbox-f66473b1a41606d8b4cda74c551aa85d12ec37ef.tar.gz
This should hopefully reduce the delay between when a user fails
to send plain-text and when an admin such as myself notices the
HTML mail in a sea of spam.

Unfortunately, this can lead to backscatter, so avoid doing it
until it has passed through spamc, at least.
-rw-r--r-- lib/PublicInbox/Filter.pm 11
-rwxr-xr-x public-inbox-mda 2
-rw-r--r-- t/mda.t 35
3 files changed, 42 insertions, 6 deletions
diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index 6862bb68..49ba5cb2 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -12,6 +12,7 @@ use Email::MIME::ContentType qw/parse_content_type/;
 use Email::Filter;
 use IPC::Run;
 our $VERSION = '0.0.1';
+use constant NO_HTML => '*** We only accept plain-text email, no HTML ***';
 
 # start with the same defaults as mailman
 our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i;
@@ -21,7 +22,7 @@ our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
 # this is highly opinionated delivery
 # returns 0 only if there is nothing to deliver
 sub run {
-        my ($class, $mime) = @_;
+        my ($class, $mime, $filter) = @_;
 
         my $content_type = $mime->header('Content-Type') || 'text/plain';
 
@@ -38,6 +39,7 @@ sub run {
         if ($content_type =~ m!\btext/plain\b!i) {
                 return 1; # yay, nothing to do
         } elsif ($content_type =~ $MIME_HTML) {
+                $filter->reject(NO_HTML) if $filter;
                 # HTML-only, non-multipart
                 my $body = $mime->body;
                 my $ct_parsed = parse_content_type($content_type);
@@ -45,7 +47,7 @@ sub run {
                 replace_body($mime, $body);
                 return 1;
         } elsif ($content_type =~ m!\bmultipart/!i) {
-                return strip_multipart($mime, $content_type);
+                return strip_multipart($mime, $content_type, $filter);
         } else {
                 replace_body($mime, "$content_type message scrubbed");
                 return 0;
@@ -109,9 +111,9 @@ sub dump_html {
 # this is to correct user errors and not expected to cover all corner cases
 # if users don't want to hit this, they should be sending text/plain messages
 # unfortunately, too many people send HTML mail and we'll attempt to convert
-# it to something safer, smaller and harder-to-track.
+# it to something safer, smaller and harder-to-spy-on-users-with.
 sub strip_multipart {
-        my ($mime, $content_type) = @_;
+        my ($mime, $content_type, $filter) = @_;
 
         my (@html, @keep);
         my $rejected = 0;
@@ -133,6 +135,7 @@ sub strip_multipart {
                 if ($part_type =~ m!\btext/plain\b!i) {
                         push @keep, $part;
                 } elsif ($part_type =~ $MIME_HTML) {
+                        $filter->reject(NO_HTML) if $filter;
                         push @html, $part;
                 } elsif ($part_type =~ $MIME_TEXT_ANY) {
                         # Give other text attachments the benefit of the doubt,
diff --git a/public-inbox-mda b/public-inbox-mda
index 047a4827..4348cb24 100755
--- a/public-inbox-mda
+++ b/public-inbox-mda
@@ -38,7 +38,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
         $filtered = undef;
         $filter->simple($msg);
 
-        if (PublicInbox::Filter->run($msg)) {
+        if (PublicInbox::Filter->run($msg, $filter)) {
                 # run spamc again on the HTML-free message
                 if (do_spamc($msg, \$filtered)) {
                         $msg = Email::MIME->new(\$filtered);
diff --git a/t/mda.t b/t/mda.t
index 53712a56..67e86f47 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -23,6 +23,7 @@ my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock
 my $addr = 'test-public@example.com';
 my $cfgpfx = "publicinbox.test";
 my $failbox = "$home/fail.mbox";
+my $mime;
 
 {
         ok(-x "$main_bin/spamc",
@@ -235,7 +236,7 @@ EOF
                 )
         ];
         $mid = 'multipart-html-sucks@11';
-        my $mime = Email::MIME->create(
+        $mime = Email::MIME->create(
                 header_str => [
                   From => 'a@example.com',
                   Subject => 'blah',
@@ -276,6 +277,38 @@ EOF
         ok(-d $faildir, "emergency exists");
         my @new = glob("$faildir/new/*");
         is(scalar(@new), 1, "message delivered");
+        is(unlink(@new), 1, "removed emergency message");
+
+        local $ENV{PATH} = $main_path;
+        $in = <<EOF;
+From: Faildir <faildir\@example.com>
+To: $addr
+Content-Type: text/html
+Message-ID: <faildir\@example.com>
+Subject: faildir subject
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+
+<html><body>bad</body></html>
+EOF
+        my $out = '';
+        my $err = '';
+        run([$mda], \$in, \$out, \$err);
+        isnt($?, 0, "mda exited with failure");
+        is(length $out, 0, 'nothing in stdout');
+        isnt(length $err, 0, 'error message in stderr');
+
+        @new = glob("$faildir/new/*");
+        is(scalar(@new), 0, "new message did not show up");
+
+        # reject multipart again
+        $in = $mime->as_string;
+        $err = '';
+        run([$mda], \$in, \$out, \$err);
+        isnt($?, 0, "mda exited with failure");
+        is(length $out, 0, 'nothing in stdout');
+        isnt(length $err, 0, 'error message in stderr');
+        @new = glob("$faildir/new/*");
+        is(scalar(@new), 0, "new message did not show up");
 }
 
 done_testing();