From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.8 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, RP_MATCHES_RCVD shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8C03420317 for ; Tue, 14 Jul 2015 21:02:46 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] reject HTML loudly and automatically Date: Tue, 14 Jul 2015 21:02:46 +0000 Message-Id: <1436907766-32202-1-git-send-email-e@80x24.org> List-Id: This should hopefully reduce the delay between when a user fails to send plain-text and when an admin such as myself notices the HTML mail in a sea of spam. Unfortunately, this can lead to backscatter, so avoid doing it until it has passed through spamc, at least. --- lib/PublicInbox/Filter.pm | 11 +++++++---- public-inbox-mda | 2 +- t/mda.t | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm index 6862bb6..49ba5cb 100644 --- a/lib/PublicInbox/Filter.pm +++ b/lib/PublicInbox/Filter.pm @@ -12,6 +12,7 @@ use Email::MIME::ContentType qw/parse_content_type/; use Email::Filter; use IPC::Run; our $VERSION = '0.0.1'; +use constant NO_HTML => '*** We only accept plain-text email, no HTML ***'; # start with the same defaults as mailman our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i; @@ -21,7 +22,7 @@ our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i; # this is highly opinionated delivery # returns 0 only if there is nothing to deliver sub run { - my ($class, $mime) = @_; + my ($class, $mime, $filter) = @_; my $content_type = $mime->header('Content-Type') || 'text/plain'; @@ -38,6 +39,7 @@ sub run { if ($content_type =~ m!\btext/plain\b!i) { return 1; # yay, nothing to do } elsif 
($content_type =~ $MIME_HTML) { + $filter->reject(NO_HTML) if $filter; # HTML-only, non-multipart my $body = $mime->body; my $ct_parsed = parse_content_type($content_type); @@ -45,7 +47,7 @@ sub run { replace_body($mime, $body); return 1; } elsif ($content_type =~ m!\bmultipart/!i) { - return strip_multipart($mime, $content_type); + return strip_multipart($mime, $content_type, $filter); } else { replace_body($mime, "$content_type message scrubbed"); return 0; @@ -109,9 +111,9 @@ sub dump_html { # this is to correct user errors and not expected to cover all corner cases # if users don't want to hit this, they should be sending text/plain messages # unfortunately, too many people send HTML mail and we'll attempt to convert -# it to something safer, smaller and harder-to-track. +# it to something safer, smaller and harder-to-spy-on-users-with. sub strip_multipart { - my ($mime, $content_type) = @_; + my ($mime, $content_type, $filter) = @_; my (@html, @keep); my $rejected = 0; @@ -133,6 +135,7 @@ sub strip_multipart { if ($part_type =~ m!\btext/plain\b!i) { push @keep, $part; } elsif ($part_type =~ $MIME_HTML) { + $filter->reject(NO_HTML) if $filter; push @html, $part; } elsif ($part_type =~ $MIME_TEXT_ANY) { # Give other text attachments the benefit of the doubt, diff --git a/public-inbox-mda b/public-inbox-mda index 047a482..4348cb2 100755 --- a/public-inbox-mda +++ b/public-inbox-mda @@ -38,7 +38,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) && $filtered = undef; $filter->simple($msg); - if (PublicInbox::Filter->run($msg)) { + if (PublicInbox::Filter->run($msg, $filter)) { # run spamc again on the HTML-free message if (do_spamc($msg, \$filtered)) { $msg = Email::MIME->new(\$filtered); diff --git a/t/mda.t b/t/mda.t index 53712a5..67e86f4 100644 --- a/t/mda.t +++ b/t/mda.t @@ -23,6 +23,7 @@ my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock my $addr = 'test-public@example.com'; my $cfgpfx = "publicinbox.test"; my $failbox = 
"$home/fail.mbox"; +my $mime; { ok(-x "$main_bin/spamc", @@ -235,7 +236,7 @@ EOF ) ]; $mid = 'multipart-html-sucks@11'; - my $mime = Email::MIME->create( + $mime = Email::MIME->create( header_str => [ From => 'a@example.com', Subject => 'blah', @@ -276,6 +277,38 @@ EOF ok(-d $faildir, "emergency exists"); my @new = glob("$faildir/new/*"); is(scalar(@new), 1, "message delivered"); + is(unlink(@new), 1, "removed emergency message"); + + local $ENV{PATH} = $main_path; + $in = < +To: $addr +Content-Type: text/html +Message-ID: +Subject: faildir subject +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +bad +EOF + my $out = ''; + my $err = ''; + run([$mda], \$in, \$out, \$err); + isnt($?, 0, "mda exited with failure"); + is(length $out, 0, 'nothing in stdout'); + isnt(length $err, 0, 'error message in stderr'); + + @new = glob("$faildir/new/*"); + is(scalar(@new), 0, "new message did not show up"); + + # reject multipart again + $in = $mime->as_string; + $err = ''; + run([$mda], \$in, \$out, \$err); + isnt($?, 0, "mda exited with failure"); + is(length $out, 0, 'nothing in stdout'); + isnt(length $err, 0, 'error message in stderr'); + @new = glob("$faildir/new/*"); + is(scalar(@new), 0, "new message did not show up"); } done_testing(); -- EW