user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] reject HTML loudly and automatically
@ 2015-07-14 21:02 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2015-07-14 21:02 UTC (permalink / raw)
  To: meta

This should hopefully reduce the delay between when a user fails
to send plain-text and when an admin such as myself notices the
HTML mail in a sea of spam.

Unfortunately, this can lead to backscatter, so avoid doing it
until it's passed through spamc, at least.
---
 lib/PublicInbox/Filter.pm | 11 +++++++----
 public-inbox-mda          |  2 +-
 t/mda.t                   | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index 6862bb6..49ba5cb 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -12,6 +12,7 @@ use Email::MIME::ContentType qw/parse_content_type/;
 use Email::Filter;
 use IPC::Run;
 our $VERSION = '0.0.1';
+use constant NO_HTML => '*** We only accept plain-text email, no HTML ***';
 
 # start with the same defaults as mailman
 our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i;
@@ -21,7 +22,7 @@ our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
 # this is highly opinionated delivery
 # returns 0 only if there is nothing to deliver
 sub run {
-	my ($class, $mime) = @_;
+	my ($class, $mime, $filter) = @_;
 
 	my $content_type = $mime->header('Content-Type') || 'text/plain';
 
@@ -38,6 +39,7 @@ sub run {
 	if ($content_type =~ m!\btext/plain\b!i) {
 		return 1; # yay, nothing to do
 	} elsif ($content_type =~ $MIME_HTML) {
+		$filter->reject(NO_HTML) if $filter;
 		# HTML-only, non-multipart
 		my $body = $mime->body;
 		my $ct_parsed = parse_content_type($content_type);
@@ -45,7 +47,7 @@ sub run {
 		replace_body($mime, $body);
 		return 1;
 	} elsif ($content_type =~ m!\bmultipart/!i) {
-		return strip_multipart($mime, $content_type);
+		return strip_multipart($mime, $content_type, $filter);
 	} else {
 		replace_body($mime, "$content_type message scrubbed");
 		return 0;
@@ -109,9 +111,9 @@ sub dump_html {
 # this is to correct user errors and not expected to cover all corner cases
 # if users don't want to hit this, they should be sending text/plain messages
 # unfortunately, too many people send HTML mail and we'll attempt to convert
-# it to something safer, smaller and harder-to-track.
+# it to something safer, smaller and harder-to-spy-on-users-with.
 sub strip_multipart {
-	my ($mime, $content_type) = @_;
+	my ($mime, $content_type, $filter) = @_;
 
 	my (@html, @keep);
 	my $rejected = 0;
@@ -133,6 +135,7 @@ sub strip_multipart {
 		if ($part_type =~ m!\btext/plain\b!i) {
 			push @keep, $part;
 		} elsif ($part_type =~ $MIME_HTML) {
+			$filter->reject(NO_HTML) if $filter;
 			push @html, $part;
 		} elsif ($part_type =~ $MIME_TEXT_ANY) {
 			# Give other text attachments the benefit of the doubt,
diff --git a/public-inbox-mda b/public-inbox-mda
index 047a482..4348cb2 100755
--- a/public-inbox-mda
+++ b/public-inbox-mda
@@ -38,7 +38,7 @@ if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
 	$filtered = undef;
 	$filter->simple($msg);
 
-	if (PublicInbox::Filter->run($msg)) {
+	if (PublicInbox::Filter->run($msg, $filter)) {
 		# run spamc again on the HTML-free message
 		if (do_spamc($msg, \$filtered)) {
 			$msg = Email::MIME->new(\$filtered);
diff --git a/t/mda.t b/t/mda.t
index 53712a5..67e86f4 100644
--- a/t/mda.t
+++ b/t/mda.t
@@ -23,6 +23,7 @@ my $fail_path = "$fail_bin:$ENV{PATH}"; # for spamc spam mock
 my $addr = 'test-public@example.com';
 my $cfgpfx = "publicinbox.test";
 my $failbox = "$home/fail.mbox";
+my $mime;
 
 {
 	ok(-x "$main_bin/spamc",
@@ -235,7 +236,7 @@ EOF
 		)
 	];
 	$mid = 'multipart-html-sucks@11';
-	my $mime = Email::MIME->create(
+	$mime = Email::MIME->create(
 		header_str => [
 		  From => 'a@example.com',
 		  Subject => 'blah',
@@ -276,6 +277,38 @@ EOF
 	ok(-d $faildir, "emergency exists");
 	my @new = glob("$faildir/new/*");
 	is(scalar(@new), 1, "message delivered");
+	is(unlink(@new), 1, "removed emergency message");
+
+	local $ENV{PATH} = $main_path;
+	$in = <<EOF;
+From: Faildir <faildir\@example.com>
+To: $addr
+Content-Type: text/html
+Message-ID: <faildir\@example.com>
+Subject: faildir subject
+Date: Thu, 01 Jan 1970 00:00:00 +0000
+
+<html><body>bad</body></html>
+EOF
+	my $out = '';
+	my $err = '';
+	run([$mda], \$in, \$out, \$err);
+	isnt($?, 0, "mda exited with failure");
+	is(length $out, 0, 'nothing in stdout');
+	isnt(length $err, 0, 'error message in stderr');
+
+	@new = glob("$faildir/new/*");
+	is(scalar(@new), 0, "new message did not show up");
+
+	# reject multipart again
+	$in = $mime->as_string;
+	$err = '';
+	run([$mda], \$in, \$out, \$err);
+	isnt($?, 0, "mda exited with failure");
+	is(length $out, 0, 'nothing in stdout');
+	isnt(length $err, 0, 'error message in stderr');
+	@new = glob("$faildir/new/*");
+	is(scalar(@new), 0, "new message did not show up");
 }
 
 done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2015-07-14 21:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-14 21:02 [PATCH] reject HTML loudly and automatically Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).