about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2014-03-28 08:22:45 +0000
committerEric Wong <normalperson@yhbt.net>2014-03-28 08:22:45 +0000
commit0b4b51e8ba8cf62c8da9312666d4866ff2403d6e (patch)
tree949f23b057a76ff566e9d8d1de7f71bf6c74c6ac
parent67e53d0875a7efcb958fb9680ea87216adaf06cc (diff)
downloadpublic-inbox-0b4b51e8ba8cf62c8da9312666d4866ff2403d6e.tar.gz
Some mailers do not correctly detect/set the Content-Type header, so
attempt to keep messages based on our server-detected MIME type if
application/octet-stream was specified.
-rw-r--r--lib/PublicInbox/Filter.pm36
-rw-r--r--t/filter.t35
2 files changed, 68 insertions, 3 deletions
diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index a83ecc83..64c31e8e 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -15,6 +15,8 @@ our $VERSION = '0.0.1';
 
 # start with the same defaults as mailman
 our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i;
+our $MIME_HTML = qr!\btext/html\b!i;
+our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
 
 # this is highly opinionated delivery
 # returns 0 only if there is nothing to deliver
@@ -35,7 +37,7 @@ sub run {
 
         if ($content_type =~ m!\btext/plain\b!i) {
                 return 1; # yay, nothing to do
-        } elsif ($content_type =~ m!\btext/html\b!i) {
+        } elsif ($content_type =~ $MIME_HTML) {
                 # HTML-only, non-multipart
                 my $body = $simple->body;
                 my $ct_parsed = parse_content_type($content_type);
@@ -129,14 +131,23 @@ sub strip_multipart {
                 my $part_type = $part->content_type;
                 if ($part_type =~ m!\btext/plain\b!i) {
                         push @keep, $part;
-                } elsif ($part_type =~ m!\btext/html\b!i) {
+                } elsif ($part_type =~ $MIME_HTML) {
                         push @html, $part;
-                } elsif ($part_type =~ m!\btext/[a-z0-9\+\._-]+\b!i) {
+                } elsif ($part_type =~ $MIME_TEXT_ANY) {
                         # Give other text attachments the benefit of the doubt,
                         # here?  Could be source code or script the user wants
                         # help with.
 
                         push @keep, $part;
+                } elsif ($part_type =~ m!\Aapplication/octet-stream\z!i) {
+                        # unfortunately, some mailers don't set correct types,
+                        # let messages of unknown type through but do not
+                        # change the sender-specified type
+                        if (recheck_type_ok($part)) {
+                                push @keep, $part;
+                        } else {
+                                $rejected++;
+                        }
                 } else {
                         # reject everything else
                         #
@@ -216,4 +227,23 @@ sub replace_body {
         mark_changed($simple);
 }
 
+# Run the file(1) command to detect the MIME type of a part's body.
+# Not using File::MMagic for now since that requires extra configuration.
+# Note: we do not rewrite the message with the detected MIME type.
+#
+# Takes one MIME part (anything providing a ->body method) and returns
+# true only if file(1) reports a text/* type other than text/html.
+sub recheck_type_ok {
+        my ($part) = @_;
+
+        # list form: file(1) is started directly, without a shell
+        my @cmd = qw(file --mime-type -b -);
+
+        # file(1) may exit before it has consumed all of its input;
+        # ignore SIGPIPE so writing a large body to the closed pipe
+        # makes print fail instead of killing this process
+        local $SIG{PIPE} = 'IGNORE';
+
+        my $pid = open2(my $out, my $in, @cmd);
+        print $in $part->body;
+        close $in;
+        my $type = do {
+                local $/; # slurp everything file(1) printed
+                <$out>;
+        };
+        close $out;
+        waitpid($pid, 0);
+
+        # nothing readable from file(1): do not let the part through
+        return 0 unless defined $type;
+        chomp $type;
+
+        (($type =~ $MIME_TEXT_ANY) && ($type !~ $MIME_HTML))
+}
+
 1;
diff --git a/t/filter.t b/t/filter.t
index 12f4ed6f..0aa26a5f 100644
--- a/t/filter.t
+++ b/t/filter.t
@@ -278,5 +278,40 @@ sub count_body_parts {
         is(undef, $f->simple->header("Mail-Followup-To"), "mft stripped");
 }
 
+# multi-part message where every part is declared application/octet-stream
+{
+        my $octet_stream = 'application/octet-stream';
+
+        # C source text sent with the generic octet-stream type
+        my $c_source = <<EOF;
+#include <stdio.h>
+int main(void)
+{
+        printf("Hello world\\n");
+        return 0;
+}
+EOF
+        my $source_part = Email::MIME->create(
+                attributes => { content_type => $octet_stream },
+                body => $c_source
+        );
+
+        # 4096 NUL bytes; presumably the part the filter rejects
+        my $zero_part = Email::MIME->create(
+                attributes => {
+                        filename => 'zero.data',
+                        encoding => 'base64',
+                        content_type => $octet_stream,
+                },
+                body => ("\0" x 4096),
+        );
+
+        my $email = Email::MIME->create(
+                header_str => [ From => 'a@example.com', Subject => 'blah' ],
+                parts => [ $source_part, $zero_part ],
+        );
+        my $filter = Email::Filter->new(data => $email->as_string);
+        is(1, PublicInbox::Filter->run($filter->simple), "run was a success");
+
+        # re-parse the filtered message and count what survived
+        my $reparsed = Email::MIME->new($filter->simple->as_string);
+        is(scalar $reparsed->parts, 1, "only one remaining part");
+        like($filter->simple->header("X-Content-Filtered-By"),
+                qr/PublicInbox::Filter/, "XCFB header added");
+}
 
 done_testing();