about summary refs log tree commit homepage
path: root/lib/PublicInbox/Filter.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/Filter.pm')
-rw-r--r--lib/PublicInbox/Filter.pm36
1 files changed, 33 insertions, 3 deletions
diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index a83ecc83..64c31e8e 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -15,6 +15,8 @@ our $VERSION = '0.0.1';
 
 # start with the same defaults as mailman
 our $BAD_EXT = qr/\.(?:exe|bat|cmd|com|pif|scr|vbs|cpl)\z/i;
+our $MIME_HTML = qr!\btext/html\b!i;
+our $MIME_TEXT_ANY = qr!\btext/[a-z0-9\+\._-]+\b!i;
 
 # this is highly opinionated delivery
 # returns 0 only if there is nothing to deliver
@@ -35,7 +37,7 @@ sub run {
 
         if ($content_type =~ m!\btext/plain\b!i) {
                 return 1; # yay, nothing to do
-        } elsif ($content_type =~ m!\btext/html\b!i) {
+        } elsif ($content_type =~ $MIME_HTML) {
                 # HTML-only, non-multipart
                 my $body = $simple->body;
                 my $ct_parsed = parse_content_type($content_type);
@@ -129,14 +131,23 @@ sub strip_multipart {
                 my $part_type = $part->content_type;
                 if ($part_type =~ m!\btext/plain\b!i) {
                         push @keep, $part;
-                } elsif ($part_type =~ m!\btext/html\b!i) {
+                } elsif ($part_type =~ $MIME_HTML) {
                         push @html, $part;
-                } elsif ($part_type =~ m!\btext/[a-z0-9\+\._-]+\b!i) {
+                } elsif ($part_type =~ $MIME_TEXT_ANY) {
                         # Give other text attachments the benefit of the doubt,
                         # here?  Could be source code or script the user wants
                         # help with.
 
                         push @keep, $part;
+                } elsif ($part_type =~ m!\Aapplication/octet-stream\z!i) {
+                        # unfortunately, some mailers don't set correct types,
+                        # let messages of unknown type through but do not
+                        # change the sender-specified type
+                        if (recheck_type_ok($part)) {
+                                push @keep, $part;
+                        } else {
+                                $rejected++;
+                        }
                 } else {
                         # reject everything else
                         #
@@ -216,4 +227,23 @@ sub replace_body {
         mark_changed($simple);
 }
 
+# run the file(1) command to detect mime type
+# Not using File::MMagic for now since that requires extra configuration
+# Note: we do not rewrite the message with the detected mime type
+sub recheck_type_ok {
+        my ($part) = @_;
+        my $cmd = "file --mime-type -b -";
+        my $pid = open2(my $out, my $in, $cmd);
+        print $in $part->body;
+        close $in;
+        my $type = eval {
+                local $/;
+                <$out>;
+        };
+        waitpid($pid, 0);
+        chomp $type;
+
+        (($type =~ $MIME_TEXT_ANY) && ($type !~ $MIME_HTML))
+}
+
 1;