From 62405fb60d3153fbacba4936086587860f495fce Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 4 Apr 2014 00:39:41 +0000
Subject: filter: use regexp to check multipart bodies

This should be safer than running file(1), which has had its share of
vulnerabilities this year (early 2014).

We really only care about diffs and maybe short log files, here.
---
 lib/PublicInbox/Filter.pm | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'lib')

diff --git a/lib/PublicInbox/Filter.pm b/lib/PublicInbox/Filter.pm
index 0b4dce87..4a348217 100644
--- a/lib/PublicInbox/Filter.pm
+++ b/lib/PublicInbox/Filter.pm
@@ -215,23 +215,13 @@ sub replace_body {
 	mark_changed($simple);
 }
 
-# run the file(1) command to detect mime type
-# Not using File::MMagic for now since that requires extra configuration
-# Note: we do not rewrite the message with the detected mime type
+# Check for display-able text, no messed up binaries
+# Note: we can not rewrite the message with the detected mime type
 sub recheck_type_ok {
 	my ($part) = @_;
-	my $cmd = "file --mime-type -b -";
-	my $pid = open2(my $out, my $in, $cmd);
-	print $in $part->body;
-	close $in;
-	my $type = eval {
-		local $/;
-		<$out>;
-	};
-	waitpid($pid, 0);
-	chomp $type;
-
-	(($type =~ $MIME_TEXT_ANY) && ($type !~ $MIME_HTML))
+	my $s = $part->body;
+	((bytes::length($s) < 0x10000) &&
+		($s =~ /\A([\P{XPosixPrint}\f\n\r\t]+)\z/))
 }
 
 1;
--
cgit v1.2.3-24-ge0c7