From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id B630A1F8C9 for ; Thu, 7 May 2020 21:05:58 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 08/13] EmlContentFoo: Email::MIME::ContentType replacement Date: Thu, 7 May 2020 21:05:51 +0000 Message-Id: <20200507210556.22995-9-e@yhbt.net> In-Reply-To: <20200507210556.22995-1-e@yhbt.net> References: <20200507210556.22995-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: Since we're getting rid of Email::MIME, get rid of Email::MIME::ContentType, too; since we may introduce speedups down the line specific to our codebase. --- MANIFEST | 3 + lib/PublicInbox/Eml.pm | 7 +- lib/PublicInbox/EmlContentFoo.pm | 294 +++++++++++++++++++++++++++++++ lib/PublicInbox/WwwAttach.pm | 2 +- t/eml_content_disposition.t | 102 +++++++++++ t/eml_content_type.t | 289 ++++++++++++++++++++++++++++++ 6 files changed, 692 insertions(+), 5 deletions(-) create mode 100644 lib/PublicInbox/EmlContentFoo.pm create mode 100644 t/eml_content_disposition.t create mode 100644 t/eml_content_type.t diff --git a/MANIFEST b/MANIFEST index 0906448e..055c8c9a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -106,6 +106,7 @@ lib/PublicInbox/DSPoll.pm lib/PublicInbox/Daemon.pm lib/PublicInbox/Emergency.pm lib/PublicInbox/Eml.pm +lib/PublicInbox/EmlContentFoo.pm lib/PublicInbox/ExtMsg.pm lib/PublicInbox/Feed.pm lib/PublicInbox/Filter/Base.pm @@ -231,6 +232,8 @@ t/ds-poll.t t/edit.t t/emergency.t t/eml.t +t/eml_content_disposition.t +t/eml_content_type.t t/epoll.t t/fail-bin/spamc t/feed.t diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 0c23bed0..1988bdb3 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -33,10 +33,9 @@ use Text::Wrap qw(wrap); # stdlib, we need Perl 5.6+ for $huge my $MIME_Header = find_encoding('MIME-Header'); -# TODO remove these dependencies -use Email::MIME::ContentType; +use PublicInbox::EmlContentFoo qw(parse_content_type parse_content_disposition); use Email::MIME::Encodings; -$Email::MIME::ContentType::STRICT_PARAMS = 0; +$PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; our $MAXPARTS = 1000; # same as SpamAssassin our $MAXDEPTH = 20; # seems enough, Perl sucks, here @@ -108,7 +107,7 @@ sub header_raw { # pick the first Content-Type header to match Email::MIME behavior. # It's usually the right one based on historical archives. sub ct ($) { - # Email::MIME::ContentType::content_type: + # PublicInbox::EmlContentFoo::content_type: $_[0]->{ct} //= parse_content_type(header($_[0], 'Content-Type')); } diff --git a/lib/PublicInbox/EmlContentFoo.pm b/lib/PublicInbox/EmlContentFoo.pm new file mode 100644 index 00000000..f507d548 --- /dev/null +++ b/lib/PublicInbox/EmlContentFoo.pm @@ -0,0 +1,294 @@ +# Copyright (C) 2020 all contributors +# Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. +# +# License: GPL-1.0+ or Artistic-1.0-Perl +# +# +# +# This license differs from the rest of public-inbox +# +# This is a fork of the Email::MIME::ContentType 1.022 with +# minor improvements and incompatibilities; namely changes to +# quiet warnings with legacy data. +package PublicInbox::EmlContentFoo; +use strict; +use parent qw(Exporter); +# ABSTRACT: Parse a MIME Content-Type or Content-Disposition Header + +use Encode 2.87 qw(find_mime_encoding); +our @EXPORT_OK = qw(parse_content_type parse_content_disposition); + +our $STRICT_PARAMS = 1; + +my $ct_default = 'text/plain; charset=us-ascii'; + +my $re_token = # US-ASCII except SPACE, CTLs and tspecials ()<>@,;:\\"/[]?= + qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/; + +my $re_token_non_strict = # allow CTLs and above ASCII + qr/([\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF]+|$re_token)/; + +my $re_qtext = # US-ASCII except CR, LF, white space, backslash and quote + qr/[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7E\x7F]/; +my $re_quoted_pair = qr/\\[\x00-\x7F]/; +my $re_quoted_string = qr/"((?:[ \t]*(?:$re_qtext|$re_quoted_pair))*[ \t]*)"/; + +my $re_qtext_non_strict = qr/[\x80-\xFF]|$re_qtext/; +my $re_quoted_pair_non_strict = qr/\\[\x00-\xFF]/; +my $re_quoted_string_non_strict = +qr/"((?:[ \t]*(?:$re_qtext_non_strict|$re_quoted_pair_non_strict))*[ \t]*)"/; + +my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/; +my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/; +my $re_exvalue = qr/($re_charset)?'(?:$re_language)?'(.*)/; + +sub parse_content_type { + my ($ct) = @_; + + # If the header isn't there or is empty, give default answer. + $ct = $ct_default unless defined($ct) && length($ct); + + _unfold_lines($ct); + _clean_comments($ct); + + # It is also recommend (sic.) that this default be assumed when a + # syntactically invalid Content-Type header field is encountered. + unless ($ct =~ s/^($re_token)\/($re_token)//) { + unless ($STRICT_PARAMS && $ct =~ s/^($re_token_non_strict)\/ + ($re_token_non_strict)//x) { + #carp "Invalid Content-Type '$ct'"; + return parse_content_type($ct_default); + } + } + + my ($type, $subtype) = (lc $1, lc $2); + + _clean_comments($ct); + $ct =~ s/\s+$//; + + my $attributes = {}; + if ($STRICT_PARAMS && length($ct) && $ct !~ /^;/) { + # carp "Missing ';' before first Content-Type parameter '$ct'"; + } else { + $attributes = _process_rfc2231(_parse_attributes($ct)); + } + + { + type => $type, + subtype => $subtype, + attributes => $attributes, + + # This is dumb. Really really dumb. For backcompat. -- rjbs, + # 2013-08-10 + discrete => $type, + composite => $subtype, + }; +} + +my $cd_default = 'attachment'; + +sub parse_content_disposition { + my ($cd) = @_; + + $cd = $cd_default unless defined($cd) && length($cd); + + _unfold_lines($cd); + _clean_comments($cd); + + unless ($cd =~ s/^($re_token)//) { + unless ($STRICT_PARAMS and $cd =~ s/^($re_token_non_strict)//) { + #carp "Invalid Content-Disposition '$cd'"; + return parse_content_disposition($cd_default); + } + } + + my $type = lc $1; + + _clean_comments($cd); + $cd =~ s/\s+$//; + + my $attributes = {}; + if ($STRICT_PARAMS && length($cd) && $cd !~ /^;/) { +# carp "Missing ';' before first Content-Disposition parameter '$cd'"; + } else { + $attributes = _process_rfc2231(_parse_attributes($cd)); + } + + { + type => $type, + attributes => $attributes, + }; +} + +sub _unfold_lines { + $_[0] =~ s/(?:\r\n|[\r\n])(?=[ \t])//g; +} + +sub _clean_comments { + my $ret = ($_[0] =~ s/^\s+//); + while (length $_[0]) { + last unless $_[0] =~ s/^\(//; + my $level = 1; + while (length $_[0]) { + my $ch = substr $_[0], 0, 1, ''; + if ($ch eq '(') { + $level++; + } elsif ($ch eq ')') { + $level--; + last if $level == 0; + } elsif ($ch eq '\\') { + substr $_[0], 0, 1, ''; + } + } + # carp "Unbalanced comment" if $level != 0 and $STRICT_PARAMS; + $ret |= ($_[0] =~ s/^\s+//); + } + $ret; +} + +sub _process_rfc2231 { + my ($attribs) = @_; + my %cont; + my %encoded; + foreach (keys %{$attribs}) { + next unless $_ =~ m/^(.*)\*([0-9])\*?$/; + my ($attr, $sec) = ($1, $2); + $cont{$attr}->[$sec] = $attribs->{$_}; + $encoded{$attr}->[$sec] = 1 if $_ =~ m/\*$/; + delete $attribs->{$_}; + } + foreach (keys %cont) { + my $key = $_; + $key .= '*' if $encoded{$_}; + $attribs->{$key} = join '', @{$cont{$_}}; + } + foreach (keys %{$attribs}) { + next unless $_ =~ m/^(.*)\*$/; + my $key = $1; + next unless $attribs->{$_} =~ m/^$re_exvalue$/; + my ($charset, $value) = ($1, $2); + $value =~ s/%([0-9A-Fa-f]{2})/pack('C', hex($1))/eg; + if (length $charset) { + my $enc = find_mime_encoding($charset); + if (defined $enc) { + $value = $enc->decode($value); + # } else { + #carp "Unknown charset '$charset' in + #attribute '$key' value"; + } + } + $attribs->{$key} = $value; + delete $attribs->{$_}; + } + $attribs; +} + +sub _parse_attributes { + local $_ = shift; + substr($_, 0, 0, '; ') if length $_ and $_ !~ /^;/; + my $attribs = {}; + while (length $_) { + s/^;// or $STRICT_PARAMS and do { + #carp "Missing semicolon before parameter '$_'"; + return $attribs; + }; + _clean_comments($_); + unless (length $_) { + # Some mail software generates a Content-Type like this: + # "Content-Type: text/plain;" + # RFC 1521 section 3 says a parameter must exist if + # there is a semicolon. + #carp "Extra semicolon after last parameter" if + #$STRICT_PARAMS; + return $attribs; + } + my $attribute; + if (s/^($re_token)=//) { + $attribute = lc $1; + } else { + if ($STRICT_PARAMS) { + # carp "Illegal parameter '$_'"; + return $attribs; + } + if (s/^($re_token_non_strict)=//) { + $attribute = lc $1; + } else { + unless (s/^([^;=\s]+)\s*=//) { + #carp "Cannot parse parameter '$_'"; + return $attribs; + } + $attribute = lc $1; + } + } + _clean_comments($_); + my $value = _extract_attribute_value(); + $attribs->{$attribute} = $value; + _clean_comments($_); + } + $attribs; +} + +sub _extract_attribute_value { # EXPECTS AND MODIFIES $_ + my $value; + while (length $_) { + if (s/^($re_token)//) { + $value .= $1; + } elsif (s/^$re_quoted_string//) { + my $sub = $1; + $sub =~ s/\\(.)/$1/g; + $value .= $sub; + } elsif ($STRICT_PARAMS) { + #my $char = substr $_, 0, 1; + #carp "Unquoted '$char' not allowed"; + return; + } elsif (s/^($re_token_non_strict)//) { + $value .= $1; + } elsif (s/^$re_quoted_string_non_strict//) { + my $sub = $1; + $sub =~ s/\\(.)/$1/g; + $value .= $sub; + } + my $erased = _clean_comments($_); + last if !length $_ or /^;/; + if ($STRICT_PARAMS) { + #my $char = substr $_, 0, 1; + #carp "Extra '$char' found after parameter"; + return; + } + if ($erased) { + # Sometimes semicolon is missing, so check for = char + last if m/^$re_token_non_strict=/; + $value .= ' '; + } + $value .= substr $_, 0, 1, ''; + } + $value; +} + +1; +__END__ +=func parse_content_type + +This routine is exported by default. + +This routine parses email content type headers according to section 5.1 of RFC +2045 and also RFC 2231 (Character Set and Parameter Continuations). It returns +a hash as above, with entries for the C, the C, and a hash of +C. + +For backward compatibility with a really unfortunate misunderstanding of RFC +2045 by the early implementors of this module, C and C are +also present in the returned hashref, with the values of C and C +respectively. + +=func parse_content_disposition + +This routine is exported by default. + +This routine parses email Content-Disposition headers according to RFC 2183 and +RFC 2231. It returns a hash as above, with entries for the C, and a hash +of C. + +=cut diff --git a/lib/PublicInbox/WwwAttach.pm b/lib/PublicInbox/WwwAttach.pm index 5b2914b3..754da13f 100644 --- a/lib/PublicInbox/WwwAttach.pm +++ b/lib/PublicInbox/WwwAttach.pm @@ -6,7 +6,7 @@ package PublicInbox::WwwAttach; # internal package use strict; use warnings; use bytes (); # only for bytes::length -use Email::MIME::ContentType qw(parse_content_type); +use PublicInbox::EmlContentFoo qw(parse_content_type); use PublicInbox::Eml; sub get_attach_i { # ->each_part callback diff --git a/t/eml_content_disposition.t b/t/eml_content_disposition.t new file mode 100644 index 00000000..9bdacc05 --- /dev/null +++ b/t/eml_content_disposition.t @@ -0,0 +1,102 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. +# +# License: GPL-1.0+ or Artistic-1.0-Perl +# +# +use strict; +use Test::More; +use PublicInbox::EmlContentFoo qw(parse_content_disposition); + +my %cd_tests = ( + '' => { type => 'attachment', attributes => {} }, + 'inline' => { type => 'inline', attributes => {} }, + 'attachment' => { type => 'attachment', attributes => {} }, + + 'attachment; filename=genome.jpeg;' . + ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500"' => { + type => 'attachment', + attributes => { + filename => 'genome.jpeg', + 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' + } + }, + + q(attachment; filename*=UTF-8''genome.jpeg;) . + q( modification-date="Wed, 12 Feb 1997 16:29:51 -0500") => { + type => 'attachment', + attributes => { + filename => 'genome.jpeg', + 'modification-date' => 'Wed, 12 Feb 1997 16:29:51 -0500' + } + }, + + q(attachment; filename*0*=us-ascii'en'This%20is%20even%20more%20;) . + q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { + type => 'attachment', + attributes => { + filename => "This is even more ***fun*** isn't it!" + } + }, + + q(attachment; filename*0*='en'This%20is%20even%20more%20;) . + q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { + type => 'attachment', + attributes => { + filename => "This is even more ***fun*** isn't it!" + } + }, + + q(attachment; filename*0*=''This%20is%20even%20more%20;) . + q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { + type => 'attachment', + attributes => { + filename => "This is even more ***fun*** isn't it!" + } + }, + + q(attachment; filename*0*=us-ascii''This%20is%20even%20more%20;). + q( filename*1*=%2A%2A%2Afun%2A%2A%2A%20; filename*2="isn't it!") => { + type => 'attachment', + attributes => { + filename => "This is even more ***fun*** isn't it!" + } + }, +); + +my %non_strict_cd_tests = ( + 'attachment; filename=genome.jpeg;' . + ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500";' => { + type => 'attachment', + attributes => { + filename => 'genome.jpeg', + 'modification-date' => + 'Wed, 12 Feb 1997 16:29:51 -0500' + } + }, +); + +sub test { + my ($string, $expect, $info) = @_; + local $_; + $info =~ s/\r/\\r/g; + $info =~ s/\n/\\n/g; + is_deeply(parse_content_disposition($string), $expect, $info); +} + +for (sort keys %cd_tests) { + test($_, $cd_tests{$_}, "Can parse C-D <$_>"); +} + +local $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; +for (sort keys %cd_tests) { + test($_, $cd_tests{$_}, "Can parse non-strict C-D <$_>"); +} +for (sort keys %non_strict_cd_tests) { + test($_, $non_strict_cd_tests{$_}, "Can parse non-strict C-D <$_>"); +} + +done_testing; diff --git a/t/eml_content_type.t b/t/eml_content_type.t new file mode 100644 index 00000000..5fd7d1d9 --- /dev/null +++ b/t/eml_content_type.t @@ -0,0 +1,289 @@ +#!perl -w +# Copyright (C) 2020 all contributors +# Copyright (C) 2004- Simon Cozens, Casey West, Ricardo SIGNES +# This library is free software; you can redistribute it and/or modify +# it under the same terms as Perl itself. +# +# License: GPL-1.0+ or Artistic-1.0-Perl +# +# +use strict; +use Test::More; +use PublicInbox::EmlContentFoo qw(parse_content_type); + +my %ct_tests = ( + '' => { + type => "text", + subtype => "plain", + attributes => { charset => "us-ascii" } + }, + + "text/plain" => { + type => "text", + subtype => "plain", + attributes => {} + }, + 'text/plain; charset=us-ascii' => { + type => "text", + subtype => "plain", + attributes => { charset => "us-ascii" } + }, + 'text/plain; charset="us-ascii"' => { + type => "text", + subtype => "plain", + attributes => { charset => "us-ascii" } + }, + "text/plain; charset=us-ascii (Plain text)" => { + type => "text", + subtype => "plain", + attributes => { charset => "us-ascii" } + }, + + 'text/plain; charset=ISO-8859-1' => { + type => "text", + subtype => "plain", + attributes => { charset => "ISO-8859-1" } + }, + 'text/plain; charset="ISO-8859-1"' => { + type => "text", + subtype => "plain", + attributes => { charset => "ISO-8859-1" } + }, + 'text/plain; charset="ISO-8859-1" (comment)' => { + type => "text", + subtype => "plain", + attributes => { charset => "ISO-8859-1" } + }, + + '(c) text/plain (c); (c) charset=ISO-8859-1 (c)' => { + type => "text", + subtype => "plain", + attributes => { charset => "ISO-8859-1" } + }, + '(c \( \\\\) (c) text/plain (c) (c) ; (c) (c) charset=utf-8 (c)' => { + type => "text", + subtype => "plain", + attributes => { charset => "utf-8" } + }, + 'text/plain; (c (nested ()c)another c)() charset=ISO-8859-1' => { + type => "text", + subtype => "plain", + attributes => { charset => "ISO-8859-1" } + }, + 'text/plain (c \(!nested ()c\)\)(nested\(c())); charset=utf-8' => { + type => "text", + subtype => "plain", + attributes => { charset => "utf-8" } + }, + + "application/foo" => { + type => "application", + subtype => "foo", + attributes => {} + }, + "multipart/mixed; boundary=unique-boundary-1" => { + type => "multipart", + subtype => "mixed", + attributes => { boundary => "unique-boundary-1" } + }, + 'message/external-body; access-type=local-file; name="/u/n/m.jpg"' => { + type => "message", + subtype => "external-body", + attributes => { + "access-type" => "local-file", + "name" => "/u/n/m.jpg" + } + }, + 'multipart/mixed; boundary="----------=_1026452699-10321-0" ' => { + 'type' => 'multipart', + 'subtype' => 'mixed', + 'attributes' => { + 'boundary' => '----------=_1026452699-10321-0' + } + }, + 'multipart/report; boundary= "=_0=73e476c3-cd5a-5ba3-b910-2="' => { + 'type' => 'multipart', + 'subtype' => 'report', + 'attributes' => { + 'boundary' => '=_0=73e476c3-cd5a-5ba3-b910-2=' + } + }, + 'multipart/report; boundary=' . " \t" . '"=_0=7-c-5-b-2="' => { + 'type' => 'multipart', + 'subtype' => 'report', + 'attributes' => { + 'boundary' => '=_0=7-c-5-b-2=' + } + }, + + 'message/external-body; access-type=URL;' . + ' URL*0="ftp://";' . + ' URL*1="example.com/"' => { + 'type' => 'message', + 'subtype' => 'external-body', + 'attributes' => { + 'access-type' => 'URL', + 'url' => 'ftp://example.com/' + } + }, + 'message/external-body; access-type=URL; URL="ftp://example.com/"' => { + 'type' => 'message', + 'subtype' => 'external-body', + 'attributes' => { + 'access-type' => 'URL', + 'url' => 'ftp://example.com/', + } + }, + + "application/x-stuff; title*=us-ascii'en-us'This%20is%20f%2Ad" => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => 'This is f*d' + } + }, + "application/x-stuff; title*=us-ascii''This%20is%20f%2Ad" => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => 'This is f*d' + } + }, + "application/x-stuff; title*=''This%20is%20f%2Ad" => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => 'This is f*d' + } + }, + "application/x-stuff; title*='en-us'This%20is%20f%2Ad" => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => 'This is f*d' + } + }, + q(application/x-stuff;) . + q( title*0*=us-ascii'en'This%20is%20even%20more%20;) . + q(title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => "This is even more ***fun*** isn't it!" + } + }, + q(application/x-stuff;) . + q( title*0*='en'This%20is%20even%20more%20;) . + q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => "This is even more ***fun*** isn't it!" + } + }, + q(application/x-stuff;) . + q( title*0*=''This%20is%20even%20more%20;) . + q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => "This is even more ***fun*** isn't it!" + } + }, + q(application/x-stuff;). + q( title*0*=us-ascii''This%20is%20even%20more%20;). + q( title*1*=%2A%2A%2Afun%2A%2A%2A%20; title*2="isn't it!") + => { + 'type' => 'application', + 'subtype' => 'x-stuff', + 'attributes' => { + 'title' => "This is even more ***fun*** isn't it!" + } + }, + + 'text/plain; attribute="v\"v\\\\v\(v\>\<\)\@\,\;\:\/\]\[\?\=v v";' . + ' charset=us-ascii' => { + 'type' => 'text', + 'subtype' => 'plain', + 'attributes' => { + 'attribute' => 'v"v\\v(v><)@,;:/][?=v v', + 'charset' => 'us-ascii', + }, + }, + + qq(text/plain;\r + charset=us-ascii;\r + attribute="\r value1 \r value2\r\n value3\r\n value4\r\n "\r\n ) => { + 'type' => 'text', + 'subtype' => 'plain', + 'attributes' => { + 'attribute' => ' value1 value2 value3 value4 ', + 'charset' => 'us-ascii', + }, + }, +); + +my %non_strict_ct_tests = ( + "text/plain;" => { type => "text", subtype => "plain", attributes => {} }, + "text/plain; " => + { type => "text", subtype => "plain", attributes => {} }, + 'image/jpeg;' . + ' x-mac-type="3F3F3F3F";'. + ' x-mac-creator="3F3F3F3F" name="file name.jpg";' => { + type => "image", + subtype => "jpeg", + attributes => { + 'x-mac-type' => "3F3F3F3F", + 'x-mac-creator' => "3F3F3F3F", + 'name' => "file name.jpg" + } + }, + "text/plain; key=very long value" => { + type => "text", + subtype => "plain", + attributes => { key => "very long value" } + }, + "text/plain; key=very long value key2=value2" => { + type => "text", + subtype => "plain", + attributes => { key => "very long value", key2 => "value2" } + }, + 'multipart/mixed; boundary = "--=_Next_Part_24_Nov_2016_08.09.21"' => { + type => "multipart", + subtype => "mixed", + attributes => { + boundary => "--=_Next_Part_24_Nov_2016_08.09.21" + } + }, +); + +sub test { + my ($string, $expect, $info) = @_; + + # So stupid. -- rjbs, 2013-08-10 + $expect->{discrete} = $expect->{type}; + $expect->{composite} = $expect->{subtype}; + + local $_; + $info =~ s/\r/\\r/g; + $info =~ s/\n/\\n/g; + is_deeply(parse_content_type($string), $expect, $info); +} + +for (sort keys %ct_tests) { + test($_, $ct_tests{$_}, "Can parse C-T <$_>"); +} + +local $PublicInbox::EmlContentFoo::STRICT_PARAMS = 0; +for (sort keys %ct_tests) { + test($_, $ct_tests{$_}, "Can parse non-strict C-T <$_>"); +} +for (sort keys %non_strict_ct_tests) { + test( + $_, + $non_strict_ct_tests{$_}, + "Can parse non-strict C-T <$_>" + ); +} + +done_testing;