From e022d3377fd2c50fd9931bf96394728958a90bf3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Apr 2014 01:01:10 +0000 Subject: huge refactor of encoding handling Hopefully this simplifies and corrects our usage of Perl encoding APIs. --- t/filter.t | 66 +++++++++++++++++++++++++------------------------------------ t/mda.t | 21 ++++++++++++++++++++ t/utf8.mbox | 12 +++++++++++ 3 files changed, 60 insertions(+), 39 deletions(-) create mode 100644 t/utf8.mbox (limited to 't') diff --git a/t/filter.t b/t/filter.t index ac9f1f6f..c3cd39f4 100644 --- a/t/filter.t +++ b/t/filter.t @@ -4,7 +4,6 @@ use strict; use warnings; use Test::More; use Email::MIME; -use Email::Filter; use PublicInbox::Filter; sub count_body_parts { @@ -18,7 +17,7 @@ sub count_body_parts { # plain-text email is passed through unchanged { - my $s = Email::Simple->create( + my $s = Email::MIME->create( header => [ From => 'a@example.com', To => 'b@example.com', @@ -27,14 +26,12 @@ sub count_body_parts { ], body => "hello world\n", ); - my $f = Email::Filter->new(data => $s->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - is($s->as_string, $f->simple->as_string, "plain email unchanged"); + is(1, PublicInbox::Filter->run($s), "run was a success"); } # convert single-part HTML to plain-text { - my $s = Email::Simple->create( + my $s = Email::MIME->create( header => [ From => 'a@example.com', To => 'b@example.com', @@ -43,13 +40,12 @@ sub count_body_parts { ], body => "bad body\n", ); - my $f = Email::Filter->new(data => $s->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - unlike($f->simple->as_string, qr//, "HTML removed"); - is("text/plain", $f->simple->header("Content-Type"), + is(1, PublicInbox::Filter->run($s), "run was a success"); + unlike($s->as_string, qr//, "HTML removed"); + is("text/plain", $s->header("Content-Type"), "content-type changed"); - like($f->simple->body, qr/\A\s*bad body\s*\z/, "body"); - 
like($f->simple->header("X-Content-Filtered-By"), + like($s->body, qr/\A\s*bad body\s*\z/, "body"); + like($s->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/, "XCFB header added"); } @@ -79,9 +75,8 @@ sub count_body_parts { ], parts => $parts, ); - my $f = Email::Filter->new(data => $email->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - my $parsed = Email::MIME->new($f->simple->as_string); + is(1, PublicInbox::Filter->run($email), "run was a success"); + my $parsed = Email::MIME->new($email->as_string); is("text/plain", $parsed->header("Content-Type")); is(scalar $parsed->parts, 1, "HTML part removed"); my %bodies; @@ -110,9 +105,8 @@ sub count_body_parts { header_str => [ From => 'a@example.com', Subject => 'blah' ], parts => $parts, ); - my $f = Email::Filter->new(data => $email->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - my $parsed = Email::MIME->new($f->simple->as_string); + is(1, PublicInbox::Filter->run($email), "run was a success"); + my $parsed = Email::MIME->new($email->as_string); is(scalar $parsed->parts, 2, "still 2 parts"); my %bodies; $parsed->walk_parts(sub { @@ -149,9 +143,8 @@ sub count_body_parts { header_str => [ From => 'a@example.com', Subject => 'blah' ], parts => $parts, ); - my $f = Email::Filter->new(data => $email->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - my $parsed = Email::MIME->new($f->simple->as_string); + is(1, PublicInbox::Filter->run($email), "run was a success"); + my $parsed = Email::MIME->new($email->as_string); is(scalar $parsed->parts, 2, "still 2 parts"); my %bodies; $parsed->walk_parts(sub { @@ -186,9 +179,8 @@ sub count_body_parts { header_str => [ From => 'a@example.com', Subject => 'blah' ], parts => $parts, ); - my $f = Email::Filter->new(data => $email->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - my $parsed = Email::MIME->new($f->simple->as_string); + is(1, 
PublicInbox::Filter->run($email), "run was a success"); + my $parsed = Email::MIME->new($email->as_string); is(scalar $parsed->parts, 1, "image part removed"); my %bodies; $parsed->walk_parts(sub { @@ -226,10 +218,9 @@ sub count_body_parts { header_str => [ From => 'a@example.com', Subject => 'blah' ], parts => $parts, ); - my $f = Email::Filter->new(data => $email->as_string); - is(0, PublicInbox::Filter->run($f->simple), + is(0, PublicInbox::Filter->run($email), "run signaled to stop delivery"); - my $parsed = Email::MIME->new($f->simple->as_string); + my $parsed = Email::MIME->new($email->as_string); is(scalar $parsed->parts, 1, "bad parts removed"); my %bodies; $parsed->walk_parts(sub { @@ -245,7 +236,7 @@ sub count_body_parts { } { - my $s = Email::Simple->create( + my $s = Email::MIME->create( header => [ From => 'a@example.com', To => 'b@example.com', @@ -254,13 +245,12 @@ sub count_body_parts { ], body => "hello world\n", ); - my $f = Email::Filter->new(data => $s->as_string); - is(0, PublicInbox::Filter->run($f->simple), "run was a failure"); - like($f->simple->as_string, qr/scrubbed/, "scrubbed message"); + is(0, PublicInbox::Filter->run($s), "run was a failure"); + like($s->as_string, qr/scrubbed/, "scrubbed message"); } { - my $s = Email::Simple->create( + my $s = Email::MIME->create( header => [ From => 'a@example.com', To => 'b@example.com', @@ -273,9 +263,8 @@ sub count_body_parts { is('c@example.com', $s->header("Mail-Followup-To"), "mft set correctly"); - my $f = Email::Filter->new(data => $s->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run succeeded for mft"); - is(undef, $f->simple->header("Mail-Followup-To"), "mft stripped"); + is(1, PublicInbox::Filter->run($s), "run succeeded for mft"); + is(undef, $s->header("Mail-Followup-To"), "mft stripped"); } # multi-part with application/octet-stream @@ -308,11 +297,10 @@ EOF header_str => [ From => 'a@example.com', Subject => 'blah' ], parts => $parts, ); - my $f = 
Email::Filter->new(data => $email->as_string); - is(1, PublicInbox::Filter->run($f->simple), "run was a success"); - my $parsed = Email::MIME->new($f->simple->as_string); + is(1, PublicInbox::Filter->run($email), "run was a success"); + my $parsed = Email::MIME->new($email->as_string); is(scalar $parsed->parts, 1, "only one remaining part"); - like($f->simple->header("X-Content-Filtered-By"), + like($parsed->header("X-Content-Filtered-By"), qr/PublicInbox::Filter/, "XCFB header added"); } diff --git a/t/mda.t b/t/mda.t index 3357718c..fad96e5b 100644 --- a/t/mda.t +++ b/t/mda.t @@ -4,6 +4,7 @@ use strict; use warnings; use Test::More; use Email::MIME; +use Email::Filter; use File::Temp qw/tempdir/; use Cwd; use IPC::Run qw(run); @@ -43,6 +44,26 @@ my $failbox = "$home/fail.mbox"; } } +local $ENV{GIT_COMMITTER_NAME} = eval { + use PublicInbox::MDA; + use Encode qw/encode/; + my $mbox = 't/utf8.mbox'; + open(my $fh, '<', $mbox) or die "failed to open mbox: $mbox\n"; + my $str = eval { local $/; <$fh> }; + close $fh; + my $msg = Email::Filter->new(data => $str); + $msg = Email::MIME->new($msg->simple->as_string); + my ($author, $email, $date) = PublicInbox::MDA->author_info($msg); + is('El&#233;anor', + encode('us-ascii', my $tmp = $author, Encode::HTMLCREF), + 'HTML conversion is correct'); + is($email, 'e@example.com', 'email parsed correctly'); + is($date, 'Thu, 01 Jan 1970 00:00:00 +0000', + 'message date parsed correctly'); + $author; +}; +die $@ if $@; + { my $good_rev; local $ENV{PI_EMERGENCY} = $failbox; diff --git a/t/utf8.mbox b/t/utf8.mbox new file mode 100644 index 00000000..cebaf9b0 --- /dev/null +++ b/t/utf8.mbox @@ -0,0 +1,12 @@ +From e@yhbt.net Thu Jan 01 00:00:00 1970 +Date: Thu, 01 Jan 1970 00:00:00 +0000 +To: =?utf-8?Q?El=C3=A9anor?= +From: =?utf-8?Q?El=C3=A9anor?= +Subject: Testing for =?utf-8?Q?El=C3=A9anor?= +Message-ID: +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Disposition: inline +Content-Transfer-Encoding: 8bit + +This is
a test message for Eléanor -- cgit v1.2.3-24-ge0c7