From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.2 required=3.0 tests=ALL_TRUSTED,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF, T_SCC_BODY_TEXT_LINE shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 235C01F54E for ; Mon, 12 Sep 2022 22:54:04 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1663023244; bh=nPi/ZKs5QcB8uK9jdmUz9Y2wy56+tLQ1hqt2oa6pb9A=; h=From:To:Subject:Date:From; b=E4jiLPslOzZHR23SUIsr6QOxiU0gXr34wsV88mmtH4cHqPnIXqQqdbS1KxmdhHdaJ YH7n26by8FbkxWH6CLcZYi3nR0EdUf9h5R4KwAR0HCZjoKMJcCfhT55PEQkdod2yTS acBxttC5G39RLWO1Tuf2nQgUALXwuimcsdjxFrBc= From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] www: viewdiff: fix UTF-8 names inside mbox attachments Date: Mon, 12 Sep 2022 22:54:04 +0000 Message-Id: <20220912225404.3115490-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: This avoids `Wide character in print' warnings and ensures the UTF-8 characters in `Signed-off-by' trailers are properly rendered in HTML even when attempting to decode and display application/octet-stream mbox attachments as HTML. Linkification and reconstruction for coderepos is probably still broken, but that is a much bigger task to fix, I think. 
Fixes: ab9c03ff4aa369b3 ("www: use PerlIO::scalar (zfh) for buffering") --- MANIFEST | 1 + lib/PublicInbox/ViewDiff.pm | 13 +++++---- t/data/attached-mbox-with-utf8.eml | 45 ++++++++++++++++++++++++++++++ t/plack.t | 8 ++++-- 4 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 t/data/attached-mbox-with-utf8.eml diff --git a/MANIFEST b/MANIFEST index 8be912d0..35382d2d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -400,6 +400,7 @@ t/content_hash.t t/convert-compact.t t/data-gen/.gitignore t/data/0001.patch +t/data/attached-mbox-with-utf8.eml t/data/binary.patch t/data/message_embed.eml t/dir_idle.t diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index fba3d76c..9a7adb57 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -7,8 +7,7 @@ # (or reconstruct) blobs. package PublicInbox::ViewDiff; -use strict; -use v5.10.1; +use v5.12; use parent qw(Exporter); our @EXPORT_OK = qw(flush_diff uri_escape_path); use URI::Escape qw(uri_escape_utf8); @@ -197,7 +196,8 @@ sub flush_diff ($$) { $top[0] =~ $IS_OID) { $dctx = diff_header(\$x, $ctx, \@top); } elsif ($dctx) { - open(my $afh, '>>', \(my $after='')) or die "open: $!"; + open(my $afh, '>>:utf8', \(my $after='')) or + die "open: $!"; # Quiet "Complex regular subexpression recursion limit" # warning. 
Perl will truncate matches upon hitting @@ -213,7 +213,7 @@ sub flush_diff ($$) { (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { if (!defined($dctx)) { - print $afh $s; + print $afh $s; } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) { print $zfh qq(), diff_hunk($dctx, $1, $2), @@ -234,7 +234,10 @@ sub flush_diff ($$) { print $zfh $lnk->to_html($s); } } - diff_before_or_after($ctx, \$after) if !$dctx; + if (!$dctx) { + utf8::decode($after); + diff_before_or_after($ctx, \$after); + } } else { diff_before_or_after($ctx, \$x); } diff --git a/t/data/attached-mbox-with-utf8.eml b/t/data/attached-mbox-with-utf8.eml new file mode 100644 index 00000000..53dad830 --- /dev/null +++ b/t/data/attached-mbox-with-utf8.eml @@ -0,0 +1,45 @@ +Date: Mon, 24 Sep 2018 09:46:40 -0700 (PDT) +Message-Id: +To: test@example.com +Subject: [PATCHES] attached mbox with UTF-8 patch +From: attacher@example.com +Mime-Version: 1.0 +Content-Type: Multipart/Mixed; + boundary="--Next_Part(Mon_Sep_24_09_46_40_2018_110)--" +Content-Transfer-Encoding: 7bit + +----Next_Part(Mon_Sep_24_09_46_40_2018_110)-- +Content-Type: Text/Plain; charset=us-ascii +Content-Transfer-Encoding: 7bit + +hello world + +----Next_Part(Mon_Sep_24_09_46_40_2018_110)-- +Content-Type: Application/Octet-Stream +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="foo.mbox" + +RnJvbSAzNGRkMWQyNWQ3NmU0NjRjNTM0ZGI0MDllYTdlZDQyNWFiMDVjODI2IE1vbiBTZXAgMTcg +MDA6MDA6MDAgMjAwMQpGcm9tOiA9P1VURi04P3E/Qmo9QzM9Qjhybj89IDxiam9ybkBleGFtcGxl +LmNvbT4KRGF0ZTogVGh1LCAxMiBTZXAgMjAxOSAxMDo0MjowMCArMDIwMApNSU1FLVZlcnNpb246 +IDEuMApDb250ZW50LVR5cGU6IHRleHQvcGxhaW47IGNoYXJzZXQ9VVRGLTgKQ29udGVudC1UcmFu +c2Zlci1FbmNvZGluZzogOGJpdAoKU2lnbmVkLW9mZi1ieTogQmrDuHJuIDxiam9ybkBleGFtcGxl +LmNvbT4KU2lnbmVkLW9mZi1ieTogaiDFu2VuIDx6QGV4YW1wbGUuY29tPgotLS0KIGZvby5jIHwg +MSArLQogMSBmaWxlIGNoYW5nZWQsIDEgaW5zZXJ0aW9ucygrKSwgMSBkZWxldGlvbnMoLSkKCmRp +ZmYgLS1naXQgYS9mb28uYyBiL2Zvby5jCmluZGV4IDVjNDJjZjgxYTA4Yi4uODVmYmE2NGMzZmNm
+IDEwMDY0NAotLS0gYS9mb28uYworKysgYi9mb28uYwpAQCAtMjIxLDkgKzIyMSw5IEBAIGludCBo +ZWxsbyh2b2lkKQogCQlnb3RvIHBoYWlsOwogCX0KIHNraXA6Ci0JaWYgKAlmb28gJiYKKwl1bmxl +c3MgKGZvbykKIGJsYWgKIGJsYWgKIGJsYWgKLS0gCkJqw7hybgoKRnJvbSAzNGRkMWQyNWQ3NmU0 +NjRjNTM0ZGI0MDllYTdlZDQyNWFiMDVjODI2IE1vbiBTZXAgMTcgMDA6MDA6MDAgMjAwMQpGcm9t +OiA9P1VURi04P3E/Qmo9QzM9Qjhybj89IDxiam9ybkBleGFtcGxlLmNvbT4KRGF0ZTogVGh1LCAx +MiBTZXAgMjAxOSAxMDo0MjowMCArMDIwMApNSU1FLVZlcnNpb246IDEuMApDb250ZW50LVR5cGU6 +IHRleHQvcGxhaW47IGNoYXJzZXQ9VVRGLTgKQ29udGVudC1UcmFuc2Zlci1FbmNvZGluZzogOGJp +dAoKU2lnbmVkLW9mZi1ieTogQmrDuHJuIDxiam9ybkBleGFtcGxlLmNvbT4KU2lnbmVkLW9mZi1i +eTogaiDFu2VuIDx6QGV4YW1wbGUuY29tPgotLS0KIGZvby5jIHwgMSArLQogMSBmaWxlIGNoYW5n +ZWQsIDEgaW5zZXJ0aW9ucygrKSwgMSBkZWxldGlvbnMoLSkKCmRpZmYgLS1naXQgYS9mb28uYyBi +L2Zvby5jCmluZGV4IDVjNDJjZjgxYTA4Yi4uODVmYmE2NGMzZmNmIDEwMDY0NAotLS0gYS9mb28u +YworKysgYi9mb28uYwpAQCAtMjIxLDkgKzIyMSw5IEBAIGludCBoZWxsbyh2b2lkKQogCQlnb3Rv +IHBoYWlsOwogCX0KIHNraXA6Ci0JaWYgKAlmb28gJiYKKwl1bmxlc3MgKGZvbykKIGJsYWgKIGJs +YWgKIGJsYWgKLS0gCkJqw7hybgo= + +----Next_Part(Mon_Sep_24_09_46_40_2018_110)---- diff --git a/t/plack.t b/t/plack.t index 1cee286d..7f80f488 100644 --- a/t/plack.t +++ b/t/plack.t @@ -13,7 +13,7 @@ my ($tmpdir, $for_destroy) = tmpdir(); my $pfx = 'http://example.com/test'; my $eml = eml_load('t/iso-2202-jp.eml'); # ensure successful message deliveries -my $ibx = create_inbox('test-1', sub { +my $ibx = create_inbox('u8-2', sub { my ($im, $ibx) = @_; my $addr = $ibx->{-primary_address}; $im->add($eml) or xbail '->add'; @@ -39,6 +39,8 @@ EOF # multipart with attached patch + filename $im->add(eml_load('t/plack-attached-patch.eml')) or BAIL_OUT '->add'; + $im->add(eml_load('t/data/attached-mbox-with-utf8.eml')) or xbail 'add'; + # multipart collapsed to single quoted-printable text/plain $im->add(eml_load('t/plack-qp.eml')) or BAIL_OUT '->add'; my $crlf = <(GET($pfx . 
'/qp@example.com/')); like($res->content, qr/\bhi = bye\b/, "HTML output decoded QP"); + $res = $cb->(GET($pfx . '/attached-mbox-with-utf8@example/')); + like($res->content, qr/: Bjørn /, 'UTF-8 in mbox #1'); + like($res->content, qr/: j Żen/, 'UTF-8 in mbox #2'); $res = $cb->(GET($pfx . '/blah@example.com/raw')); is(200, $res->code, 'success response received for /*/raw'); @@ -246,7 +251,6 @@ my $c1 = sub { 'redirect from x40 MIDs works'); } - # dumb HTTP clone/fetch support $path = '/test/info/refs'; my $req = HTTP::Request->new('GET' => $path);