From c1e7a048be9d32cdca943bb3b7f935ed28768d4d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 12 Sep 2022 22:54:04 +0000 Subject: www: viewdiff: fix UTF-8 names inside mbox attachments This avoids `Wide character in print' warnings and ensures the UTF-8 characters in `Signed-off-by' trailers are properly rendered in HTML even when attempting to decode and display application/octet-stream mbox attachments as HTML. Linkification and reconstruction for coderepos are probably still broken, but that is a much bigger task to fix, I think. Fixes: ab9c03ff4aa369b3 ("www: use PerlIO::scalar (zfh) for buffering") --- lib/PublicInbox/ViewDiff.pm | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm index fba3d76c..9a7adb57 100644 --- a/lib/PublicInbox/ViewDiff.pm +++ b/lib/PublicInbox/ViewDiff.pm @@ -7,8 +7,7 @@ # (or reconstruct) blobs. package PublicInbox::ViewDiff; -use strict; -use v5.10.1; +use v5.12; use parent qw(Exporter); our @EXPORT_OK = qw(flush_diff uri_escape_path); use URI::Escape qw(uri_escape_utf8); @@ -197,7 +196,8 @@ sub flush_diff ($$) { $top[0] =~ $IS_OID) { $dctx = diff_header(\$x, $ctx, \@top); } elsif ($dctx) { - open(my $afh, '>>', \(my $after='')) or die "open: $!"; + open(my $afh, '>>:utf8', \(my $after='')) or + die "open: $!"; # Quiet "Complex regular subexpression recursion limit" # warning. Perl will truncate matches upon hitting @@ -213,7 +213,7 @@ sub flush_diff ($$) { (?:(?:^-[^\n]*\n)+)| (?:^@@ [^\n]+\n))/xsm, $x)) { if (!defined($dctx)) { - print $afh $s; + print $afh $x; } elsif ($s =~ s/\A@@ (\S+) (\S+) @@//) { print $zfh qq(), diff_hunk($dctx, $1, $2), @@ -234,7 +234,10 @@ sub flush_diff ($$) { print $zfh $lnk->to_html($s); } } - diff_before_or_after($ctx, \$after) if !$dctx; + if (!$dctx) { + utf8::decode($after); + diff_before_or_after($ctx, \$after); + } } else { diff_before_or_after($ctx, \$x); } -- cgit v1.2.3-24-ge0c7