From 846161e3d1207d59f62b3a6718221d6f5ba2b94f Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 28 May 2020 18:37:08 +0000 Subject: treat $INBOX_DIR/description and gitweb.owner as UTF-8 gitweb does the same with $GIT_DIR/description and gitweb.owner. Allowing UTF-8 description should not cause problems when used in responses to the NNTP "LIST NEWSGROUPS" request, either, since RFC 3977 section 7.6.6 recommends the description be UTF-8 (but does not require it). Link: https://public-inbox.org/meta/20200528151216.l7vmnmrs4ojw372g@sourcephile.fr/ --- lib/PublicInbox/Inbox.pm | 1 + lib/PublicInbox/WwwListing.pm | 2 ++ t/inbox.t | 7 ++++--- t/www_listing.t | 5 +++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 002b980f..c295b267 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -223,6 +223,7 @@ sub description { my $desc = try_cat("$self->{inboxdir}/description"); local $/ = "\n"; chomp $desc; + utf8::decode($desc); $desc =~ s/\s+/ /smg; $desc eq '' ? 
undef : $desc; }) // '($INBOX_DIR/description missing)'; diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index 38a37dda..a416d24f 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -159,6 +159,8 @@ sub manifest_add ($$;$$) { chomp(my $desc = try_cat("$git_dir/description")); $owner = undef if $owner eq ''; $desc = 'Unnamed repository' if $desc eq ''; + utf8::decode($desc); + utf8::decode($owner); # templates/hooks--update.sample and git-multimail in git.git # only match "Unnamed repository", not the full contents of diff --git a/t/inbox.t b/t/inbox.t index b59d5dba..08f1724f 100644 --- a/t/inbox.t +++ b/t/inbox.t @@ -22,13 +22,14 @@ is($x->description, '($INBOX_DIR/description missing)', 'default description'); print $fh "https://example.com/inbox\n" or die; close $fh or die; open $fh, '>', "$x->{inboxdir}/description" or die; - print $fh "blah\n" or die; + print $fh "\xc4\x80blah\n" or die; close $fh or die; } is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls update'); -is($x->description, 'blah', 'description updated'); +ok(utf8::valid($x->description), 'description is utf8::valid'); +is($x->description, "\x{100}blah", 'description updated'); is(unlink(glob("$x->{inboxdir}/*")), 2, 'unlinked cloneurl & description'); is_deeply($x->cloneurl, ['https://example.com/inbox'], 'cloneurls memoized'); -is($x->description, 'blah', 'description memoized'); +is($x->description, "\x{100}blah", 'description memoized'); done_testing(); diff --git a/t/www_listing.t b/t/www_listing.t index 31d76356..0aededd4 100644 --- a/t/www_listing.t +++ b/t/www_listing.t @@ -46,7 +46,7 @@ sub tiny_test { unlike($tmp, qr/"modified":\s*"/, 'modified is an integer'); my $manifest = $json->decode($tmp); ok(my $clone = $manifest->{'/alt'}, '/alt in manifest'); - is($clone->{owner}, 'lorelei', 'owner set'); + is($clone->{owner}, "lorelei \x{100}", 'owner set'); is($clone->{reference}, '/bare', 'reference detected'); 
is($clone->{description}, "we're all clones", 'description read'); ok(my $bare = $manifest->{'/bare'}, '/bare in manifest'); @@ -88,7 +88,8 @@ SKIP: { open $fh, '>', "$alt/description" or die; print $fh "we're all clones\n" or die; close $fh or die; - is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner lorelei)), 0, + is(xsys('git', "--git-dir=$alt", qw(config gitweb.owner), + "lorelei \xc4\x80"), 0, 'set gitweb user'); ok(unlink("$bare->{git_dir}/description"), 'removed bare/description'); open $fh, '>', $cfgfile or die; -- cgit v1.2.3-24-ge0c7