about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
author: Eric Wong <e@yhbt.net> 2020-01-19 09:40:51 +0000
committer: Eric Wong <e@yhbt.net> 2020-01-23 23:04:04 +0000
commit: c242a28351d3a9a39224d35031d71afc02993c7a (patch)
tree: 55fd78496cbb7547b60be2f2b0d16e72f240ee92 /lib/PublicInbox
parent: b33f080dc286beb160ce25858cd98010cb836a7e (diff)
download: public-inbox-c242a28351d3a9a39224d35031d71afc02993c7a.tar.gz
We need to escape wide characters when making attribute names from
filename-looking things in diffstats.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/Hval.pm 3
1 files changed, 3 insertions, 0 deletions
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 7e007027..39256ee0 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -139,10 +139,12 @@ sub to_attr ($) {
         return if index($str, '//') >= 0;
 
         my $first = '';
+        utf8::encode($str); # to octets
         if ($str =~ s/\A([^A-Ya-z])//ms) { # start with a letter
                   $first = sprintf('Z%02x', ord($1));
         }
         $str =~ s/([^A-Za-z0-9_\.\-])/$ESCAPES{$1}/egms;
+        utf8::decode($str); # allow wide chars
         $first . $str;
 }
 
@@ -155,6 +157,7 @@ sub from_attr ($) {
         }
         $str =~ s!::([a-f0-9]{2})!chr(hex($1))!egms;
         $str =~ tr!:!/!;
+        utf8::decode($str);
         $first . $str;
 }