about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2019-06-04 02:04:21 +0000
committer: Eric Wong <e@80x24.org> 2019-06-04 10:04:57 +0000
commit: 1735e5c2cf87b28b096ad91008bdb764d853b26d (patch)
tree: 9cdf3ae883b9df1309bee4e4cd22cd1d5bdf69d5 /lib
parent: b77c87a6fce05c4f2048aa0a73fde7b25a2b0002 (diff)
download: public-inbox-1735e5c2cf87b28b096ad91008bdb764d853b26d.tar.gz
The "\w" character class in Perl matches any word character
in the Unicode database, not just ASCII characters.  So we
must be prepared for that and generate links to IDNs
(internationalized domain names).
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Linkify.pm 5
1 files changed, 3 insertions, 2 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4778e7d..84960a98 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -13,6 +13,7 @@ package PublicInbox::Linkify;
 use strict;
 use warnings;
 use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
 
 my $SALT = rand;
 my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
@@ -61,12 +62,12 @@ sub linkify_1 {
                         $end = ')';
                 }
 
+                $url = ascii_html($url); # for IDN
+
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($url . $SALT);
 
-                # only escape ampersands, others do not match LINK_RE
-                $url =~ s/&/&#38;/g;
                 $_[0]->{$key} = $url;
                 $beg . 'PI-LINK-'. $key . $end;
         ^ge;