about summary refs log tree commit homepage
path: root/lib/PublicInbox/Linkify.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org>, 2019-06-04 02:04:21 +0000
committer: Eric Wong <e@80x24.org>, 2019-06-04 10:04:57 +0000
commit: 1735e5c2cf87b28b096ad91008bdb764d853b26d (patch)
tree: 9cdf3ae883b9df1309bee4e4cd22cd1d5bdf69d5 /lib/PublicInbox/Linkify.pm
parent: b77c87a6fce05c4f2048aa0a73fde7b25a2b0002 (diff)
download: public-inbox-1735e5c2cf87b28b096ad91008bdb764d853b26d.tar.gz
The "\w" character class in Perl matches any word character
in the Unicode database, not just ASCII characters.  So we
must be prepared for that and generate links to IDNs
(internationalized domain names).
Diffstat (limited to 'lib/PublicInbox/Linkify.pm')
-rw-r--r-- lib/PublicInbox/Linkify.pm | 5
1 file changed, 3 insertions, 2 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4778e7d..84960a98 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -13,6 +13,7 @@ package PublicInbox::Linkify;
 use strict;
 use warnings;
 use Digest::SHA qw/sha1_hex/;
+use PublicInbox::Hval qw(ascii_html);
 
 my $SALT = rand;
 my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
@@ -61,12 +62,12 @@ sub linkify_1 {
                         $end = ')';
                 }
 
+                $url = ascii_html($url); # for IDN
+
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($url . $SALT);
 
-                # only escape ampersands, others do not match LINK_RE
-                $url =~ s/&/&#38;/g;
                 $_[0]->{$key} = $url;
                 $beg . 'PI-LINK-'. $key . $end;
         ^ge;