diff options
author | Eric Wong <e@80x24.org> | 2019-06-04 02:04:21 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2019-06-04 10:04:57 +0000 |
commit | 1735e5c2cf87b28b096ad91008bdb764d853b26d (patch) | |
tree | 9cdf3ae883b9df1309bee4e4cd22cd1d5bdf69d5 /lib/PublicInbox/Linkify.pm | |
parent | b77c87a6fce05c4f2048aa0a73fde7b25a2b0002 (diff) | |
download | public-inbox-1735e5c2cf87b28b096ad91008bdb764d853b26d.tar.gz |
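The "\w" character class in Perl matches any word character in the Unicode database, not just ASCII word characters. URLs matched by the link regexp may therefore contain non-ASCII characters, so we must be prepared for that and generate links to internationalized domain names (IDNs).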
Diffstat (limited to 'lib/PublicInbox/Linkify.pm')
-rw-r--r-- | lib/PublicInbox/Linkify.pm | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index d4778e7d..84960a98 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -13,6 +13,7 @@ package PublicInbox::Linkify; use strict; use warnings; use Digest::SHA qw/sha1_hex/; +use PublicInbox::Hval qw(ascii_html); my $SALT = rand; my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher):// @@ -61,12 +62,12 @@ sub linkify_1 { $end = ')'; } + $url = ascii_html($url); # for IDN + # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); - # only escape ampersands, others do not match LINK_RE - $url =~ s/&/&amp;/g; $_[0]->{$key} = $url; $beg . 'PI-LINK-'. $key . $end; ^ge; |