From dfe55f5ee5bd6e3a12d933a6570eb94f294d1c54 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Thu, 18 Aug 2016 02:02:50 +0000
Subject: linkify: be stricter about matching RFC 3986

We're not to-the-letter about percent-encoding, but we should
allow all the characters.

This is mainly so we can effectively use the link to
some Wikipedia pages with parentheses in them:

	https://en.wikipedia.org/wiki/Atom_(standard)
	https://en.wikipedia.org/wiki/Git_(software)
---
 lib/PublicInbox/Linkify.pm | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d4df689e..ea7fd71f 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -17,7 +17,10 @@ use Digest::SHA qw/sha1_hex/;
 my $SALT = rand;
 my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
 		 [\@:\w\.-]+/
-		 ?[!,:~\$\@\w\+\&\?\.\%\;/#=-]*)}x;
+		 (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
+		 (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
+		 (?:\#[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%\?]+)?
+		 )}xi;
 
 sub new { bless {}, shift }
 
-- 
cgit v1.2.3-24-ge0c7