From 472d39de46603b180ab6e739e0b31ab7ef559870 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Tue, 1 Mar 2016 03:44:04 +0000
Subject: linkify: do not capture trailing '.' or ';' in URLs

It seems common for users to end statements with URLs,
while it is rare for a URL itself to end with a '.' or ';'.

So make a guess and assume the URL was intended to not include
the trailing '.' or ';'
---
 lib/PublicInbox/Linkify.pm | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'lib/PublicInbox/Linkify.pm')

diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 8f634f48..4eddedd0 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -25,6 +25,14 @@ sub linkify_1 {
 	my ($self, $s) = @_;
 	$s =~ s!$LINK_RE!
 		my $url = $1;
+		my $end = '';
+
+		# it's fairly common to end URLs in messages with
+		# '.' or ';' to denote the end of a statement.
+		if ($url =~ s/(\.)\z// || $url =~ s/(;)\z//) {
+			$end = $1;
+		}
+
 		# salt this, as this could be exploited to show
 		# links in the HTML which don't show up in the raw mail.
 		my $key = sha1_hex($url . $SALT);
@@ -32,7 +40,7 @@ sub linkify_1 {
 		# only escape ampersands, others do not match LINK_RE
 		$url =~ s/&/&amp;/g;
 		$self->{$key} = $url;
-		'PI-LINK-'. $key;
+		'PI-LINK-'. $key . $end;
 	!ge;
 	$s;
 }
-- 
cgit v1.2.3-24-ge0c7