diff options
author | Eric Wong <e@80x24.org> | 2016-03-01 03:44:04 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-03-01 03:44:26 +0000 |
commit | 472d39de46603b180ab6e739e0b31ab7ef559870 (patch) | |
tree | 69221f465d404166ab2dcfcc0f44a7c99deb8d31 /lib/PublicInbox/Linkify.pm | |
parent | 704d1886ec4c34ffee0a37293970329418582211 (diff) | |
download | public-inbox-472d39de46603b180ab6e739e0b31ab7ef559870.tar.gz |
It seems common for users to end statements with URLs, while it is rare for a URL itself to end with a '.' or ';'. So make a guess and assume the URL was intended to not include the trailing '.' or ';'.
Diffstat (limited to 'lib/PublicInbox/Linkify.pm')
-rw-r--r-- | lib/PublicInbox/Linkify.pm | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 8f634f48..4eddedd0 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -25,6 +25,14 @@ sub linkify_1 {
 	my ($self, $s) = @_;
 	$s =~ s!$LINK_RE!
 		my $url = $1;
+		my $end = '';
+
+		# it's fairly common to end URLs in messages with
+		# '.' or ';' to denote the end of a statement.
+		if ($url =~ s/(\.)\z// || $url =~ s/(;)\z//) {
+			$end = $1;
+		}
+
 		# salt this, as this could be exploited to show
 		# links in the HTML which don't show up in the raw mail.
 		my $key = sha1_hex($url . $SALT);
@@ -32,7 +40,7 @@ sub linkify_1 {
 		# only escape ampersands, others do not match LINK_RE
 		$url =~ s/&/&amp;/g;
 		$self->{$key} = $url;
-		'PI-LINK-'. $key;
+		'PI-LINK-'. $key . $end;
 	!ge;
 	$s;
 }