diff options
author | Eric Wong <e@80x24.org> | 2016-03-01 03:44:04 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-03-01 03:44:26 +0000 |
commit | 472d39de46603b180ab6e739e0b31ab7ef559870 (patch) | |
tree | 69221f465d404166ab2dcfcc0f44a7c99deb8d31 | |
parent | 704d1886ec4c34ffee0a37293970329418582211 (diff) | |
download | public-inbox-472d39de46603b180ab6e739e0b31ab7ef559870.tar.gz |
It seems common for users to end statements with URLs, while it is rare for a URL itself to end with a '.' or ';'. So make a guess and assume the URL was not intended to include the trailing '.' or ';'.
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | lib/PublicInbox/Linkify.pm | 10 | ||||
-rw-r--r-- | t/linkify.t | 26 |
3 files changed, 36 insertions, 1 deletions
@@ -80,6 +80,7 @@ t/httpd-corner.psgi t/httpd-corner.t t/httpd.t t/init.t +t/linkify.t t/main-bin/spamc t/mda.t t/msgmap.t diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 8f634f48..4eddedd0 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -25,6 +25,14 @@ sub linkify_1 { my ($self, $s) = @_; $s =~ s!$LINK_RE! my $url = $1; + my $end = ''; + + # it's fairly common to end URLs in messages with + # '.' or ';' to denote the end of a statement. + if ($url =~ s/(\.)\z// || $url =~ s/(;)\z//) { + $end = $1; + } + # salt this, as this could be exploited to show # links in the HTML which don't show up in the raw mail. my $key = sha1_hex($url . $SALT); @@ -32,7 +40,7 @@ sub linkify_1 { # only escape ampersands, others do not match LINK_RE $url =~ s/&/&amp;/g; $self->{$key} = $url; - 'PI-LINK-'. $key; + 'PI-LINK-'. $key . $end; !ge; $s; } diff --git a/t/linkify.t b/t/linkify.t new file mode 100644 index 00000000..586691ae --- /dev/null +++ b/t/linkify.t @@ -0,0 +1,26 @@ +# Copyright (C) 2016 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use warnings; +use Test::More; +use PublicInbox::Linkify; + +{ + my $l = PublicInbox::Linkify->new; + my $u = 'http://example.com/url-with-trailing-period'; + my $s = $u . '.'; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(<a\nhref="$u">$u</a>.), 'trailing period not in URL'); +} + +{ + my $l = PublicInbox::Linkify->new; + my $u = 'http://example.com/url-with-trailing-semicolon'; + my $s = $u . ';'; + $s = $l->linkify_1($s); + $s = $l->linkify_2($s); + is($s, qq(<a\nhref="$u">$u</a>;), 'trailing semicolon not in URL'); +} + +done_testing(); |