diff options
author | Eric Wong <e@80x24.org> | 2016-12-06 23:40:33 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-12-06 23:40:57 +0000 |
commit | f99f9048cdac42509fbbc1f97e2af32fa3bffca6 (patch) | |
tree | f7960502ba4334dfb01163d7b1bb604ae8ba9ec6 /lib/PublicInbox/Linkify.pm | |
parent | 52e44dc8f9e01678f309818c8ca2bc65b8285738 (diff) | |
download | public-inbox-f99f9048cdac42509fbbc1f97e2af32fa3bffca6.tar.gz |
Although unescaped parentheses in URLs are technically allowed, they are uncommon. However, Markdown-like syntaxes are unfortunately common for URLs, so we might as well support them. This fixes parentheses detection at sentence endings, as seen in practice on emails.
Diffstat (limited to 'lib/PublicInbox/Linkify.pm')
-rw-r--r-- | lib/PublicInbox/Linkify.pm | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index ea7fd71f..acd2a47e 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -15,7 +15,7 @@ use warnings; use Digest::SHA qw/sha1_hex/; my $SALT = rand; -my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher):// +my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher):// [\@:\w\.-]+/ (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*) (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)? @@ -27,14 +27,20 @@ sub new { bless {}, shift } sub linkify_1 { my ($self, $s) = @_; $s =~ s!$LINK_RE! - my $url = $1; + my $beg = $1 || ''; + my $url = $2; my $end = ''; # it's fairly common to end URLs in messages with # '.', ',' or ';' to denote the end of a statement; # assume the intent was to end the statement/sentence # in English - if ($url =~ s/([\.,;])\z//) { + # Markdown compatibility: + if ($beg eq '(') { + if ($url =~ s/(\)[\.,;]?)\z//) { + $end = $1; + } + } elsif ($url =~ s/([\.,;])\z//) { $end = $1; } @@ -45,7 +51,7 @@ sub linkify_1 { # only escape ampersands, others do not match LINK_RE $url =~ s/&/&amp;/g; $self->{$key} = $url; - 'PI-LINK-'. $key . $end; + $beg . 'PI-LINK-'. $key . $end; !ge; $s; } |