about summary refs log tree commit homepage
path: root/lib/PublicInbox/Linkify.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2016-12-06 23:40:33 +0000
committerEric Wong <e@80x24.org>2016-12-06 23:40:57 +0000
commitf99f9048cdac42509fbbc1f97e2af32fa3bffca6 (patch)
treef7960502ba4334dfb01163d7b1bb604ae8ba9ec6 /lib/PublicInbox/Linkify.pm
parent52e44dc8f9e01678f309818c8ca2bc65b8285738 (diff)
downloadpublic-inbox-f99f9048cdac42509fbbc1f97e2af32fa3bffca6.tar.gz
Although unescaped parentheses in URLs are technically allowed,
they are uncommon.  However, Markdown-like syntaxes which wrap
URLs in parentheses are unfortunately common, so we might as
well support them.

This fixes detection of the closing parenthesis at sentence endings
(a parenthesized URL optionally followed by '.', ',' or ';'), as
seen in practice in emails.
Diffstat (limited to 'lib/PublicInbox/Linkify.pm')
-rw-r--r--lib/PublicInbox/Linkify.pm14
1 files changed, 10 insertions, 4 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index ea7fd71f..acd2a47e 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -15,7 +15,7 @@ use warnings;
 use Digest::SHA qw/sha1_hex/;
 
 my $SALT = rand;
-my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
+my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher)://
                  [\@:\w\.-]+/
                  (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
                  (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
@@ -27,14 +27,20 @@ sub new { bless {}, shift }
 sub linkify_1 {
         my ($self, $s) = @_;
         $s =~ s!$LINK_RE!
-                my $url = $1;
+                my $beg = $1 || '';
+                my $url = $2;
                 my $end = '';
 
                 # it's fairly common to end URLs in messages with
                 # '.', ',' or ';' to denote the end of a statement;
                 # assume the intent was to end the statement/sentence
                 # in English
-                if ($url =~ s/([\.,;])\z//) {
+                # Markdown compatibility:
+                if ($beg eq '(') {
+                        if ($url =~ s/(\)[\.,;]?)\z//) {
+                                $end = $1;
+                        }
+                } elsif ($url =~ s/([\.,;])\z//) {
                         $end = $1;
                 }
 
@@ -45,7 +51,7 @@ sub linkify_1 {
                 # only escape ampersands, others do not match LINK_RE
                 $url =~ s/&/&#38;/g;
                 $self->{$key} = $url;
-                'PI-LINK-'. $key . $end;
+                $beg . 'PI-LINK-'. $key . $end;
         !ge;
         $s;
 }