about summary refs log tree commit homepage
path: root/lib/PublicInbox/Linkify.pm
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2016-12-06 23:01:39 +0000
committer: Eric Wong <e@80x24.org> 2016-12-06 23:12:18 +0000
commit: 130d0c4e33c5c73dc69e270fc698735d49e0f159 (patch)
tree: 9c64ef335c53611f45eff1b2edb5e6ccb741d952 /lib/PublicInbox/Linkify.pm
parent: 95d4bf7aded41cb3b0040c321d315532f68633e1 (diff)
download: public-inbox-130d0c4e33c5c73dc69e270fc698735d49e0f159.tar.gz
Although unescaped parentheses in URLs are technically allowed,
they are uncommon.  However, Markdown-like syntaxes which wrap
URLs in parentheses are unfortunately common, so we might as well
support them by keeping the enclosing parentheses out of the link.
Diffstat (limited to 'lib/PublicInbox/Linkify.pm')
-rw-r--r-- lib/PublicInbox/Linkify.pm 15
1 files changed, 11 insertions, 4 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index ea7fd71f..cc0f7e3a 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -15,7 +15,7 @@ use warnings;
 use Digest::SHA qw/sha1_hex/;
 
 my $SALT = rand;
-my $LINK_RE = qr{\b((?:ftps?|https?|nntps?|gopher)://
+my $LINK_RE = qr{(\()?\b((?:ftps?|https?|nntps?|gopher)://
                  [\@:\w\.-]+/
                  (?:[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]*)
                  (?:\?[a-z0-9\-\._~!\$\&\';\(\)\*\+,;=:@/%]+)?
@@ -27,15 +27,22 @@ sub new { bless {}, shift }
 sub linkify_1 {
         my ($self, $s) = @_;
         $s =~ s!$LINK_RE!
-                my $url = $1;
+                my $beg = $1 || '';
+                my $url = $2;
                 my $end = '';
 
+                # Markdown compatibility:
+                if ($beg eq '(') {
+                        $url =~ s/\)\z//;
+                        $end = ')';
+                }
+
                 # it's fairly common to end URLs in messages with
                 # '.', ',' or ';' to denote the end of a statement;
                 # assume the intent was to end the statement/sentence
                 # in English
                 if ($url =~ s/([\.,;])\z//) {
-                        $end = $1;
+                        $end = $1 . $end;
                 }
 
                 # salt this, as this could be exploited to show
@@ -45,7 +52,7 @@ sub linkify_1 {
                 # only escape ampersands, others do not match LINK_RE
                 $url =~ s/&/&#38;/g;
                 $self->{$key} = $url;
-                'PI-LINK-'. $key . $end;
+                $beg . 'PI-LINK-'. $key . $end;
         !ge;
         $s;
 }