about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2016-03-01 03:44:04 +0000
committer Eric Wong <e@80x24.org> 2016-03-01 03:44:26 +0000
commit 472d39de46603b180ab6e739e0b31ab7ef559870 (patch)
tree 69221f465d404166ab2dcfcc0f44a7c99deb8d31 /lib
parent 704d1886ec4c34ffee0a37293970329418582211 (diff)
download public-inbox-472d39de46603b180ab6e739e0b31ab7ef559870.tar.gz
It seems common for users to end statements with URLs,
while it is rare for a URL itself to end with a '.' or ';'.
So make a guess and assume the trailing '.' or ';' was not
intended to be part of the URL.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Linkify.pm 10
1 files changed, 9 insertions, 1 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 8f634f48..4eddedd0 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -25,6 +25,14 @@ sub linkify_1 {
         my ($self, $s) = @_;
         $s =~ s!$LINK_RE!
                 my $url = $1;
+                my $end = '';
+
+                # it's fairly common to end URLs in messages with
+                # '.' or ';' to denote the end of a statement.
+                if ($url =~ s/(\.)\z// || $url =~ s/(;)\z//) {
+                        $end = $1;
+                }
+
                 # salt this, as this could be exploited to show
                 # links in the HTML which don't show up in the raw mail.
                 my $key = sha1_hex($url . $SALT);
@@ -32,7 +40,7 @@ sub linkify_1 {
                 # only escape ampersands, others do not match LINK_RE
                 $url =~ s/&/&#38;/g;
                 $self->{$key} = $url;
-                'PI-LINK-'. $key;
+                'PI-LINK-'. $key . $end;
         !ge;
         $s;
 }