diff options
author | Eric Wong <e@80x24.org> | 2015-09-01 08:55:28 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2015-09-01 08:56:12 +0000 |
commit | cd486277621f00c108a49c4ba3c9fedbef0f70d5 (patch) | |
tree | eb00dd1a0dc183f65a1d54bf098c01a54bee7030 /lib | |
parent | 00bb29f93f5c1379f2c73172d2af6c860c73e848 (diff) | |
download | public-inbox-cd486277621f00c108a49c4ba3c9fedbef0f70d5.tar.gz |
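We must avoid double-escaping in cases where URLs are delimited by "<>" in the plain text, as is the common (and AFAIK recommended) convention. So we must use a two-step linkification process to prevent double-escaping: first, each URL is replaced with a salted placeholder token; the text is then HTML-escaped; finally, the placeholders are expanded into escaped links.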
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/View.pm | 62 |
1 files changed, 47 insertions, 15 deletions
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 45f559ee..3d7ba6f5 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -10,7 +10,8 @@ use Encode::MIME::Header; use Email::MIME::ContentType qw/parse_content_type/; use PublicInbox::Hval; use PublicInbox::MID qw/mid_clean mid_compress mid2path/; -use Digest::SHA; +use Digest::SHA qw/sha1_hex/; +my $SALT = rand; require POSIX; # TODO: make these constants tunable @@ -235,10 +236,35 @@ my $LINK_RE = qr!\b((?:ftp|https?|nntp):// [\@:\w\.-]+/ ?[\@\w\+\&\?\.\%\;/#=-]*)!x; -sub linkify { - # no newlines added here since it'd break the splitting we do - # to fold quotes - $_[0] =~ s!$LINK_RE!<a\nhref="$1">$1</a>!g; +sub linkify_1 { + my ($link_map, $s) = @_; + $s =~ s!$LINK_RE! + my $url = $1; + # salt this, as this could be exploited to show + # links in the HTML which don't show up in the raw mail. + my $key = sha1_hex($url . $SALT); + $link_map->{$key} = $url; + 'PI-LINK-'. $key; + !ge; + $s; +} + +sub linkify_2 { + my ($link_map, $s) = @_; + + # Added "PI-LINK-" prefix to avoid false-positives on git commits + $s =~ s!\bPI-LINK-([a-f0-9]{40})\b! 
+ my $key = $1; + my $url = $link_map->{$key}; + if (defined $url) { + $url = ascii_html($url); + "<a\nhref=\"$url\">$url</a>"; + } else { + # false positive or somebody tried to mess with us + $key; + } + !ge; + $s; } sub flush_quote { @@ -247,13 +273,15 @@ sub flush_quote { if ($full_pfx) { if (!$final && scalar(@$quot) <= MAX_INLINE_QUOTED) { # show quote inline - my $rv = join('', map { linkify($_); $_ } @$quot); + my %l; + my $rv = join('', map { linkify_1(\%l, $_) } @$quot); @$quot = (); - return $rv; + $rv = ascii_html($rv); + return linkify_2(\%l, $rv); } # show a short snippet of quoted text and link to full version: - @$quot = map { s/^(?:>\s*)+//gm; $_ } @$quot; + @$quot = map { s/^(?:>\s*)+//gm; $_ } @$quot; my $cur = join(' ', @$quot); @$quot = split(/\s+/, $cur); $cur = ''; @@ -268,16 +296,19 @@ sub flush_quote { } while (@$quot && length($cur) < MAX_TRUNC_LEN); @$quot = (); $cur =~ s/ \z/ .../s; + $cur = ascii_html($cur); my $nr = ++$$n; "> [<a\nhref=\"$full_pfx#q${part_nr}_$nr\">$cur</a>]\n"; } else { # show everything in the full version with anchor from # short version (see above) my $nr = ++$$n; - my $rv = "<a\nid=q${part_nr}_$nr></a>"; - $rv .= join('', map { linkify($_); $_ } @$quot); + my $rv = ""; + my %l; + $rv .= join('', map { linkify_1(\%l, $_) } @$quot); @$quot = (); - $rv; + $rv = ascii_html($rv); + "<a\nid=q${part_nr}_$nr></a>" . 
linkify_2(\%l, $rv); } } @@ -297,7 +328,6 @@ sub add_text_body { my $s = $part->body; $part->body_set(''); $s = $enc->decode($s); - $s = ascii_html($s); my @lines = split(/^/m, $s); $s = ''; @@ -309,7 +339,7 @@ sub add_text_body { my @quot; while (defined(my $cur = shift @lines)) { - if ($cur !~ /^>/) { + if ($cur !~ /^>/) { # show the previously buffered quote inline if (scalar @quot) { $s .= flush_quote(\@quot, \$n, $$part_nr, @@ -317,8 +347,10 @@ sub add_text_body { } # regular line, OK - linkify($cur); - $s .= $cur; + my %l; + $cur = linkify_1(\%l, $cur); + $cur = ascii_html($cur); + $s .= linkify_2(\%l, $cur); } else { push @quot, $cur; } |