user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 11/11] view: more robust link generation
  @ 2015-09-01  8:55  7% ` Eric Wong
  0 siblings, 0 replies; 1+ results
From: Eric Wong @ 2015-09-01  8:55 UTC (permalink / raw)
  To: meta

We must avoid double-escaping in cases where we have URLs anchored
by "<>" in the plain-text, as is a common (and AFAIK recommended)
convention.  So we must use a two-step linkification process
to prevent double-escaping.
---
 lib/PublicInbox/View.pm | 62 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 45f559e..3d7ba6f 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -10,7 +10,8 @@ use Encode::MIME::Header;
 use Email::MIME::ContentType qw/parse_content_type/;
 use PublicInbox::Hval;
 use PublicInbox::MID qw/mid_clean mid_compress mid2path/;
-use Digest::SHA;
+use Digest::SHA qw/sha1_hex/;
+my $SALT = rand;
 require POSIX;
 
 # TODO: make these constants tunable
@@ -235,10 +236,35 @@ my $LINK_RE = qr!\b((?:ftp|https?|nntp)://
 		 [\@:\w\.-]+/
 		 ?[\@\w\+\&\?\.\%\;/#=-]*)!x;
 
-sub linkify {
-	# no newlines added here since it'd break the splitting we do
-	# to fold quotes
-	$_[0] =~ s!$LINK_RE!<a\nhref="$1">$1</a>!g;
+sub linkify_1 {
+	my ($link_map, $s) = @_;
+	$s =~ s!$LINK_RE!
+		my $url = $1;
+		# salt this, as this could be exploited to show
+		# links in the HTML which don't show up in the raw mail.
+		my $key = sha1_hex($url . $SALT);
+		$link_map->{$key} = $url;
+		'PI-LINK-'. $key;
+	!ge;
+	$s;
+}
+
+sub linkify_2 {
+	my ($link_map, $s) = @_;
+
+	# Added "PI-LINK-" prefix to avoid false-positives on git commits
+	$s =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+		my $key = $1;
+		my $url = $link_map->{$key};
+		if (defined $url) {
+			$url = ascii_html($url);
+			"<a\nhref=\"$url\">$url</a>";
+		} else {
+			# false positive or somebody tried to mess with us
+			$key;
+		}
+	!ge;
+	$s;
 }
 
 sub flush_quote {
@@ -247,13 +273,15 @@ sub flush_quote {
 	if ($full_pfx) {
 		if (!$final && scalar(@$quot) <= MAX_INLINE_QUOTED) {
 			# show quote inline
-			my $rv = join('', map { linkify($_); $_ } @$quot);
+			my %l;
+			my $rv = join('', map { linkify_1(\%l, $_) } @$quot);
 			@$quot = ();
-			return $rv;
+			$rv = ascii_html($rv);
+			return linkify_2(\%l, $rv);
 		}
 
 		# show a short snippet of quoted text and link to full version:
-		@$quot = map { s/^(?:&gt;\s*)+//gm; $_ } @$quot;
+		@$quot = map { s/^(?:>\s*)+//gm; $_ } @$quot;
 		my $cur = join(' ', @$quot);
 		@$quot = split(/\s+/, $cur);
 		$cur = '';
@@ -268,16 +296,19 @@ sub flush_quote {
 		} while (@$quot && length($cur) < MAX_TRUNC_LEN);
 		@$quot = ();
 		$cur =~ s/ \z/ .../s;
+		$cur = ascii_html($cur);
 		my $nr = ++$$n;
 		"&gt; [<a\nhref=\"$full_pfx#q${part_nr}_$nr\">$cur</a>]\n";
 	} else {
 		# show everything in the full version with anchor from
 		# short version (see above)
 		my $nr = ++$$n;
-		my $rv = "<a\nid=q${part_nr}_$nr></a>";
-		$rv .= join('', map { linkify($_); $_ } @$quot);
+		my $rv = "";
+		my %l;
+		$rv .= join('', map { linkify_1(\%l, $_) } @$quot);
 		@$quot = ();
-		$rv;
+		$rv = ascii_html($rv);
+		"<a\nid=q${part_nr}_$nr></a>" . linkify_2(\%l, $rv);
 	}
 }
 
@@ -297,7 +328,6 @@ sub add_text_body {
 	my $s = $part->body;
 	$part->body_set('');
 	$s = $enc->decode($s);
-	$s = ascii_html($s);
 	my @lines = split(/^/m, $s);
 	$s = '';
 
@@ -309,7 +339,7 @@ sub add_text_body {
 
 	my @quot;
 	while (defined(my $cur = shift @lines)) {
-		if ($cur !~ /^&gt;/) {
+		if ($cur !~ /^>/) {
 			# show the previously buffered quote inline
 			if (scalar @quot) {
 				$s .= flush_quote(\@quot, \$n, $$part_nr,
@@ -317,8 +347,10 @@ sub add_text_body {
 			}
 
 			# regular line, OK
-			linkify($cur);
-			$s .= $cur;
+			my %l;
+			$cur = linkify_1(\%l, $cur);
+			$cur = ascii_html($cur);
+			$s .= linkify_2(\%l, $cur);
 		} else {
 			push @quot, $cur;
 		}
-- 
EW


^ permalink raw reply related	[relevance 7%]

Results 1-1 of 1 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-09-01  8:55     [PATCH 01/11] search: reduce redundant doc data Eric Wong
2015-09-01  8:55  7% ` [PATCH 11/11] view: more robust link generation Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).