user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/3] viewdiff: support renames and long paths in diffstat anchors
Date: Fri,  1 Feb 2019 07:50:30 +0000	[thread overview]
Message-ID: <20190201075030.23449-4-e@80x24.org> (raw)
In-Reply-To: <20190201075030.23449-1-e@80x24.org>

This is best-effort, but works well enough in practice for
projects which use shell-friendly filenames, as well as for the
long path names used by some Linux kernel selftests.
---
 lib/PublicInbox/View.pm     |  1 +
 lib/PublicInbox/ViewDiff.pm | 62 ++++++++++++++++++++++++++++---------
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 69aca3d..e64c965 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -571,6 +571,7 @@ sub add_text_body {
 		$ctx->{-apfx} = join('/', @idx);
 		$ctx->{-anchors} = {}; # attr => filename
 		$ctx->{-diff} = $diff = [];
+		delete $ctx->{-long_path};
 		my $spfx;
 		if ($ibx->{-repo_objs}) {
 			if (index($upfx, '//') >= 0) { # absolute URL (Atom feeds)
diff --git a/lib/PublicInbox/ViewDiff.pm b/lib/PublicInbox/ViewDiff.pm
index fbdc5b9..c818203 100644
--- a/lib/PublicInbox/ViewDiff.pm
+++ b/lib/PublicInbox/ViewDiff.pm
@@ -77,10 +77,28 @@ sub to_state ($$$) {
 
 sub anchor0 ($$$$$) {
 	my ($dst, $ctx, $linkify, $fn, $rest) = @_;
+
+	my $orig = $fn;
+
+	# normal git diffstat output is impossible to parse reliably
+	# without --numstat, and that isn't the default for format-patch.
+	# So only do best-effort handling of renames for common cases;
+	# which works well in practice. If projects put "=>", or trailing
+	# spaces in filenames, oh well :P
+	$fn =~ s/ +\z//s;
+	$fn =~ s/{(?:.+) => (.+)}/$1/ or $fn =~ s/.* => (.+)/$1/;
+	$fn = git_unquote($fn);
+
+	# long filenames will require us to walk backwards in anchor1
+	if ($fn =~ s!\A\.\.\./?!!) {
+		my $lp = $ctx->{-long_path} ||= {};
+		$lp->{$fn} = qr/\Q$fn\E\z/s;
+	}
+
 	if (my $attr = to_attr($ctx->{-apfx}.$fn)) {
 		$ctx->{-anchors}->{$attr} = 1;
 		$$dst .= " <a\nid=i$attr\nhref=#$attr>" .
-			ascii_html($fn) . '</a>'.
+			ascii_html($orig) . '</a>'.
 			to_html($linkify, $rest);
 		return 1;
 	}
@@ -92,7 +110,21 @@ sub anchor1 ($$$$$) {
 	my $attr = to_attr($ctx->{-apfx}.$pb) or return;
 	my $line = to_html($linkify, $s);
 
-	if (delete $ctx->{-anchors}->{$attr} && $line =~ s/^diff //) {
+	my $ok = delete $ctx->{-anchors}->{$attr};
+
+	# unlikely, check the end of all long path names we captured:
+	unless ($ok) {
+		my $lp = $ctx->{-long_path} or return;
+		foreach my $fn (keys %$lp) {
+			$pb =~ $lp->{$fn} or next;
+
+			delete $lp->{$fn};
+			$attr = to_attr($ctx->{-apfx}.$fn) or return;
+			$ok = delete $ctx->{-anchors}->{$attr} or return;
+			last;
+		}
+	}
+	if ($ok && $line =~ s/^diff //) {
 		$$dst .= "<a\nhref=#i$attr\nid=$attr>diff</a> ".$line;
 		return 1;
 	}
@@ -113,7 +145,7 @@ sub flush_diff ($$$) {
 		} elsif ($s =~ /^ /) {
 			# works for common cases, but not weird/long filenames
 			if ($state == DSTATE_STAT &&
-					$s =~ /^ (\S+)(\s+\|.*\z)/s) {
+					$s =~ /^ (.+)( +\| .*\z)/s) {
 				anchor0($dst, $ctx, $linkify, $1, $2) and next;
 			} elsif ($state2class[$state]) {
 				to_state($dst, $state, DSTATE_CTX);
@@ -124,20 +156,20 @@ sub flush_diff ($$$) {
 				to_state($dst, $state, DSTATE_INIT);
 			$$dst .= $s;
 		} elsif ($s =~ m!^diff --git ($PATH_A) ($PATH_B)$!) {
+			my ($pa, $pb) = ($1, $2);
 			if ($state != DSTATE_HEAD) {
-				my ($pa, $pb) = ($1, $2);
 				to_state($dst, $state, DSTATE_HEAD);
-				$pa = (split('/', git_unquote($pa), 2))[1];
-				$pb = (split('/', git_unquote($pb), 2))[1];
-				$dctx = {
-					Q => "?b=".uri_escape_utf8($pb, UNSAFE),
-				};
-				if ($pa ne $pb) {
-					$dctx->{Q} .= '&amp;a='.
-						uri_escape_utf8($pa, UNSAFE);
-				}
-				anchor1($dst, $ctx, $linkify, $pb, $s) and next;
 			}
+			$pa = (split('/', git_unquote($pa), 2))[1];
+			$pb = (split('/', git_unquote($pb), 2))[1];
+			$dctx = {
+				Q => "?b=".uri_escape_utf8($pb, UNSAFE),
+			};
+			if ($pa ne $pb) {
+				$dctx->{Q} .= '&amp;a='.
+					uri_escape_utf8($pa, UNSAFE);
+			}
+			anchor1($dst, $ctx, $linkify, $pb, $s) and next;
 			$$dst .= to_html($linkify, $s);
 		} elsif ($s =~ s/^(index $OID_NULL\.\.)($OID_BLOB)\b//o) {
 			$$dst .= $1 . oid($dctx, $spfx, $2);
@@ -160,7 +192,7 @@ sub flush_diff ($$$) {
 			$$dst .= to_html($linkify, $s);
 		} elsif ($s =~ m!^--- $PATH_A! ||
 		         $s =~ m!^\+{3} $PATH_B!)  {
-			# color only (no oid link)
+			# color only (no oid link) if missing dctx->{oid_*}
 			$state <= DSTATE_STAT and
 				to_state($dst, $state, DSTATE_HEAD);
 			$$dst .= to_html($linkify, $s);
-- 
EW


      parent reply	other threads:[~2019-02-01  7:50 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-01  7:50 [PATCH 0/3] viewvcs odds and ends Eric Wong
2019-02-01  7:50 ` [PATCH 1/3] viewvcs: allow '0' as a valid filename for blob downloads Eric Wong
2019-02-01  7:50 ` [PATCH 2/3] viewdiff: escape HTML ampersand for renames Eric Wong
2019-02-01  7:50 ` Eric Wong [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190201075030.23449-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).