git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Phillip Wood <phillip.wood@talktalk.net>
To: Git Mailing List <git@vger.kernel.org>
Cc: Stefan Beller <sbeller@google.com>,
	Phillip Wood <phillip.wood@dunelm.org.uk>
Subject: [RFC PATCH 3/3] diff: add --color-moved-ws=allow-mixed-indentation-change
Date: Mon, 24 Sep 2018 11:06:04 +0100	[thread overview]
Message-ID: <20180924100604.32208-4-phillip.wood@talktalk.net> (raw)
In-Reply-To: <20180924100604.32208-1-phillip.wood@talktalk.net>

From: Phillip Wood <phillip.wood@dunelm.org.uk>

This adds another mode for highlighting lines that have moved with an
indentation change. Unlike the existing
--color-moved-ws=allow-indentation-change setting this mode uses the
visible change in the indentation to group lines, rather than the
indentation string. This means it works with files that use a mix of
tabs and spaces for indentation and can cope with whitespace errors
where there is a space before a tab (it's the job of
--ws-error-highlight to deal with those errors, it should affect the
move detection). It will also group the lines either
side of a blank line if their indentation change matches so short
lines followed by a blank line followed by more lines with the same
indentation change will be correctly highlighted.

This is a RFC as there are a number of questions about how to proceed
from here:
 1) Do we need a second option or should this implementation replace
    --color-moved-ws=allow-indentation-change. I'm unclear if that mode
    has any advantages for some people. There seems to have been an
    intention [1] to get it working with mixes of tabs and spaces but
    nothing ever came of it.
 2) If we keep two options what should this option be called, the name
    is long and ambiguous at the moment - mixed could refer to mixed
    indentation length rather than a mix of tabs and spaces.
 3) Should we support whitespace flags with this mode?
    --ignore-space-at-eol and --ignore-cr-at eol would be fairly simple
    to support and I can see a use for them, --ignore-all-space and
    --ignore-space-change would need some changes to xdiff to allow them
    to apply only after the indentation. I think --ignore-blank-lines
    would need a bit of work to get it working as well. (Note the
    existing mode does not support any of these flags either)

[1] https://public-inbox.org/git/CAGZ79kasAqE+=7ciVrdjoRdu0UFjVBr8Ma502nw+3hZL=ebXYQ@mail.gmail.com/

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 diff.c                     | 122 +++++++++++++++++++++++++++++++++----
 diff.h                     |   1 +
 t/t4015-diff-whitespace.sh |  89 +++++++++++++++++++++++++++
 3 files changed, 199 insertions(+), 13 deletions(-)

diff --git a/diff.c b/diff.c
index 0a652e28d4..45f33daa60 100644
--- a/diff.c
+++ b/diff.c
@@ -304,7 +304,11 @@ static int parse_color_moved_ws(const char *arg)
 		else if (!strcmp(sb.buf, "ignore-all-space"))
 			ret |= XDF_IGNORE_WHITESPACE;
 		else if (!strcmp(sb.buf, "allow-indentation-change"))
-			ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE;
+			ret = COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE |
+			 (ret & ~COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE);
+		else if (!strcmp(sb.buf, "allow-mixed-indentation-change"))
+			ret = COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE |
+			 (ret & ~COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE);
 		else
 			error(_("ignoring unknown color-moved-ws mode '%s'"), sb.buf);
 
@@ -314,6 +318,9 @@ static int parse_color_moved_ws(const char *arg)
 	if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) &&
 	    (ret & XDF_WHITESPACE_FLAGS))
 		die(_("color-moved-ws: allow-indentation-change cannot be combined with other white space modes"));
+	else if ((ret & COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE) &&
+		 (ret & XDF_WHITESPACE_FLAGS))
+		die(_("color-moved-ws: allow-mixed-indentation-change cannot be combined with other white space modes"));
 
 	string_list_clear(&l, 0);
 
@@ -763,11 +770,65 @@ struct moved_entry {
  * comparision is longer than the second.
  */
 struct ws_delta {
-	char *string;
+	union {
+		int delta;
+		char *string;
+	};
 	unsigned int current_longer : 1;
+	unsigned int have_string : 1;
 };
 #define WS_DELTA_INIT { NULL, 0 }
 
+static int compute_mixed_ws_delta(const struct emitted_diff_symbol *a,
+				  const struct emitted_diff_symbol *b,
+				  int *delta)
+{
+	unsigned int i = 0, j = 0;
+	int la, lb;
+	int ta = a->flags & WS_TAB_WIDTH_MASK;
+	int tb = b->flags & WS_TAB_WIDTH_MASK;
+	const char *sa = a->line;
+	const char *sb = b->line;
+
+	if (xdiff_is_blankline(sa, a->len, 0) &&
+	    xdiff_is_blankline(sb, b->len, 0)) {
+		*delta = INT_MIN;
+		return 1;
+	}
+
+	/* skip any \v \f \r at start of indentation */
+	while (sa[i] == '\f' || sa[i] == '\v' ||
+	       (sa[i] == '\r' && i < a->len - 1))
+		i++;
+	while (sb[j] == '\f' || sb[j] == '\v' ||
+	       (sb[j] == '\r' && j < b->len - 1))
+		j++;
+
+	for (la = 0; ; i++) {
+		if (sa[i] == ' ')
+			la++;
+		else if (sa[i] == '\t')
+			la = ((la + ta) / ta) * ta;
+		else
+			break;
+	}
+	for (lb = 0; ; j++) {
+		if (sb[j] == ' ')
+			lb++;
+		else if (sb[j] == '\t')
+			lb = ((lb + tb) / tb) * tb;
+		else
+			break;
+	}
+	if (a->s == DIFF_SYMBOL_PLUS)
+		*delta = la - lb;
+	else
+		*delta = lb - la;
+
+	return (a->len - i == b->len - j) &&
+		!memcmp(sa + i, sb + j, a->len - i);
+}
+
 static int compute_ws_delta(const struct emitted_diff_symbol *a,
 			     const struct emitted_diff_symbol *b,
 			     struct ws_delta *out)
@@ -778,6 +839,7 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a,
 
 	out->string = xmemdupz(longer->line, d);
 	out->current_longer = (a == longer);
+	out->have_string = 1;
 
 	return !strncmp(longer->line + d, shorter->line, shorter->len);
 }
@@ -820,15 +882,34 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	 * To do so we need to compare 'l' to 'cur', adjusting the
 	 * one of them for the white spaces, depending which was longer.
 	 */
+	if (o->color_moved_ws_handling &
+	    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) {
+		wslen = strlen(pmb->wsd->string);
+		if (pmb->wsd->current_longer)
+			c += wslen;
+		else
+			a += wslen;
 
-	wslen = strlen(pmb->wsd->string);
-	if (pmb->wsd->current_longer)
-		c += wslen;
-	else
-		a += wslen;
+		if (strcmp(a, c))
+			return 1;
 
-	if (strcmp(a, c))
-		return 1;
+		return 0;
+	} else if (o->color_moved_ws_handling &
+		   COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE) {
+		int delta;
+
+		if (!compute_mixed_ws_delta(cur->es, l, &delta))
+		    return 1;
+
+		if (pmb->wsd->delta == INT_MIN) {
+			pmb->wsd->delta = delta;
+			return 0;
+		}
+
+		return !(delta == pmb->wsd->delta || delta == INT_MIN);
+	} else {
+		BUG("no color_moved_ws_allow_indentation_change set");
+	}
 
 	return 0;
 }
@@ -845,7 +926,8 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
 			 & XDF_WHITESPACE_FLAGS;
 
 	if (diffopt->color_moved_ws_handling &
-	    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+	    (COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE |
+	     COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE))
 		/*
 		 * As there is not specific white space config given,
 		 * we'd need to check for a new block, so ignore all
@@ -953,7 +1035,8 @@ static void pmb_advance_or_null_multi_match(struct diff_options *o,
 			pmb[i] = pmb[i]->next_line;
 		} else {
 			if (pmb[i]->wsd) {
-				free(pmb[i]->wsd->string);
+				if (pmb[i]->wsd->have_string)
+					free(pmb[i]->wsd->string);
 				FREE_AND_NULL(pmb[i]->wsd);
 			}
 			pmb[i] = NULL;
@@ -1066,7 +1149,8 @@ static void mark_color_as_moved(struct diff_options *o,
 			continue;
 
 		if (o->color_moved_ws_handling &
-		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+		    (COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE |
+		     COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE))
 			pmb_advance_or_null_multi_match(o, match, hm, pmb, pmb_nr, n);
 		else
 			pmb_advance_or_null(o, match, hm, pmb, pmb_nr);
@@ -1088,6 +1172,17 @@ static void mark_color_as_moved(struct diff_options *o,
 						pmb[pmb_nr++] = match;
 					} else
 						free(wsd);
+				} else if (o->color_moved_ws_handling &
+					   COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE) {
+					int delta;
+
+					if (compute_mixed_ws_delta(l, match->es, &delta)) {
+						struct ws_delta *wsd = xmalloc(sizeof(*match->wsd));
+						wsd->delta = delta;
+						wsd->have_string = 0;
+						match->wsd = wsd;
+						pmb[pmb_nr++] = match;
+					}
 				} else {
 					pmb[pmb_nr++] = match;
 				}
@@ -5740,7 +5835,8 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 			struct hashmap add_lines, del_lines;
 
 			if (o->color_moved_ws_handling &
-			    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+			    (COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE |
+			     COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE))
 				o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE;
 
 			hashmap_init(&del_lines, moved_entry_cmp, o, 0);
diff --git a/diff.h b/diff.h
index 5e6bcf0926..03628cda45 100644
--- a/diff.h
+++ b/diff.h
@@ -217,6 +217,7 @@ struct diff_options {
 
 	/* XDF_WHITESPACE_FLAGS regarding block detection are set at 2, 3, 4 */
 	#define COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE (1<<5)
+	#define COLOR_MOVED_WS_ALLOW_MIXED_INDENTATION_CHANGE (1<<6)
 	int color_moved_ws_handling;
 };
 
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index 41facf7abf..737dbd4a42 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -1902,4 +1902,93 @@ test_expect_success 'compare whitespace delta incompatible with other space opti
 	test_i18ngrep allow-indentation-change err
 '
 
+NUL=''
+test_expect_success 'compare mixed whitespace delta across moved blocks' '
+
+	git reset --hard &&
+	tr Q_ "\t " <<-EOF >text.txt &&
+	${NUL}
+	____too short without
+	${NUL}
+	____being grouped across blank line
+	${NUL}
+	context
+	lines
+	to
+	anchor
+	____Indented text to
+	_Q____be further indented by four spaces across
+	____Qseveral lines
+	QQ____These two lines have had their
+	____indentation reduced by four spaces
+	Qdifferent indentation change
+	____too short
+	EOF
+
+	git add text.txt &&
+	git commit -m "add text.txt" &&
+
+	tr Q_ "\t " <<-EOF >text.txt &&
+	context
+	lines
+	to
+	anchor
+	QIndented text to
+	QQbe further indented by four spaces across
+	Q____several lines
+	${NUL}
+	QQtoo short without
+	${NUL}
+	QQbeing grouped across blank line
+	${NUL}
+	Q_QThese two lines have had their
+	indentation reduced by four spaces
+	QQdifferent indentation change
+	__Qtoo short
+	EOF
+
+	git -c color.diff.whitespace="normal red" \
+		-c core.whitespace=space-before-tab \
+		diff --color --color-moved --ws-error-highlight=all \
+		--color-moved-ws=allow-mixed-indentation-change >actual.raw &&
+	grep -v "index" actual.raw | test_decode_color >actual &&
+
+	cat <<-\EOF >expected &&
+	<BOLD>diff --git a/text.txt b/text.txt<RESET>
+	<BOLD>--- a/text.txt<RESET>
+	<BOLD>+++ b/text.txt<RESET>
+	<CYAN>@@ -1,16 +1,16 @@<RESET>
+	<BOLD;MAGENTA>-<RESET>
+	<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>    too short without<RESET>
+	<BOLD;MAGENTA>-<RESET>
+	<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>    being grouped across blank line<RESET>
+	<BOLD;MAGENTA>-<RESET>
+	 <RESET>context<RESET>
+	 <RESET>lines<RESET>
+	 <RESET>to<RESET>
+	 <RESET>anchor<RESET>
+	<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>    Indented text to<RESET>
+	<BOLD;MAGENTA>-<RESET><BRED> <RESET>	<BOLD;MAGENTA>    be further indented by four spaces across<RESET>
+	<BOLD;MAGENTA>-<RESET><BRED>    <RESET>	<BOLD;MAGENTA>several lines<RESET>
+	<BOLD;BLUE>-<RESET>		<BOLD;BLUE>    These two lines have had their<RESET>
+	<BOLD;BLUE>-<RESET><BOLD;BLUE>    indentation reduced by four spaces<RESET>
+	<BOLD;MAGENTA>-<RESET>	<BOLD;MAGENTA>different indentation change<RESET>
+	<RED>-<RESET><RED>    too short<RESET>
+	<BOLD;CYAN>+<RESET>	<BOLD;CYAN>Indented text to<RESET>
+	<BOLD;CYAN>+<RESET>		<BOLD;CYAN>be further indented by four spaces across<RESET>
+	<BOLD;CYAN>+<RESET>	<BOLD;CYAN>    several lines<RESET>
+	<BOLD;YELLOW>+<RESET>
+	<BOLD;YELLOW>+<RESET>		<BOLD;YELLOW>too short without<RESET>
+	<BOLD;YELLOW>+<RESET>
+	<BOLD;YELLOW>+<RESET>		<BOLD;YELLOW>being grouped across blank line<RESET>
+	<BOLD;YELLOW>+<RESET>
+	<BOLD;CYAN>+<RESET>	<BRED> <RESET>	<BOLD;CYAN>These two lines have had their<RESET>
+	<BOLD;CYAN>+<RESET><BOLD;CYAN>indentation reduced by four spaces<RESET>
+	<BOLD;YELLOW>+<RESET>		<BOLD;YELLOW>different indentation change<RESET>
+	<GREEN>+<RESET><BRED>  <RESET>	<GREEN>too short<RESET>
+	EOF
+
+	test_cmp expected actual
+'
+
 test_done
-- 
2.19.0


  parent reply	other threads:[~2018-09-24 10:06 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-24 10:06 [RFC PATCH 0/3] diff --color-moved-ws: allow mixed spaces and tabs in indentation change Phillip Wood
2018-09-24 10:06 ` [RFC PATCH 1/3] xdiff-interface: make xdl_blankline() available Phillip Wood
2018-09-24 23:19   ` Stefan Beller
2018-09-24 10:06 ` [RFC PATCH 2/3] diff.c: remove unused variables Phillip Wood
2018-09-24 10:06 ` Phillip Wood [this message]
2018-09-25  1:07   ` [RFC PATCH 3/3] diff: add --color-moved-ws=allow-mixed-indentation-change Stefan Beller
2018-10-09  9:50     ` Phillip Wood
2018-10-09 21:10       ` Stefan Beller
2018-10-10 15:26         ` Phillip Wood
2018-10-10 18:05           ` Stefan Beller
2018-09-24 11:03 ` [RFC PATCH 0/3] diff --color-moved-ws: allow mixed spaces and tabs in indentation change Phillip Wood
2018-11-16 11:03 ` [PATCH v1 0/9] diff --color-moved-ws fixes and enhancment Phillip Wood
2018-11-16 11:03   ` [PATCH v1 1/9] diff: document --no-color-moved Phillip Wood
2018-11-16 11:03   ` [PATCH v1 2/9] diff: use whitespace consistently Phillip Wood
2018-11-16 18:29     ` Stefan Beller
2018-11-16 11:03   ` [PATCH v1 3/9] diff: allow --no-color-moved-ws Phillip Wood
2018-11-16 11:03   ` [PATCH v1 4/9] diff --color-moved-ws: demonstrate false positives Phillip Wood
2018-11-16 11:03   ` [PATCH v1 5/9] diff --color-moved-ws: fix " Phillip Wood
2018-11-16 11:03   ` [PATCH v1 6/9] diff --color-moved=zebra: be stricter with color alternation Phillip Wood
2018-11-16 11:03   ` [PATCH v1 7/9] diff --color-moved-ws: optimize allow-indentation-change Phillip Wood
2018-11-16 20:40     ` Stefan Beller
2018-11-17 14:52       ` Phillip Wood
2018-11-16 11:03   ` [PATCH v1 8/9] diff --color-moved-ws: modify allow-indentation-change Phillip Wood
2018-11-16 21:47     ` Stefan Beller
2018-11-17 14:59       ` Phillip Wood
2018-11-16 11:03   ` [PATCH v1 9/9] diff --color-moved-ws: handle blank lines Phillip Wood
2018-11-20 18:05     ` Stefan Beller
2018-11-21 15:49       ` Phillip Wood
2018-11-23 11:16 ` [PATCH v2 0/9] diff --color-moved-ws fixes and enhancment Phillip Wood
2018-11-23 11:16   ` [PATCH v2 1/9] diff: document --no-color-moved Phillip Wood
2018-11-23 11:16   ` [PATCH v2 2/9] Use "whitespace" consistently Phillip Wood
2018-11-23 11:16   ` [PATCH v2 3/9] diff: allow --no-color-moved-ws Phillip Wood
2018-11-23 11:16   ` [PATCH v2 4/9] diff --color-moved-ws: demonstrate false positives Phillip Wood
2018-11-23 11:16   ` [PATCH v2 5/9] diff --color-moved-ws: fix " Phillip Wood
2018-11-23 11:16   ` [PATCH v2 6/9] diff --color-moved=zebra: be stricter with color alternation Phillip Wood
2018-11-23 11:16   ` [PATCH v2 7/9] diff --color-moved-ws: optimize allow-indentation-change Phillip Wood
2018-11-23 11:16   ` [PATCH v2 8/9] diff --color-moved-ws: modify allow-indentation-change Phillip Wood
2018-11-23 11:16   ` [PATCH v2 9/9] diff --color-moved-ws: handle blank lines Phillip Wood
2018-11-26 21:20   ` [PATCH v2 0/9] diff --color-moved-ws fixes and enhancment Stefan Beller
2018-11-27 20:52     ` Phillip Wood
2019-01-08 16:22   ` Phillip Wood
2019-01-08 18:31     ` Junio C Hamano
2019-01-10  0:37       ` Stefan Beller
2019-01-10 18:39         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180924100604.32208-4-phillip.wood@talktalk.net \
    --to=phillip.wood@talktalk.net \
    --cc=git@vger.kernel.org \
    --cc=phillip.wood@dunelm.org.uk \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).