git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Phillip Wood <phillip.wood@talktalk.net>
To: Git Mailing List <git@vger.kernel.org>,
	Stefan Beller <sbeller@google.com>,
	Junio C Hamano <gitster@pobox.com>
Cc: Phillip Wood <phillip.wood@dunelm.org.uk>
Subject: [PATCH v2 8/9] diff --color-moved-ws: modify allow-indentation-change
Date: Fri, 23 Nov 2018 11:16:57 +0000	[thread overview]
Message-ID: <20181123111658.30342-9-phillip.wood@talktalk.net> (raw)
In-Reply-To: <20181123111658.30342-1-phillip.wood@talktalk.net>

From: Phillip Wood <phillip.wood@dunelm.org.uk>

Currently diff --color-moved-ws=allow-indentation-change does not
support indentation that contains a mix of tabs and spaces. For
example in commit 546f70f377 ("convert.h: drop 'extern' from function
declaration", 2018-06-30) the function parameters in the following
lines are not colored as moved [1].

-extern int stream_filter(struct stream_filter *,
-                        const char *input, size_t *isize_p,
-                        char *output, size_t *osize_p);
+int stream_filter(struct stream_filter *,
+                 const char *input, size_t *isize_p,
+                 char *output, size_t *osize_p);

This commit changes the way the indentation is handled to track the
visual size of the indentation rather than the characters in the
indentation. This has the benefit that any whitespace errors do not
interfer with the move detection (the whitespace errors will still be
highlighted according to --ws-error-highlight). During the discussion
of this feature there were concerns about the correct detection of
indentation for python. However those concerns apply whether or not
we're detecting moved lines so no attempt is made to determine if the
indentation is 'pythonic'.

[1] Note that before the commit to fix the erroneous coloring of moved
    lines each line was colored as a different block, since that commit
    they are uncolored.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 diff.c                     | 130 +++++++++++++++++++++----------------
 t/t4015-diff-whitespace.sh |  56 ++++++++++++++++
 2 files changed, 129 insertions(+), 57 deletions(-)

diff --git a/diff.c b/diff.c
index c378ce3daf..148503e49c 100644
--- a/diff.c
+++ b/diff.c
@@ -750,6 +750,8 @@ struct emitted_diff_symbol {
 	const char *line;
 	int len;
 	int flags;
+	int indent_off;   /* Offset to first non-whitespace character */
+	int indent_width; /* The visual width of the indentation */
 	enum diff_symbol s;
 };
 #define EMITTED_DIFF_SYMBOL_INIT {NULL}
@@ -780,44 +782,68 @@ struct moved_entry {
 	struct moved_entry *next_line;
 };
 
-/**
- * The struct ws_delta holds white space differences between moved lines, i.e.
- * between '+' and '-' lines that have been detected to be a move.
- * The string contains the difference in leading white spaces, before the
- * rest of the line is compared using the white space config for move
- * coloring. The current_longer indicates if the first string in the
- * comparision is longer than the second.
- */
-struct ws_delta {
-	char *string;
-	unsigned int current_longer : 1;
-};
-#define WS_DELTA_INIT { NULL, 0 }
-
 struct moved_block {
 	struct moved_entry *match;
-	struct ws_delta wsd;
+	int wsd; /* The whitespace delta of this block */
 };
 
 static void moved_block_clear(struct moved_block *b)
 {
-	FREE_AND_NULL(b->wsd.string);
-	b->match = NULL;
+	memset(b, 0, sizeof(*b));
+}
+
+static void fill_es_indent_data(struct emitted_diff_symbol *es)
+{
+	unsigned int off = 0;
+	int width = 0, tab_width = es->flags & WS_TAB_WIDTH_MASK;
+	const char *s = es->line;
+	const int len = es->len;
+
+	/* skip any \v \f \r at start of indentation */
+	while (s[off] == '\f' || s[off] == '\v' ||
+	       (s[off] == '\r' && off < len - 1))
+		off++;
+
+	/* calculate the visual width of indentation */
+	while(1) {
+		if (s[off] == ' ') {
+			width++;
+			off++;
+		} else if (s[off] == '\t') {
+			width += tab_width - (width % tab_width);
+			while (s[++off] == '\t')
+				width += tab_width;
+		} else {
+			break;
+		}
+	}
+
+	es->indent_off = off;
+	es->indent_width = width;
 }
 
 static int compute_ws_delta(const struct emitted_diff_symbol *a,
-			     const struct emitted_diff_symbol *b,
-			     struct ws_delta *out)
+			    const struct emitted_diff_symbol *b,
+			    int *out)
 {
-	const struct emitted_diff_symbol *longer =  a->len > b->len ? a : b;
-	const struct emitted_diff_symbol *shorter = a->len > b->len ? b : a;
-	int d = longer->len - shorter->len;
+	int a_len = a->len,
+	    b_len = b->len,
+	    a_off = a->indent_off,
+	    a_width = a->indent_width,
+	    b_off = b->indent_off,
+	    b_width = b->indent_width;
+	int delta;
 
-	if (strncmp(longer->line + d, shorter->line, shorter->len))
+	if (a->s == DIFF_SYMBOL_PLUS)
+		delta = a_width - b_width;
+	else
+		delta = b_width - a_width;
+
+	if (a_len - a_off != b_len - b_off ||
+	    memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
 		return 0;
 
-	out->string = xmemdupz(longer->line, d);
-	out->current_longer = (a == longer);
+	*out = delta;
 
 	return 1;
 }
@@ -833,8 +859,11 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	const char *a = cur->es->line,
 		   *b = match->es->line,
 		   *c = l->line;
-	const char *orig_a = a;
-	int wslen;
+	int a_off = cur->es->indent_off,
+	    a_width = cur->es->indent_width,
+	    c_off = l->indent_off,
+	    c_width = l->indent_width;
+	int delta;
 
 	/*
 	 * We need to check if 'cur' is equal to 'match'.  As those
@@ -848,35 +877,20 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
 	if (al != bl)
 		return 1;
 
-	if (!pmb->wsd.string)
-		/*
-		 * The white space delta is not active? This can happen
-		 * when we exit early in this function.
-		 */
-		return 1;
-
 	/*
-	 * The indent changes of the block are known and stored in
-	 * pmb->wsd; however we need to check if the indent changes of the
-	 * current line are still the same as before.
-	 *
-	 * To do so we need to compare 'l' to 'cur', adjusting the
-	 * one of them for the white spaces, depending which was longer.
+	 * The indent changes of the block are known and stored in pmb->wsd;
+	 * however we need to check if the indent changes of the current line
+	 * match those of the current block and that the text of 'l' and 'cur'
+	 * after the indentation match.
 	 */
+	if (cur->es->s == DIFF_SYMBOL_PLUS)
+		delta = a_width - c_width;
+	else
+		delta = c_width - a_width;
 
-	wslen = strlen(pmb->wsd.string);
-	if (pmb->wsd.current_longer) {
-		c += wslen;
-		cl -= wslen;
-	} else {
-		a += wslen;
-		al -= wslen;
-	}
-
-	if (al != cl || memcmp(orig_a, b, bl) || memcmp(a, c, al))
-		return 1;
-
-	return 0;
+	return !(delta == pmb->wsd && al - a_off == cl - c_off &&
+		 !memcmp(a, b, al) && !
+		 memcmp(a + a_off, c + c_off, al - a_off));
 }
 
 static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
@@ -942,6 +956,9 @@ static void add_lines_to_move_detection(struct diff_options *o,
 			continue;
 		}
 
+		if (o->color_moved_ws_handling &
+		    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+			fill_es_indent_data(&o->emitted_symbols->buf[n]);
 		key = prepare_entry(o, n);
 		if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s)
 			prev_line->next_line = key;
@@ -1020,8 +1037,7 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb,
 
 		if (lp < pmb_nr && rp > -1 && lp < rp) {
 			pmb[lp] = pmb[rp];
-			pmb[rp].match = NULL;
-			pmb[rp].wsd.string = NULL;
+			memset(&pmb[rp], 0, sizeof(pmb[rp]));
 			rp--;
 			lp++;
 		}
@@ -1141,7 +1157,7 @@ static void mark_color_as_moved(struct diff_options *o,
 							     &pmb[pmb_nr].wsd))
 						pmb[pmb_nr++].match = match;
 				} else {
-					pmb[pmb_nr].wsd.string = NULL;
+					pmb[pmb_nr].wsd = 0;
 					pmb[pmb_nr++].match = match;
 				}
 			}
@@ -1507,7 +1523,7 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
 static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
 			     const char *line, int len, unsigned flags)
 {
-	struct emitted_diff_symbol e = {line, len, flags, s};
+	struct emitted_diff_symbol e = {line, len, flags, 0, 0, s};
 
 	if (o->emitted_symbols)
 		append_emitted_diff_symbol(o, &e);
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index fe8a2ab06e..e023839ba6 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -1901,4 +1901,60 @@ test_expect_success 'compare whitespace delta incompatible with other space opti
 	test_i18ngrep allow-indentation-change err
 '
 
+test_expect_success 'compare mixed whitespace delta across moved blocks' '
+
+	git reset --hard &&
+	tr Q_ "\t " <<-EOF >text.txt &&
+	____Indented text to
+	_Q____be further indented by four spaces across
+	____Qseveral lines
+	QQ____These two lines have had their
+	____indentation reduced by four spaces
+	Qdifferent indentation change
+	____too short
+	EOF
+
+	git add text.txt &&
+	git commit -m "add text.txt" &&
+
+	tr Q_ "\t " <<-EOF >text.txt &&
+	QIndented text to
+	QQbe further indented by four spaces across
+	Q____several lines
+	Q_QThese two lines have had their
+	indentation reduced by four spaces
+	QQdifferent indentation change
+	__Qtoo short
+	EOF
+
+	git -c color.diff.whitespace="normal red" \
+		-c core.whitespace=space-before-tab \
+		diff --color --color-moved --ws-error-highlight=all \
+		--color-moved-ws=allow-indentation-change >actual.raw &&
+	grep -v "index" actual.raw | test_decode_color >actual &&
+
+	cat <<-\EOF >expected &&
+	<BOLD>diff --git a/text.txt b/text.txt<RESET>
+	<BOLD>--- a/text.txt<RESET>
+	<BOLD>+++ b/text.txt<RESET>
+	<CYAN>@@ -1,7 +1,7 @@<RESET>
+	<BOLD;MAGENTA>-<RESET><BOLD;MAGENTA>    Indented text to<RESET>
+	<BOLD;MAGENTA>-<RESET><BRED> <RESET>	<BOLD;MAGENTA>    be further indented by four spaces across<RESET>
+	<BOLD;MAGENTA>-<RESET><BRED>    <RESET>	<BOLD;MAGENTA>several lines<RESET>
+	<BOLD;BLUE>-<RESET>		<BOLD;BLUE>    These two lines have had their<RESET>
+	<BOLD;BLUE>-<RESET><BOLD;BLUE>    indentation reduced by four spaces<RESET>
+	<BOLD;MAGENTA>-<RESET>	<BOLD;MAGENTA>different indentation change<RESET>
+	<RED>-<RESET><RED>    too short<RESET>
+	<BOLD;CYAN>+<RESET>	<BOLD;CYAN>Indented text to<RESET>
+	<BOLD;CYAN>+<RESET>		<BOLD;CYAN>be further indented by four spaces across<RESET>
+	<BOLD;CYAN>+<RESET>	<BOLD;CYAN>    several lines<RESET>
+	<BOLD;YELLOW>+<RESET>	<BRED> <RESET>	<BOLD;YELLOW>These two lines have had their<RESET>
+	<BOLD;YELLOW>+<RESET><BOLD;YELLOW>indentation reduced by four spaces<RESET>
+	<BOLD;CYAN>+<RESET>		<BOLD;CYAN>different indentation change<RESET>
+	<GREEN>+<RESET><BRED>  <RESET>	<GREEN>too short<RESET>
+	EOF
+
+	test_cmp expected actual
+'
+
 test_done
-- 
2.19.1.1690.g258b440b18


  parent reply	other threads:[~2018-11-23 11:17 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-24 10:06 [RFC PATCH 0/3] diff --color-moved-ws: allow mixed spaces and tabs in indentation change Phillip Wood
2018-09-24 10:06 ` [RFC PATCH 1/3] xdiff-interface: make xdl_blankline() available Phillip Wood
2018-09-24 23:19   ` Stefan Beller
2018-09-24 10:06 ` [RFC PATCH 2/3] diff.c: remove unused variables Phillip Wood
2018-09-24 10:06 ` [RFC PATCH 3/3] diff: add --color-moved-ws=allow-mixed-indentation-change Phillip Wood
2018-09-25  1:07   ` Stefan Beller
2018-10-09  9:50     ` Phillip Wood
2018-10-09 21:10       ` Stefan Beller
2018-10-10 15:26         ` Phillip Wood
2018-10-10 18:05           ` Stefan Beller
2018-09-24 11:03 ` [RFC PATCH 0/3] diff --color-moved-ws: allow mixed spaces and tabs in indentation change Phillip Wood
2018-11-16 11:03 ` [PATCH v1 0/9] diff --color-moved-ws fixes and enhancment Phillip Wood
2018-11-16 11:03   ` [PATCH v1 1/9] diff: document --no-color-moved Phillip Wood
2018-11-16 11:03   ` [PATCH v1 2/9] diff: use whitespace consistently Phillip Wood
2018-11-16 18:29     ` Stefan Beller
2018-11-16 11:03   ` [PATCH v1 3/9] diff: allow --no-color-moved-ws Phillip Wood
2018-11-16 11:03   ` [PATCH v1 4/9] diff --color-moved-ws: demonstrate false positives Phillip Wood
2018-11-16 11:03   ` [PATCH v1 5/9] diff --color-moved-ws: fix " Phillip Wood
2018-11-16 11:03   ` [PATCH v1 6/9] diff --color-moved=zebra: be stricter with color alternation Phillip Wood
2018-11-16 11:03   ` [PATCH v1 7/9] diff --color-moved-ws: optimize allow-indentation-change Phillip Wood
2018-11-16 20:40     ` Stefan Beller
2018-11-17 14:52       ` Phillip Wood
2018-11-16 11:03   ` [PATCH v1 8/9] diff --color-moved-ws: modify allow-indentation-change Phillip Wood
2018-11-16 21:47     ` Stefan Beller
2018-11-17 14:59       ` Phillip Wood
2018-11-16 11:03   ` [PATCH v1 9/9] diff --color-moved-ws: handle blank lines Phillip Wood
2018-11-20 18:05     ` Stefan Beller
2018-11-21 15:49       ` Phillip Wood
2018-11-23 11:16 ` [PATCH v2 0/9] diff --color-moved-ws fixes and enhancment Phillip Wood
2018-11-23 11:16   ` [PATCH v2 1/9] diff: document --no-color-moved Phillip Wood
2018-11-23 11:16   ` [PATCH v2 2/9] Use "whitespace" consistently Phillip Wood
2018-11-23 11:16   ` [PATCH v2 3/9] diff: allow --no-color-moved-ws Phillip Wood
2018-11-23 11:16   ` [PATCH v2 4/9] diff --color-moved-ws: demonstrate false positives Phillip Wood
2018-11-23 11:16   ` [PATCH v2 5/9] diff --color-moved-ws: fix " Phillip Wood
2018-11-23 11:16   ` [PATCH v2 6/9] diff --color-moved=zebra: be stricter with color alternation Phillip Wood
2018-11-23 11:16   ` [PATCH v2 7/9] diff --color-moved-ws: optimize allow-indentation-change Phillip Wood
2018-11-23 11:16   ` Phillip Wood [this message]
2018-11-23 11:16   ` [PATCH v2 9/9] diff --color-moved-ws: handle blank lines Phillip Wood
2018-11-26 21:20   ` [PATCH v2 0/9] diff --color-moved-ws fixes and enhancment Stefan Beller
2018-11-27 20:52     ` Phillip Wood
2019-01-08 16:22   ` Phillip Wood
2019-01-08 18:31     ` Junio C Hamano
2019-01-10  0:37       ` Stefan Beller
2019-01-10 18:39         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181123111658.30342-9-phillip.wood@talktalk.net \
    --to=phillip.wood@talktalk.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=phillip.wood@dunelm.org.uk \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).