git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Stefan Beller <sbeller@google.com>
To: git@vger.kernel.org
Cc: Stefan Beller <sbeller@google.com>
Subject: [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently
Date: Fri,  2 Sep 2016 20:31:20 -0700	[thread overview]
Message-ID: <20160903033120.20511-3-sbeller@google.com> (raw)
In-Reply-To: <20160903033120.20511-1-sbeller@google.com>

When moving code (e.g. a function is moved to another part of the file or
to a different file), the review process is different from reviewing new
code. When reviewing moved code we are only interested in the places where
the moved code differs from the original, e.g. namespace changes.

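However, the inner part of such moved text should not change.
To aid a developer reviewing such code, emit it with a different prefix
than the usual '+' and '-' to indicate that it duplicates code found on
the other side of the diff: '*' for an added line that matches a removed
line, and '~' for a removed line that matches an added line.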

Examples from recent history:
    git show e28eae3184b26d3cf3293e69403babb5c575342c
    git show bc9204d4ef6e0672389fdfb0d398fa9a39dba3d5
    git show 8465541e8ce8eaf16e66ab847086779768c18f2d

Ignoring whitespace changes does not work yet, but this patch series
should be made to handle it, e.g. for:
9d1ca1dac0ebfd6e17d73e33b2d173926c139c2d

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xdiff.h |   1 +
 xdiff/xemit.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 7423f77..0744e01 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -45,6 +45,7 @@ extern "C" {
 
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
+#define XDL_EMIT_DUPLICATE (1 << 3)
 
 #define XDL_MMB_READONLY (1 << 0)
 
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index b52b4b9..4abafae 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -22,6 +22,9 @@
 
 #include "xinclude.h"
 
+#include "git-compat-util.h"
+#include "hashmap.h"
+
 static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) {
 
 	*rec = xdf->recs[ri]->ptr;
@@ -158,12 +161,133 @@ static int is_empty_rec(xdfile_t *xdf, long ri)
 	return !len;
 }
 
+struct hashmap *duplicates_added;
+struct hashmap *duplicates_removed;
+
+struct dup_entry {
+	struct hashmap_entry ent;
+	xdfile_t *xdf;
+	long index;
+};
+
+static int dup_entry_cmp(const struct dup_entry *a,
+			   const struct dup_entry *b,
+			   const void *unused)
+{
+	int d = XDL_MIN(strcspn(a->xdf->recs[a->index]->ptr, "\n"),
+				strcspn(b->xdf->recs[b->index]->ptr, "\n"));
+
+	if (!strncmp(a->xdf->recs[a->index]->ptr,
+			b->xdf->recs[b->index]->ptr,
+			d))
+		return 0;
+	return 1;
+}
+
+struct dup_entry *prepare_entry(xdfile_t *xdf, long ri)
+{
+	long range_start = XDL_MAX(0, ri - 2);
+	long range_end = XDL_MIN(xdf->nrec, ri + 2);
+	long hash = 0;
+	int i;
+	struct dup_entry *ret = xmalloc(sizeof(*ret));
+
+	for (i = range_start; i < range_end; i++)
+		hash ^= memhash(xdf->recs[i]->ptr, xdf->recs[i]->size);
+
+	ret->ent.hash = hash;
+	ret->xdf = xdf;
+	ret->index = ri;
+	return ret;
+}
+
+int add_removal(xdfile_t *xdf, long ri)
+{
+	hashmap_add(duplicates_removed, prepare_entry(xdf, ri));
+	return 0;
+}
+
+int add_addition(xdfile_t *xdf, long ri)
+{
+	hashmap_add(duplicates_added, prepare_entry(xdf, ri));
+	return 0;
+}
+
+int xdl_markup_duplicates(xdfenv_t *xe, xdchange_t *xscr,
+			  xdemitconf_t const *xecfg)
+{
+	long s1, s2;
+	xdchange_t *xch, *xche;
+
+	duplicates_added = xmalloc(sizeof(*duplicates_added));
+	duplicates_removed = xmalloc(sizeof(*duplicates_removed));
+	hashmap_init(duplicates_added, (hashmap_cmp_fn)dup_entry_cmp, 0);
+	hashmap_init(duplicates_removed, (hashmap_cmp_fn)dup_entry_cmp, 0);
+
+	for (xch = xscr; xch; xch = xche->next) {
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;
+
+		for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) {
+
+			/*
+			 * Removes lines from the first file.
+			 */
+			for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
+				if (add_removal(&xe->xdf1, s1) < 0)
+					return -1;
+
+			/*
+			 * Adds lines from the second file.
+			 */
+			for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
+				if (add_addition(&xe->xdf2, s2) < 0)
+					return -1;
+
+			if (xch == xche)
+				break;
+			s1 = xch->i1 + xch->chg1;
+			s2 = xch->i2 + xch->chg2;
+		}
+	}
+	return 0;
+}
+
+static int xdl_check_and_emit_record(xdfile_t *xdf, long ri,
+				     char *pre, xdemitcb_t *ecb,
+				     int duplicate_handling)
+{
+	const char *hacked_pre = pre;
+
+	if (duplicate_handling) {
+		struct dup_entry *keydata = prepare_entry(xdf, ri);
+
+		if (*pre == '+' &&
+		    hashmap_get(duplicates_removed, keydata, keydata))
+				hacked_pre = "*";
+
+		if (*pre == '-' &&
+		    hashmap_get(duplicates_added, keydata, keydata))
+				hacked_pre = "~";
+		free(keydata);
+	}
+
+	return xdl_emit_record(xdf, ri, hacked_pre, ecb);
+}
+
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg) {
 	long s1, s2, e1, e2, lctx;
 	xdchange_t *xch, *xche;
 	long funclineprev = -1;
 	struct func_line func_line = { 0 };
+	int duplicate_handling = 0;
+
+	/* for testing I added a `|| 1` */
+	duplicate_handling = xecfg->flags & XDL_EMIT_DUPLICATE || 1;
+	if (duplicate_handling)
+		xdl_markup_duplicates(xe, xscr, xecfg);
 
 	for (xch = xscr; xch; xch = xche->next) {
 		xche = xdl_get_hunk(&xch, xecfg);
@@ -279,14 +403,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			 * Removes lines from the first file.
 			 */
 			for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
-				if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
+				if (xdl_check_and_emit_record(&xe->xdf1, s1, "-", ecb, duplicate_handling) < 0)
 					return -1;
 
 			/*
 			 * Adds lines from the second file.
 			 */
 			for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
-				if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
+				if (xdl_check_and_emit_record(&xe->xdf2, s2, "+", ecb, duplicate_handling) < 0)
 					return -1;
 
 			if (xch == xche)
-- 
2.10.0.rc2.23.gf336a1a.dirty


  parent reply	other threads:[~2016-09-03  3:31 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-03  3:31 [RFC/PATCH 0/2] Color moved code differently Stefan Beller
2016-09-03  3:31 ` [PATCH 1/2] diff.c: emit duplicate lines with a different color Stefan Beller
2016-09-03  3:31 ` Stefan Beller [this message]
2016-09-03 12:25   ` [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently Jakub Narębski
2016-09-04  5:31     ` Stefan Beller
2016-09-04 10:35       ` Jakub Narębski
2016-09-04  6:48     ` Junio C Hamano
2016-09-03  7:00 ` [RFC/PATCH 0/2] Color moved code differently Junio C Hamano
2016-09-04  5:23   ` Stefan Beller
2016-09-04  6:41     ` Junio C Hamano
2016-09-04  8:28       ` Stefan Beller
2016-09-04 22:19       ` Junio C Hamano
2016-09-04  9:57 ` Jacob Keller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160903033120.20511-3-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).