git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Elijah Newren <newren@gmail.com>
To: git@vger.kernel.org
Cc: Elijah Newren <newren@gmail.com>
Subject: [PATCH 24/30] merge-recursive: Add computation of collisions due to dir rename & merging
Date: Fri, 10 Nov 2017 11:05:44 -0800	[thread overview]
Message-ID: <20171110190550.27059-25-newren@gmail.com> (raw)
In-Reply-To: <20171110190550.27059-1-newren@gmail.com>

directory renaming and merging can cause one or more files to be moved to
where an existing file is, or to cause several files to all be moved to
the same (otherwise vacant) location.  Add checking and reporting for such
cases, falling back to no-directory-rename handling for such paths.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-recursive.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 116 insertions(+), 2 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 1858686c35..251c4cc7fa 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1408,6 +1408,30 @@ static int tree_has_path(struct tree *tree, const char *path)
 			       hashy, &mode_o);
 }
 
+/*
+ * Return a new string that replaces the beginning portion (which matches
+ * entry->dir), with entry->new_dir.  In perl-speak:
+ *   new_path_name = (old_path =~ s/entry->dir/entry->new_dir/);
+ */
+static char *apply_dir_rename(struct dir_rename_entry *entry,
+			       const char *old_path) {
+	char *new_path;
+	int oldlen, newlen;
+
+	if (entry->non_unique_new_dir)
+		return NULL;
+
+	oldlen = strlen(entry->dir);
+	assert(strncmp(entry->dir, old_path, oldlen) == 0 &&
+	       old_path[oldlen] == '/');
+	newlen = strlen(entry->new_dir) + (strlen(old_path) - oldlen) + 1;
+	new_path = malloc(newlen);
+	strcpy(new_path, entry->new_dir);
+	strcpy(&new_path[strlen(new_path)], &old_path[oldlen]);
+
+	return new_path;
+}
+
 static void get_renamed_dir_portion(const char *old_path, const char *new_path,
 				    char **old_dir, char **new_dir) {
 	*old_dir = NULL;
@@ -1625,6 +1649,82 @@ static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
 	return dir_renames;
 }
 
+static struct dir_rename_entry *check_dir_renamed(const char *path,
+						  struct hashmap *dir_renames) {
+	char temp[PATH_MAX];
+	char *end;
+	struct dir_rename_entry *entry;
+
+	strcpy(temp, path);
+	while ((end = strrchr(temp, '/'))) {
+		*end = '\0';
+		entry = dir_rename_find_entry(dir_renames, temp);
+		if (entry)
+			return entry;
+	}
+	return NULL;
+}
+
+static void compute_collisions(struct hashmap *collisions,
+			       struct hashmap *dir_renames,
+			       struct diff_queue_struct *pairs)
+{
+	int i;
+
+	/*
+	 * Multiple files can be mapped to the same path due to directory
+	 * renames done by the other side of history.  Since that other
+	 * side of history could have merged multiple directories into one,
+	 * if our side of history added the same file basename to each of
+	 * those directories, then all N of them would get implicitly
+	 * renamed by the directory rename detection into the same path,
+	 * and we'd get an add/add/.../add conflict, and all those adds
+	 * from *this* side of history.  This is not representable in the
+	 * index, and users aren't going to easily be able to make sense of
+	 * it.  So we need to provide a good warning about what's
+	 * happening, and fall back to no-directory-rename detection
+	 * behavior for those paths.
+	 *
+	 * See testcases 9e and all of section 5 from t6043 for examples.
+	 */
+	collision_init(collisions);
+
+	for (i = 0; i < pairs->nr; ++i) {
+		struct dir_rename_entry *dir_rename_ent;
+		struct collision_entry *collision_ent;
+		char *new_path;
+		struct diff_filepair *pair = pairs->queue[i];
+
+		if (pair->status == 'D')
+			continue;
+		dir_rename_ent = check_dir_renamed(pair->two->path, dir_renames);
+		if (!dir_rename_ent)
+			continue;
+
+		new_path = apply_dir_rename(dir_rename_ent, pair->two->path);
+		if (!new_path)
+			/*
+			 * dir_rename_ent->non_unique_new_path is true, which
+			 * means there is no directory rename for us to use,
+			 * which means it won't cause us any additional
+			 * collisions.
+			 */
+			continue;
+		collision_ent = collision_find_entry(collisions, new_path);
+		if (!collision_ent) {
+			collision_ent = xcalloc(1,
+						sizeof(struct collision_entry));
+			hashmap_entry_init(collision_ent, strhash(new_path));
+			hashmap_put(collisions, collision_ent);
+			collision_ent->target_file = new_path;
+		} else {
+			free(new_path);
+		}
+		string_list_insert(&collision_ent->source_files,
+				   pair->two->path);
+	}
+}
+
 /*
  * Get information of all renames which occurred in 'pairs', making use of
  * any implicit directory renames inferred from the other side of history.
@@ -1634,6 +1734,7 @@ static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
  */
 static struct string_list *get_renames(struct merge_options *o,
 				       struct diff_queue_struct *pairs,
+				       struct hashmap *dir_renames,
 				       struct tree *tree,
 				       struct tree *o_tree,
 				       struct tree *a_tree,
@@ -1641,8 +1742,12 @@ static struct string_list *get_renames(struct merge_options *o,
 				       struct string_list *entries)
 {
 	int i;
+	struct hashmap collisions;
+	struct hashmap_iter iter;
+	struct collision_entry *e;
 	struct string_list *renames;
 
+	compute_collisions(&collisions, dir_renames, pairs);
 	renames = xcalloc(1, sizeof(struct string_list));
 
 	for (i = 0; i < pairs->nr; ++i) {
@@ -1672,6 +1777,13 @@ static struct string_list *get_renames(struct merge_options *o,
 		item = string_list_insert(renames, pair->one->path);
 		item->util = re;
 	}
+
+	hashmap_iter_init(&collisions, &iter);
+	while ((e = hashmap_iter_next(&iter))) {
+		free(e->target_file);
+		string_list_clear(&e->source_files, 0);
+	}
+	hashmap_free(&collisions, 1);
 	return renames;
 }
 
@@ -1962,9 +2074,11 @@ static struct rename_info *handle_renames(struct merge_options *o,
 					 dir_re_head, head,
 					 dir_re_merge, merge);
 
-	rei->head_renames  = get_renames(o, head_pairs, head,
+	rei->head_renames  = get_renames(o, head_pairs,
+					 dir_re_merge, head,
 					 common, head, merge, entries);
-	rei->merge_renames = get_renames(o, merge_pairs, merge,
+	rei->merge_renames = get_renames(o, merge_pairs,
+					 dir_re_head, merge,
 					 common, head, merge, entries);
 	*clean = process_renames(o, rei->head_renames, rei->merge_renames);
 
-- 
2.15.0.5.g9567be9905


  parent reply	other threads:[~2017-11-10 19:06 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-10 19:05 [PATCH 00/30] Add directory rename detection to git Elijah Newren
2017-11-10 19:05 ` [PATCH 01/30] Tighten and correct a few testcases for merging and cherry-picking Elijah Newren
2017-11-13 19:32   ` Stefan Beller
2017-11-10 19:05 ` [PATCH 02/30] merge-recursive: Fix logic ordering issue Elijah Newren
2017-11-13 19:48   ` Stefan Beller
2017-11-13 22:04     ` Elijah Newren
2017-11-13 22:12       ` Stefan Beller
2017-11-13 23:39         ` Elijah Newren
2017-11-13 23:46           ` Stefan Beller
2017-11-10 19:05 ` [PATCH 03/30] merge-recursive: Add explanation for src_entry and dst_entry Elijah Newren
2017-11-13 21:06   ` Stefan Beller
2017-11-13 22:57     ` Elijah Newren
2017-11-13 23:11       ` Stefan Beller
2017-11-14  1:26   ` Junio C Hamano
2017-11-10 19:05 ` [PATCH 04/30] directory rename detection: basic testcases Elijah Newren
2017-11-13 22:04   ` Stefan Beller
2017-11-14  0:57     ` Elijah Newren
2017-11-14  1:21       ` Stefan Beller
2017-11-14  1:40         ` Elijah Newren
2017-11-14  2:03     ` Junio C Hamano
2017-11-10 19:05 ` [PATCH 05/30] directory rename detection: directory splitting testcases Elijah Newren
2017-11-13 23:20   ` Stefan Beller
2017-11-10 19:05 ` [PATCH 06/30] directory rename detection: testcases to avoid taking detection too far Elijah Newren
2017-11-13 23:25   ` Stefan Beller
2017-11-14  1:02     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 07/30] directory rename detection: partially renamed directory testcase/discussion Elijah Newren
2017-11-14  0:07   ` Stefan Beller
2017-11-10 19:05 ` [PATCH 08/30] directory rename detection: files/directories in the way of some renames Elijah Newren
2017-11-14  0:15   ` Stefan Beller
2017-11-14  1:19     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 09/30] directory rename detection: testcases checking which side did the rename Elijah Newren
2017-11-14  0:25   ` Stefan Beller
2017-11-14  1:30     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 10/30] directory rename detection: more involved edge/corner testcases Elijah Newren
2017-11-14  0:42   ` Stefan Beller
2017-11-14 21:11     ` Elijah Newren
2017-11-14 22:47       ` Stefan Beller
2017-11-10 19:05 ` [PATCH 11/30] directory rename detection: testcases exploring possibly suboptimal merges Elijah Newren
2017-11-14 20:33   ` Stefan Beller
2017-11-14 21:42     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 12/30] directory rename detection: miscellaneous testcases to complete coverage Elijah Newren
2017-11-15 20:03   ` Stefan Beller
2017-11-16 21:17     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 13/30] directory rename detection: tests for handling overwriting untracked files Elijah Newren
2017-11-10 19:05 ` [PATCH 14/30] directory rename detection: tests for handling overwriting dirty files Elijah Newren
2017-11-10 19:05 ` [PATCH 15/30] merge-recursive: Move the get_renames() function Elijah Newren
2017-11-14  4:46   ` Junio C Hamano
2017-11-14 17:41     ` Elijah Newren
2017-11-15  1:20       ` Junio C Hamano
2017-11-10 19:05 ` [PATCH 16/30] merge-recursive: Introduce new functions to handle rename logic Elijah Newren
2017-11-14  4:56   ` Junio C Hamano
2017-11-14  5:14     ` Junio C Hamano
2017-11-14 18:24       ` Elijah Newren
2017-11-10 19:05 ` [PATCH 17/30] merge-recursive: Fix leaks of allocated renames and diff_filepairs Elijah Newren
2017-11-14  4:58   ` Junio C Hamano
2017-11-10 19:05 ` [PATCH 18/30] merge-recursive: Make !o->detect_rename codepath more obvious Elijah Newren
2017-11-10 19:05 ` [PATCH 19/30] merge-recursive: Split out code for determining diff_filepairs Elijah Newren
2017-11-14  5:20   ` Junio C Hamano
2017-11-10 19:05 ` [PATCH 20/30] merge-recursive: Add a new hashmap for storing directory renames Elijah Newren
2017-11-10 19:05 ` [PATCH 21/30] merge-recursive: Add get_directory_renames() Elijah Newren
2017-11-14  5:30   ` Junio C Hamano
2017-11-14 18:38     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 22/30] merge-recursive: Check for directory level conflicts Elijah Newren
2017-11-10 19:05 ` [PATCH 23/30] merge-recursive: Add a new hashmap for storing file collisions Elijah Newren
2017-11-10 19:05 ` Elijah Newren [this message]
2018-06-10 10:56   ` [PATCH 24/30] merge-recursive: Add computation of collisions due to dir rename & merging René Scharfe
2018-06-10 11:03     ` René Scharfe
2018-06-10 20:44     ` Jeff King
2018-06-11 15:03     ` Elijah Newren
2018-06-14 17:36     ` Junio C Hamano
2017-11-10 19:05 ` [PATCH 25/30] merge-recursive: Check for file level conflicts then get new name Elijah Newren
2017-11-10 19:05 ` [PATCH 26/30] merge-recursive: When comparing files, don't include trees Elijah Newren
2017-11-10 19:05 ` [PATCH 27/30] merge-recursive: Apply necessary modifications for directory renames Elijah Newren
2017-11-15 20:23   ` Stefan Beller
2017-11-16  3:54     ` Elijah Newren
2017-11-10 19:05 ` [PATCH 28/30] merge-recursive: Avoid clobbering untracked files with " Elijah Newren
2017-11-10 19:05 ` [RFC PATCH 29/30] merge-recursive: Fix overwriting dirty files involved in renames Elijah Newren
2017-11-10 19:05 ` [PATCH 30/30] merge-recursive: Fix remaining directory rename + dirty overwrite cases Elijah Newren
2017-11-10 22:27 ` [PATCH 00/30] Add directory rename detection to git Philip Oakley
2017-11-10 23:26   ` Elijah Newren
2017-11-13 15:04     ` Philip Oakley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171110190550.27059-25-newren@gmail.com \
    --to=newren@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).