git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Elijah Newren <newren@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, Elijah Newren <newren@gmail.com>
Subject: [PATCH v3 21/33] merge-recursive: add get_directory_renames()
Date: Tue, 21 Nov 2017 00:00:47 -0800	[thread overview]
Message-ID: <20171121080059.32304-22-newren@gmail.com> (raw)
In-Reply-To: <20171121080059.32304-1-newren@gmail.com>

Add get_directory_renames(), which builds a hashmap of the directory
renames implied by the file renames on one side of history.  The hashmap
is not yet used, but will be in subsequent commits.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-recursive.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/merge-recursive.c b/merge-recursive.c
index 2f4f85314a..6a0a6d4366 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1384,6 +1384,132 @@ static struct diff_queue_struct *get_diffpairs(struct merge_options *o,
 	return ret;
 }
 
+static void get_renamed_dir_portion(const char *old_path, const char *new_path,
+				    char **old_dir, char **new_dir)
+{
+	const char *end_of_old, *end_of_new;
+	int old_len, new_len;
+
+	*old_dir = NULL;
+	*new_dir = NULL;
+
+	/*
+	 * For
+	 *    "a/b/c/d/foo.c" -> "a/b/something-else/d/foo.c"
+	 * the "d/foo.c" part is the same, so report "a/b/c" in *old_dir and
+	 * "a/b/something-else" in *new_dir; a changed basename is ignored.
+	 * Both results are newly allocated and owned by the caller.  Both
+	 * stay NULL if the directory did not change, or if a path has no
+	 * directory part or has its only '/' at the very start (the scan
+	 * below would otherwise step before the start of the string).
+	 */
+	end_of_old = strrchr(old_path, '/');
+	end_of_new = strrchr(new_path, '/');
+
+	if (!end_of_old || !end_of_new ||
+	    end_of_old == old_path || end_of_new == new_path)
+		return;
+	while (*--end_of_new == *--end_of_old &&
+	       end_of_old != old_path &&
+	       end_of_new != new_path)
+		; /* Do nothing; all in the while loop */
+
+	/*
+	 * The scan stopped at the first differing character (or when one
+	 * path ran out).  A pointer resting on '/' marks the end of the
+	 * preceding component, so step past it; strchr() then finds the
+	 * end of the directory component that actually differs.
+	 */
+	if (*end_of_old == '/')
+		end_of_old++;
+	if (*end_of_new == '/')
+		end_of_new++;
+	end_of_old = strchr(end_of_old, '/');
+	end_of_new = strchr(end_of_new, '/');
+
+	/* Identical directory prefixes mean there was no rename to report. */
+	old_len = end_of_old - old_path;
+	new_len = end_of_new - new_path;
+
+	if (old_len != new_len || strncmp(old_path, new_path, old_len)) {
+		/* xstrndup() dies on failure rather than returning NULL */
+		*old_dir = xstrndup(old_path, old_len);
+		*new_dir = xstrndup(new_path, new_len);
+	}
+}
+
+/*
+ * Map each directory that files in 'pairs' were renamed out of to the
+ * directory most of them moved to; a tie sets non_unique_new_dir instead.
+ * The caller owns the returned map, its entries and their strings.  The
+ * x*() allocators die on failure, so no NULL checks; 'tree' is unused.
+ */
+static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
+					     struct tree *tree)
+{
+	struct hashmap *dir_renames;
+	struct hashmap_iter iter;
+	struct dir_rename_entry *entry;
+	int i;
+
+	dir_renames = xmalloc(sizeof(*dir_renames));
+	dir_rename_init(dir_renames);
+	for (i = 0; i < pairs->nr; ++i) {
+		struct string_list_item *item;
+		int *count;
+		struct diff_filepair *pair = pairs->queue[i];
+		char *old_dir, *new_dir;
+
+		get_renamed_dir_portion(pair->one->path, pair->two->path,
+					&old_dir,        &new_dir);
+		if (!old_dir)
+			continue; /* no directory rename for this pair */
+		entry = dir_rename_find_entry(dir_renames, old_dir);
+		if (!entry) {
+			entry = xmalloc(sizeof(*entry));
+			dir_rename_entry_init(entry, old_dir);
+			hashmap_put(dir_renames, entry);
+		} else {
+			free(old_dir);
+		}
+		item = string_list_lookup(&entry->possible_new_dirs, new_dir);
+		if (!item) {
+			item = string_list_insert(&entry->possible_new_dirs,
+						  new_dir);
+			item->util = xcalloc(1, sizeof(int));
+		} else {
+			free(new_dir);
+		}
+		count = item->util; /* files seen moving old_dir -> new_dir */
+		*count += 1;
+	}
+
+	hashmap_iter_init(dir_renames, &iter);
+	while ((entry = hashmap_iter_next(&iter))) {
+		int max = 0, bad_max = 0; /* bad_max: a maximum hit twice */
+		char *best = NULL;
+
+		for (i = 0; i < entry->possible_new_dirs.nr; i++) {
+			int *count = entry->possible_new_dirs.items[i].util;
+
+			if (*count == max)
+				bad_max = max;
+			else if (*count > max) {
+				max = *count;
+				best = entry->possible_new_dirs.items[i].string;
+			}
+		}
+		if (bad_max == max)
+			entry->non_unique_new_dir = 1; /* tie for the top */
+		else
+			entry->new_dir = xstrdup(best);
+		/* We allocated the strings; have string_list_clear() free them. */
+		entry->possible_new_dirs.strdup_strings = 1;
+		string_list_clear(&entry->possible_new_dirs, 1);
+	}
+	return dir_renames;
+}
+
 /*
  * Get information of all renames which occurred in 'pairs', making use of
  * any implicit directory renames inferred from the other side of history.
@@ -1703,6 +1829,9 @@ static int handle_renames(struct merge_options *o,
 			  struct rename_info *ri)
 {
 	struct diff_queue_struct *head_pairs, *merge_pairs;
+	struct hashmap *dir_re_head, *dir_re_merge;
+	struct hashmap_iter iter;
+	struct dir_rename_entry *e;
 	int clean;
 
 	ri->head_renames = NULL;
@@ -1714,6 +1843,9 @@ static int handle_renames(struct merge_options *o,
 	head_pairs = get_diffpairs(o, common, head);
 	merge_pairs = get_diffpairs(o, common, merge);
 
+	dir_re_head = get_directory_renames(head_pairs, head);
+	dir_re_merge = get_directory_renames(merge_pairs, merge);
+
 	ri->head_renames  = get_renames(o, head_pairs, head,
 					 common, head, merge, entries);
 	ri->merge_renames = get_renames(o, merge_pairs, merge,
@@ -1726,6 +1858,26 @@ static int handle_renames(struct merge_options *o,
 	 * process_entry().  But there are a few things we can free now.
 	 */
 
+	hashmap_iter_init(dir_re_head, &iter);
+	while ((e = hashmap_iter_next(&iter))) {
+		free(e->dir);
+		if (e->new_dir)
+			free(e->new_dir);
+		/* possible_new_dirs already cleared in get_directory_renames */
+	}
+	hashmap_free(dir_re_head, 1);
+	free(dir_re_head);
+
+	hashmap_iter_init(dir_re_merge, &iter);
+	while ((e = hashmap_iter_next(&iter))) {
+		free(e->dir);
+		if (e->new_dir)
+			free(e->new_dir);
+		/* possible_new_dirs already cleared in get_directory_renames */
+	}
+	hashmap_free(dir_re_merge, 1);
+	free(dir_re_merge);
+
 	free(head_pairs->queue);
 	free(head_pairs);
 	free(merge_pairs->queue);
-- 
2.15.0.309.g62ce55426d


  parent reply	other threads:[~2017-11-21  8:01 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-21  8:00 [PATCH v3 00/33] Add directory rename detection to git Elijah Newren
2017-11-21  8:00 ` [PATCH v3 01/33] Tighten and correct a few testcases for merging and cherry-picking Elijah Newren
2017-11-21  8:00 ` [PATCH v3 02/33] merge-recursive: fix logic ordering issue Elijah Newren
2017-11-21  8:00 ` [PATCH v3 03/33] merge-recursive: add explanation for src_entry and dst_entry Elijah Newren
2017-11-21  8:00 ` [PATCH v3 04/33] directory rename detection: basic testcases Elijah Newren
2017-11-21  8:00 ` [PATCH v3 05/33] directory rename detection: directory splitting testcases Elijah Newren
2017-11-21  8:00 ` [PATCH v3 06/33] directory rename detection: testcases to avoid taking detection too far Elijah Newren
2017-11-21  8:00 ` [PATCH v3 07/33] directory rename detection: partially renamed directory testcase/discussion Elijah Newren
2017-11-21  8:00 ` [PATCH v3 08/33] directory rename detection: files/directories in the way of some renames Elijah Newren
2017-11-21  8:00 ` [PATCH v3 09/33] directory rename detection: testcases checking which side did the rename Elijah Newren
2017-11-21  8:00 ` [PATCH v3 10/33] directory rename detection: more involved edge/corner testcases Elijah Newren
2017-11-21  8:00 ` [PATCH v3 11/33] directory rename detection: testcases exploring possibly suboptimal merges Elijah Newren
2017-11-22 18:29   ` Stefan Beller
2017-11-21  8:00 ` [PATCH v3 12/33] directory rename detection: miscellaneous testcases to complete coverage Elijah Newren
2017-11-21  8:00 ` [PATCH v3 13/33] directory rename detection: tests for handling overwriting untracked files Elijah Newren
2017-11-21  8:00 ` [PATCH v3 14/33] directory rename detection: tests for handling overwriting dirty files Elijah Newren
2017-11-21  8:00 ` [PATCH v3 15/33] merge-recursive: move the get_renames() function Elijah Newren
2017-11-21  8:00 ` [PATCH v3 16/33] merge-recursive: introduce new functions to handle rename logic Elijah Newren
2017-11-21  8:00 ` [PATCH v3 17/33] merge-recursive: fix leaks of allocated renames and diff_filepairs Elijah Newren
2017-11-21  8:00 ` [PATCH v3 18/33] merge-recursive: make !o->detect_rename codepath more obvious Elijah Newren
2017-11-21  8:00 ` [PATCH v3 19/33] merge-recursive: split out code for determining diff_filepairs Elijah Newren
2017-11-21  8:00 ` [PATCH v3 20/33] merge-recursive: add a new hashmap for storing directory renames Elijah Newren
2017-11-21  8:00 ` Elijah Newren [this message]
2017-11-26  0:52   ` [PATCH v3 21/33] merge-recursive: add get_directory_renames() Johannes Schindelin
2017-11-26  1:45     ` Elijah Newren
2017-11-21  8:00 ` [PATCH v3 22/33] merge-recursive: check for directory level conflicts Elijah Newren
2017-11-22 18:36   ` Stefan Beller
2017-11-21  8:00 ` [PATCH v3 23/33] merge-recursive: add a new hashmap for storing file collisions Elijah Newren
2017-11-21  8:00 ` [PATCH v3 24/33] merge-recursive: add computation of collisions due to dir rename & merging Elijah Newren
2017-11-21  8:00 ` [PATCH v3 25/33] merge-recursive: check for file level conflicts then get new name Elijah Newren
2017-11-22 18:56   ` Stefan Beller
2017-11-21  8:00 ` [PATCH v3 26/33] merge-recursive: when comparing files, don't include trees Elijah Newren
2017-11-21  8:00 ` [PATCH v3 27/33] merge-recursive: apply necessary modifications for directory renames Elijah Newren
2017-11-21  8:00 ` [PATCH v3 28/33] merge-recursive: avoid clobbering untracked files with " Elijah Newren
2017-11-21  8:00 ` [PATCH v3 29/33] merge-recursive: fix overwriting dirty files involved in renames Elijah Newren
2017-11-21  8:00 ` [PATCH v3 30/33] merge-recursive: fix remaining directory rename + dirty overwrite cases Elijah Newren
2017-11-21  8:00 ` [PATCH v3 31/33] directory rename detection: new testcases showcasing a pair of bugs Elijah Newren
2017-11-21  8:00 ` [PATCH v3 32/33] merge-recursive: avoid spurious rename/rename conflict from dir renames Elijah Newren
2017-11-21  8:00 ` [PATCH v3 33/33] merge-recursive: ensure we write updates for directory-renamed file Elijah Newren
2017-11-22  0:42 ` [PATCH v3 00/33] Add directory rename detection to git Stefan Beller
2017-11-22  1:12   ` Elijah Newren
2017-11-22  2:44     ` Junio C Hamano
2017-11-22 19:24     ` Stefan Beller
2017-11-23  6:22       ` Elijah Newren
2017-11-23 11:52 ` Adam Dinwoodie
2017-11-23 22:28   ` Elijah Newren
2017-11-24  5:25     ` Elijah Newren
2017-11-24 20:07       ` Elijah Newren

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171121080059.32304-22-newren@gmail.com \
    --to=newren@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).