From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Git Mailing List <git@vger.kernel.org>
Subject: [PATCH 10/12] Fix the way diffcore-rename records unremoved source.
Date: Fri, 27 May 2005 15:55:55 -0700 [thread overview]
Message-ID: <7vsm082s90.fsf_-_@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7vk6lk5lxt.fsf_-_@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Fri, 27 May 2005 15:43:58 -0700")
Earlier versions of diffcore-rename used to keep unmodified
filepairs in their output so that the last stage of the processing,
which tells renames from copies, could turn all of the rename/copy
entries into copies. However, this interacted badly with other
diffcore filters that wanted to run after diffcore-rename, in that
such unmodified filepairs had to be retained for the proper
distinction between renames and copies to happen.
This patch fixes the problem by changing the way diffcore-rename
records the information needed to distinguish the "all are copies"
case from the "the last one is a rename" case.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
diff.c | 76 ++++++++++++++++++--------------------
diffcore-rename.c | 63 ++++++++++++-------------------
diffcore.h | 7 ++-
t/t4007-rename-3.sh | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 169 insertions(+), 80 deletions(-)
new file (100644): t/t4007-rename-3.sh
diff --git a/diff.c b/diff.c
--- a/diff.c
+++ b/diff.c
@@ -518,6 +518,7 @@ struct diff_filepair *diff_queue(struct
dp->one = one;
dp->two = two;
dp->score = 0;
+ dp->source_stays = 0;
diff_q(queue, dp);
return dp;
}
@@ -675,8 +676,8 @@ void diff_debug_filepair(const struct di
{
diff_debug_filespec(p->one, i, "one");
diff_debug_filespec(p->two, i, "two");
- fprintf(stderr, "score %d, status %c\n",
- p->score, p->status ? : '?');
+ fprintf(stderr, "score %d, status %c source_stays %d\n",
+ p->score, p->status ? : '?', p->source_stays);
}
void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
@@ -698,8 +699,6 @@ static void diff_resolve_rename_copy(voi
struct diff_filepair *p, *pp;
struct diff_queue_struct *q = &diff_queued_diff;
- /* This should not depend on the ordering of things. */
-
diff_debug_queue("resolve-rename-copy", q);
for (i = 0; i < q->nr; i++) {
@@ -707,23 +706,28 @@ static void diff_resolve_rename_copy(voi
p->status = 0; /* undecided */
if (DIFF_PAIR_UNMERGED(p))
p->status = 'U';
- else if (!DIFF_FILE_VALID((p)->one))
+ else if (!DIFF_FILE_VALID(p->one))
p->status = 'N';
- else if (!DIFF_FILE_VALID((p)->two)) {
- /* Deletion record should be omitted if there
- * are rename/copy entries using this one as
- * the source. Then we can say one of them
- * is a rename and the rest are copies.
+ else if (!DIFF_FILE_VALID(p->two)) {
+ /* Deleted entry may have been picked up by
+ * another rename-copy entry. So we scan the
+ * queue and if we find one that uses us as the
+ * source we do not say delete for this entry.
*/
- p->status = 'D';
for (j = 0; j < q->nr; j++) {
pp = q->queue[j];
- if (!strcmp(pp->one->path, p->one->path) &&
- strcmp(pp->one->path, pp->two->path)) {
+ if (!strcmp(p->one->path, pp->one->path) &&
+ pp->score) {
+ /* rename/copy are always valid
+ * so we do not say DIFF_FILE_VALID()
+ * on pp->one and pp->two.
+ */
p->status = 'X';
break;
}
}
+ if (!p->status)
+ p->status = 'D';
}
else if (DIFF_PAIR_TYPE_CHANGED(p))
p->status = 'T';
@@ -732,33 +736,24 @@ static void diff_resolve_rename_copy(voi
* whose both sides are valid and of the same type, i.e.
* either in-place edit or rename/copy edit.
*/
- else if (strcmp(p->one->path, p->two->path)) {
- /* See if there is somebody else anywhere that
- * will keep the path (either modified or
- * unmodified). If so, we have to be a copy,
- * not a rename. In addition, if there is
- * some other rename or copy that comes later
- * than us that uses the same source, we
- * have to be a copy, not a rename.
+ else if (p->score) {
+ if (p->source_stays) {
+ p->status = 'C';
+ continue;
+ }
+ /* See if there is some other filepair that
+ * copies from the same source as us. If so
+ * we are a copy. Otherwise we are a rename.
*/
- for (j = 0; j < q->nr; j++) {
+ for (j = i + 1; j < q->nr; j++) {
pp = q->queue[j];
if (strcmp(pp->one->path, p->one->path))
- continue;
- if (!strcmp(pp->one->path, pp->two->path)) {
- if (DIFF_FILE_VALID(pp->two)) {
- /* non-delete */
- p->status = 'C';
- break;
- }
- continue;
- }
- /* pp is a rename/copy ... */
- if (i < j) {
- /* ... and comes later than us */
- p->status = 'C';
- break;
- }
+ continue; /* not us */
+ if (!pp->score)
+ continue; /* not a rename/copy */
+ /* pp is a rename/copy from the same source */
+ p->status = 'C';
+ break;
}
if (!p->status)
p->status = 'R';
@@ -767,8 +762,11 @@ static void diff_resolve_rename_copy(voi
p->one->mode != p->two->mode)
p->status = 'M';
else
- /* this is a "no-change" entry */
- p->status = 'X';
+ /* this is a "no-change" entry.
+ * should not happen anymore.
+ * p->status = 'X';
+ */
+ die("internal error in diffcore: unmodified entry remains");
}
diff_debug_queue("resolve-rename-copy done", q);
}
diff --git a/diffcore-rename.c b/diffcore-rename.c
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -52,14 +52,15 @@ static struct diff_rename_dst *locate_re
return &(rename_dst[first]);
}
+/* Table of rename/copy src files */
static struct diff_rename_src {
struct diff_filespec *one;
- unsigned src_used : 1;
+ unsigned src_stays : 1;
} *rename_src;
static int rename_src_nr, rename_src_alloc;
-static struct diff_rename_src *locate_rename_src(struct diff_filespec *one,
- int insert_ok)
+static struct diff_rename_src *register_rename_src(struct diff_filespec *one,
+ int src_stays)
{
int first, last;
@@ -77,9 +78,7 @@ static struct diff_rename_src *locate_re
}
first = next+1;
}
- /* not found */
- if (!insert_ok)
- return NULL;
+
/* insert to make it at "first" */
if (rename_src_alloc <= rename_src_nr) {
rename_src_alloc = alloc_nr(rename_src_alloc);
@@ -91,7 +90,7 @@ static struct diff_rename_src *locate_re
memmove(rename_src + first + 1, rename_src + first,
(rename_src_nr - first - 1) * sizeof(*rename_src));
rename_src[first].one = one;
- rename_src[first].src_used = 0;
+ rename_src[first].src_stays = src_stays;
return &(rename_src[first]);
}
@@ -199,15 +198,14 @@ static void record_rename_pair(struct di
fill_filespec(two, dst->sha1, dst->mode);
dp = diff_queue(renq, one, two);
- dp->score = score;
-
- rename_src[src_index].src_used = 1;
+ dp->score = score ? : 1; /* make sure it is at least 1 */
+ dp->source_stays = rename_src[src_index].src_stays;
rename_dst[dst_index].pair = dp;
}
/*
* We sort the rename similarity matrix with the score, in descending
- * order (more similar first).
+ * order (the most similar first).
*/
static int score_compare(const void *a_, const void *b_)
{
@@ -254,9 +252,9 @@ void diffcore_rename(int detect_rename,
else
locate_rename_dst(p->two, 1);
else if (!DIFF_FILE_VALID(p->two))
- locate_rename_src(p->one, 1);
- else if (1 < detect_rename) /* find copy, too */
- locate_rename_src(p->one, 1);
+ register_rename_src(p->one, 0);
+ else if (detect_rename == DIFF_DETECT_COPY)
+ register_rename_src(p->one, 1);
}
if (rename_dst_nr == 0)
goto cleanup; /* nothing to do */
@@ -280,7 +278,7 @@ void diffcore_rename(int detect_rename,
* doing the delta matrix altogether.
*/
if (renq.nr == rename_dst_nr)
- goto flush_rest;
+ goto cleanup;
num_create = (rename_dst_nr - renq.nr);
num_src = rename_src_nr;
@@ -307,37 +305,30 @@ void diffcore_rename(int detect_rename,
if (dst->pair)
continue; /* already done, either exact or fuzzy. */
if (mx[i].score < minimum_score)
- break; /* there is not any more diffs applicable. */
+ break; /* there is no more usable pair. */
record_rename_pair(&renq, mx[i].dst, mx[i].src, mx[i].score);
}
free(mx);
diff_debug_queue("done detecting fuzzy", &renq);
- flush_rest:
+ cleanup:
/* At this point, we have found some renames and copies and they
* are kept in renq. The original list is still in *q.
- *
- * Scan the original list and move them into the outq; we will sort
- * outq and swap it into the queue supplied to pass that to
- * downstream, so we assign the sort keys in this loop.
- *
- * See comments at the top of record_rename_pair for numbers used
- * to assign rename_rank.
*/
outq.queue = NULL;
outq.nr = outq.alloc = 0;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- struct diff_rename_src *src = locate_rename_src(p->one, 0);
struct diff_rename_dst *dst = locate_rename_dst(p->two, 0);
struct diff_filepair *pair_to_free = NULL;
if (dst) {
/* creation */
if (dst->pair) {
- /* renq has rename/copy already to produce
- * this file, so we do not emit the creation
- * record in the output.
+ /* renq has rename/copy to produce
+ * this file already, so we do not
+ * emit the creation record in the
+ * output.
*/
diff_q(&outq, dst->pair);
pair_to_free = p;
@@ -349,17 +340,12 @@ void diffcore_rename(int detect_rename,
diff_q(&outq, p);
}
else if (!diff_unmodified_pair(p))
- /* all the other cases need to be recorded as is */
+ /* all the usual ones need to be kept */
diff_q(&outq, p);
- else {
- /* unmodified pair needs to be recorded only if
- * it is used as the source of rename/copy
- */
- if (src && src->src_used)
- diff_q(&outq, p);
- else
- pair_to_free = p;
- }
+ else
+ /* no need to keep unmodified pairs */
+ pair_to_free = p;
+
if (pair_to_free)
diff_free_filepair(pair_to_free);
}
@@ -370,7 +356,6 @@ void diffcore_rename(int detect_rename,
*q = outq;
diff_debug_queue("done collapsing", q);
- cleanup:
free(rename_dst);
rename_dst = NULL;
rename_dst_nr = rename_dst_alloc = 0;
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -39,8 +39,11 @@ extern void diff_free_filespec_data(stru
struct diff_filepair {
struct diff_filespec *one;
struct diff_filespec *two;
- int score; /* only valid when one and two are different paths */
- int status; /* M C R N D U (see Documentation/diff-format.txt) */
+ unsigned short int score; /* only valid when one and two are
+ * different paths
+ */
+ char source_stays; /* all of R/C are copies */
+ char status; /* M C R N D U (see Documentation/diff-format.txt) */
};
#define DIFF_PAIR_UNMERGED(p) \
(!DIFF_FILE_VALID((p)->one) && !DIFF_FILE_VALID((p)->two))
diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh
new file mode 100644
--- /dev/null
+++ b/t/t4007-rename-3.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Junio C Hamano
+#
+
+test_description='Rename interaction with pathspec.
+
+'
+. ./test-lib.sh
+
+_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
+_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
+sanitize_diff_raw='s/ \('"$_x40"'\) \1 \([CR]\)[0-9]* / \1 \1 \2# /'
+compare_diff_raw () {
+ # When heuristics are improved, the score numbers would change.
+ # Ignore them while comparing.
+ # Also we do not check SHA1 hash generation in this test, which
+ # is a job for t0000-basic.sh
+
+ sed -e "$sanitize_diff_raw" <"$1" >.tmp-1
+ sed -e "$sanitize_diff_raw" <"$2" >.tmp-2
+ diff -u .tmp-1 .tmp-2 && rm -f .tmp-1 .tmp-2
+}
+
+test_expect_success \
+ 'prepare reference tree' \
+ 'mkdir path0 path1 &&
+ cp ../../COPYING path0/COPYING &&
+ git-update-cache --add path0/COPYING &&
+ tree=$(git-write-tree) &&
+ echo $tree'
+
+test_expect_success \
+ 'prepare work tree' \
+ 'cp path0/COPYING path1/COPYING &&
+ git-update-cache --add --remove path0/COPYING path1/COPYING'
+
+# In the tree, there is only path0/COPYING. In the cache, path0 and
+# path1 both have COPYING and the latter is a copy of path0/COPYING.
+# Comparing the full tree with cache should tell us so.
+
+git-diff-cache -C $tree >current
+
+cat >expected <<\EOF
+:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 C100 path0/COPYING path1/COPYING
+EOF
+
+test_expect_success \
+ 'validate the result' \
+ 'compare_diff_raw current expected'
+
+# In the tree, there is only path0/COPYING. In the cache, path0 and
+# path1 both have COPYING and the latter is a copy of path0/COPYING.
+# When we omit output from path0 it should still be able to tell us
+# that path1/COPYING is result from a copy from path0/COPYING, not
+# rename, which would imply path0/COPYING is now gone.
+
+git-diff-cache -C $tree path1 >current
+
+cat >expected <<\EOF
+:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 C100 path0/COPYING path1/COPYING
+EOF
+
+test_expect_success \
+ 'validate the result' \
+ 'compare_diff_raw current expected'
+
+test_expect_success \
+ 'tweak work tree' \
+ 'rm -f path0/COPYING &&
+ git-update-cache --remove path0/COPYING'
+
+# In the tree, there is only path0/COPYING. In the cache, path0 does
+# not have COPYING anymore and path1 has COPYING which is a copy of
+# path0/COPYING. Showing the full tree with cache should tell us about
+# the rename.
+
+git-diff-cache -C $tree >current
+
+cat >expected <<\EOF
+:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 R100 path0/COPYING path1/COPYING
+EOF
+
+test_expect_success \
+ 'validate the result' \
+ 'compare_diff_raw current expected'
+
+# In the tree, there is only path0/COPYING. In the cache, path0 does
+# not have COPYING anymore and path1 has COPYING which is a copy of
+# path0/COPYING. Even if we restrict the output to path1, it still
+# should show us the rename.
+
+git-diff-cache -C $tree path1 >current
+
+cat >expected <<\EOF
+:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 R100 path0/COPYING path1/COPYING
+EOF
+
+test_expect_success \
+ 'validate the result' \
+ 'compare_diff_raw current expected'
+
+test_done
------------------------------------------------
next prev parent reply other threads:[~2005-05-27 22:55 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-05-27 0:41 Broken directory pathname pruning Linus Torvalds
2005-05-27 0:42 ` Junio C Hamano
2005-05-27 0:49 ` [PATCH] allow pathspec to end with a slash Junio C Hamano
2005-05-27 0:52 ` [PATCH] allow pathspec to end with a slash (take #2) Junio C Hamano
2005-05-27 6:41 ` [PATCH] Diff updates, fixing pathspec and rename/copy interaction Junio C Hamano
2005-05-27 15:56 ` Linus Torvalds
2005-05-27 18:22 ` Junio C Hamano
2005-05-27 22:43 ` [PATCH 00/12] Diff updates Junio C Hamano
2005-05-27 22:49 ` [PATCH 01/12] Fix math thinko in similarity estimator Junio C Hamano
2005-05-27 22:50 ` [PATCH 02/12] Introduce diff_free_filepair() funcion Junio C Hamano
2005-05-27 22:51 ` [PATCH 03/12] Make pathspec only care about the detination tree Junio C Hamano
2005-05-27 22:52 ` [PATCH 04/12] Remove unused rank field from diff_core structure Junio C Hamano
2005-05-27 22:53 ` [PATCH 05/12] Do not expose internal scaling to diff-helper Junio C Hamano
2005-05-27 22:54 ` [PATCH 06/12] Remove final newline from the value of xfrm_msg variable Junio C Hamano
2005-05-27 22:54 ` [PATCH 07/12] Clean up diff_setup() to make it more extensible Junio C Hamano
2005-05-27 22:55 ` [PATCH 08/12] Remove a function not used anymore Junio C Hamano
2005-05-27 22:55 ` [PATCH 09/12] Add --pickaxe-all to diff-* brothers Junio C Hamano
2005-05-27 22:55 ` Junio C Hamano [this message]
2005-05-27 22:56 ` [PATCH 11/12] Move pathspec to the beginning of the diffcore chain Junio C Hamano
2005-05-27 22:56 ` [PATCH 12/12] Optimize diff-tree -[CM] --stdin Junio C Hamano
2005-06-04 2:17 ` Yoichi Yuasa
2005-05-27 23:03 ` [PATCH 00/12] Diff updates Junio C Hamano
2005-05-28 10:11 ` [PATCH] Do not show empty diff in diff-cache uncached Junio C Hamano
2005-05-28 19:22 ` [PATCH] Diff: two fixes Junio C Hamano
2005-05-29 4:20 ` [PATCH] diff-helper: fix R/C score parsing under -z flag Junio C Hamano
2005-05-29 5:23 ` [PATCH] diff-cache: diff-patch (-p) format fixes Junio C Hamano
2005-05-29 9:10 ` [PATCH] diff: code clean-up Junio C Hamano
2005-05-29 18:53 ` [PATCH] Do not show empty diff in diff-cache uncached Linus Torvalds
2005-05-29 20:09 ` Junio C Hamano
2005-05-29 21:52 ` Junio C Hamano
2005-05-29 23:41 ` [PATCH 0/3] Leftover bits after 12-series Junio C Hamano
2005-05-29 23:54 ` [PATCH 1/3] diff-helper: Fix R/C score parsing under -z flag Junio C Hamano
2005-05-29 23:56 ` [PATCH 2/3] diff: consolidate various calls into diffcore Junio C Hamano
2005-05-29 23:56 ` [PATCH 3/3] diff: code clean-up and removal of rename hack Junio C Hamano
2005-05-30 6:58 ` [PATCH 0/4] Junio C Hamano
2005-05-30 7:07 ` [PATCH 1/4] diff: further cleanup Junio C Hamano
2005-05-30 7:08 ` [PATCH 2/4] diff: fix the culling of unneeded delete record Junio C Hamano
2005-05-30 7:08 ` [PATCH 3/4] Add -B flag to diff-* brothers Junio C Hamano
2005-05-30 7:09 ` [PATCH 4/4] Add -O<orderfile> option " Junio C Hamano
2005-05-30 5:34 ` [PATCH] Do not show empty diff in diff-cache uncached Linus Torvalds
2005-05-30 5:53 ` Junio C Hamano
2005-06-11 23:27 ` [PATCH] apply.c: tolerate diff from a dirty but unchanged path Junio C Hamano
2005-06-12 16:14 ` Linus Torvalds
2005-06-12 17:05 ` Linus Torvalds
2005-06-12 18:34 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7vsm082s90.fsf_-_@assigned-by-dhcp.cox.net \
--to=junkio@cox.net \
--cc=git@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).