git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Git Mailing List <git@vger.kernel.org>
Subject: [PATCH 1/4] Tweak count-delta interface
Date: Fri, 03 Jun 2005 01:36:03 -0700	[thread overview]
Message-ID: <7vekbjq1l8.fsf_-_@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7vis0vq1rz.fsf_-_@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Fri, 03 Jun 2005 01:32:00 -0700")

Make it return copied source and insertion separately, so that
later implementation of heuristics can use them more flexibly.

This does not change the heuristics implemented in
diffcore-rename nor diffcore-break in any way.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

 count-delta.h     |    3 ++-
 diffcore.h        |    2 --
 count-delta.c     |   30 ++++++++++++++++--------------
 diffcore-break.c  |   15 +++++++++++----
 diffcore-rename.c |   15 +++++++++++----
 5 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/count-delta.h b/count-delta.h
--- a/count-delta.h
+++ b/count-delta.h
@@ -4,6 +4,7 @@
 #ifndef COUNT_DELTA_H
 #define COUNT_DELTA_H
 
-unsigned long count_delta(void *, unsigned long);
+int count_delta(void *, unsigned long,
+		unsigned long *src_copied, unsigned long *literal_added);
 
 #endif
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -12,8 +12,6 @@
 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
 #define DEFAULT_BREAK_SCORE  59400 /* minimum for break to happen (99%)*/
 
-#define RENAME_DST_MATCHED 01
-
 struct diff_filespec {
 	unsigned char sha1[20];
 	char *path;
diff --git a/count-delta.c b/count-delta.c
--- a/count-delta.c
+++ b/count-delta.c
@@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const 
 /*
  * NOTE.  We do not _interpret_ delta fully.  As an approximation, we
  * just count the number of bytes that are copied from the source, and
- * the number of literal data bytes that are inserted.  Number of
- * bytes that are _not_ copied from the source is deletion, and number
- * of inserted literal bytes are addition, so sum of them is what we
- * return.  xdelta can express an edit that copies data inside of the
- * destination which originally came from the source.  We do not count
- * that in the following routine, so we are undercounting the source
- * material that remains in the final output that way.
+ * the number of literal data bytes that are inserted.
+ *
+ * Number of bytes that are _not_ copied from the source is deletion,
+ * and number of inserted literal bytes are addition, so sum of them
+ * is the extent of damage.  xdelta can express an edit that copies
+ * data inside of the destination which originally came from the
+ * source.  We do not count that in the following routine, so we are
+ * undercounting the source material that remains in the final output
+ * that way.
  */
-unsigned long count_delta(void *delta_buf, unsigned long delta_size)
+int count_delta(void *delta_buf, unsigned long delta_size,
+		unsigned long *src_copied, unsigned long *literal_added)
 {
 	unsigned long copied_from_source, added_literal;
 	const unsigned char *data, *top;
@@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_bu
 
 	/* the smallest delta size possible is 6 bytes */
 	if (delta_size < 6)
-		return UINT_MAX;
+		return -1;
 
 	data = delta_buf;
 	top = delta_buf + delta_size;
@@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_bu
 
 	/* sanity check */
 	if (data != top || out != dst_size)
-		return UINT_MAX;
+		return -1;
 
 	/* delete size is what was _not_ copied from source.
 	 * edit size is that and literal additions.
 	 */
-	if (src_size + added_literal < copied_from_source)
-		/* we ended up overcounting and underflowed */
-		return 0;
-	return (src_size - copied_from_source) + added_literal;
+	*src_copied = copied_from_source;
+	*literal_added = added_literal;
+	return 0;
 }
diff --git a/diffcore-break.c b/diffcore-break.c
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -23,7 +23,7 @@ static int very_different(struct diff_fi
 	 * want to get the filepair broken.
 	 */
 	void *delta;
-	unsigned long delta_size, base_size;
+	unsigned long delta_size, base_size, src_copied, literal_added;
 
 	if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
 		return 0; /* leave symlink rename alone */
@@ -61,10 +61,17 @@ static int very_different(struct diff_fi
 		return MAX_SCORE;
 
 	/* Estimate the edit size by interpreting delta. */
-	delta_size = count_delta(delta, delta_size);
+	if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+		free(delta);
+		return 0;
+	}
 	free(delta);
-	if (delta_size == UINT_MAX)
-		return 0; /* error in delta computation */
+
+	/* Extent of damage */
+	if (src->size + literal_added < src_copied)
+		delta_size = 0;
+	else
+		delta_size = (src->size - src_copied) + literal_added;
 
 	if (base_size < delta_size)
 		return MAX_SCORE;
diff --git a/diffcore-rename.c b/diffcore-rename.c
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -135,7 +135,7 @@ static int estimate_similarity(struct di
 	 * call into this function in that case.
 	 */
 	void *delta;
-	unsigned long delta_size, base_size;
+	unsigned long delta_size, base_size, src_copied, literal_added;
 	int score;
 
 	/* We deal only with regular files.  Symlink renames are handled
@@ -174,10 +174,17 @@ static int estimate_similarity(struct di
 		return 0;
 
 	/* Estimate the edit size by interpreting delta. */
-	delta_size = count_delta(delta, delta_size);
-	free(delta);
-	if (delta_size == UINT_MAX)
+	if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+		free(delta);
 		return 0;
+	}
+	free(delta);
+
+	/* Extent of damage */
+	if (src->size + literal_added < src_copied)
+		delta_size = 0;
+	else
+		delta_size = (src->size - src_copied) + literal_added;
 
 	/*
 	 * Now we will give some score to it.  100% edit gets 0 points
------------


  reply	other threads:[~2005-06-03  8:34 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-05-30 20:00 I want to release a "git-1.0" Linus Torvalds
2005-05-30 20:33 ` jeff millar
2005-05-30 20:49 ` Nicolas Pitre
2005-06-01  6:52   ` Junio C Hamano
2005-06-01  8:24     ` [PATCH] Add -d flag to git-pull-* family Junio C Hamano
2005-06-01 14:39       ` Nicolas Pitre
2005-06-01 16:00         ` Junio C Hamano
     [not found]           ` <7v1x7lk8fl.fsf_-_@assigned-by-dhcp.cox.net>
2005-06-02  0:47             ` [PATCH] Handle deltified object correctly in git-*-pull family Nicolas Pitre
     [not found]             ` <7vpsv5hbm5.fsf@assigned-by-dhcp.cox.net>
2005-06-02  0:51               ` [PATCH] Stop inflating the whole SHA1 file only to check size Nicolas Pitre
2005-06-02  1:32                 ` Junio C Hamano
2005-06-02  0:58             ` [PATCH] Handle deltified object correctly in git-*-pull family Linus Torvalds
2005-06-02  1:43               ` Junio C Hamano
2005-05-30 20:59 ` I want to release a "git-1.0" Junio C Hamano
2005-05-30 21:07 ` Junio C Hamano
2005-05-30 22:11 ` David Greaves
2005-05-30 22:12 ` Dave Jones
2005-05-30 22:55   ` Dmitry Torokhov
2005-05-30 23:15     ` Junio C Hamano
2005-05-30 23:23     ` Dmitry Torokhov
2005-05-31  0:52   ` Linus Torvalds
2005-05-30 22:19 ` Ryan Anderson
2005-05-31  0:58   ` Linus Torvalds
2005-05-30 22:32 ` Chris Wedgwood
2005-05-30 23:56   ` Chris Wedgwood
2005-05-31  1:06   ` Linus Torvalds
2005-06-01  2:11     ` Junio C Hamano
2005-06-01  2:25       ` David Lang
2005-06-01  4:53         ` Junio C Hamano
2005-06-01 20:06           ` David Lang
2005-06-01 20:16             ` C. Scott Ananian
2005-06-02  0:43               ` Nicolas Pitre
2005-06-02  1:14                 ` Brian O'Mahoney
2005-06-01 23:03             ` Junio C Hamano
2005-05-31  0:19 ` Petr Baudis
2005-05-31 13:45 ` Eric W. Biederman
2005-06-01  3:04   ` Linus Torvalds
2005-06-01  4:06     ` Junio C Hamano
2005-06-02 23:54       ` [PATCH] Fix -B "very-different" logic Junio C Hamano
2005-06-03  0:21         ` Linus Torvalds
2005-06-03  1:33           ` Junio C Hamano
2005-06-03  8:32             ` [PATCH 0/4] " Junio C Hamano
2005-06-03  8:36               ` Junio C Hamano [this message]
2005-06-03  8:36               ` [PATCH 2/4] diff: Fix docs and add -O to diff-helper Junio C Hamano
2005-06-03  8:37               ` [PATCH 3/4] diff: Clean up diff_scoreopt_parse() Junio C Hamano
2005-06-03  8:40               ` [PATCH 4/4] diff: Update -B heuristics Junio C Hamano
2005-06-01  6:28     ` I want to release a "git-1.0" Junio C Hamano
2005-06-01 22:00     ` Daniel Barkalow
2005-06-01 23:05       ` Junio C Hamano
2005-06-03  9:47       ` Petr Baudis
2005-06-03 15:09         ` Daniel Barkalow
2005-06-02  7:15     ` Eric W. Biederman
2005-06-02  8:32       ` Kay Sievers
2005-06-02 14:52       ` Linus Torvalds
2005-06-02 12:02     ` [PATCH] several typos in tutorial Alexey Nezhdanov
2005-06-02 12:41       ` Vincent Hanquez
2005-06-02 12:45         ` Alexey Nezhdanov
2005-06-02 12:51           ` Vincent Hanquez
2005-06-02 12:56             ` Alexey Nezhdanov
2005-06-02 13:00             ` Alexey Nezhdanov
2005-06-02 23:40     ` I want to release a "git-1.0" Adam Kropelin
2005-06-03  0:06       ` Linus Torvalds
2005-06-03  0:47         ` Linus Torvalds
2005-06-03  1:34           ` Adam Kropelin
2005-06-02 19:43 ` CVS migration section to the tutorial Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vekbjq1l8.fsf_-_@assigned-by-dhcp.cox.net \
    --to=junkio@cox.net \
    --cc=git@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).