From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Git Mailing List <git@vger.kernel.org>
Subject: [PATCH 1/4] Tweak count-delta interface
Date: Fri, 03 Jun 2005 01:36:03 -0700 [thread overview]
Message-ID: <7vekbjq1l8.fsf_-_@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7vis0vq1rz.fsf_-_@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Fri, 03 Jun 2005 01:32:00 -0700")
Make count_delta() return the number of bytes copied from the source
and the number of literal bytes inserted separately, so that later
implementations of heuristics can use them more flexibly.
This does not change the heuristics implemented in
diffcore-rename or diffcore-break in any way.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
count-delta.h | 3 ++-
diffcore.h | 2 --
count-delta.c | 30 ++++++++++++++++--------------
diffcore-break.c | 15 +++++++++++----
diffcore-rename.c | 15 +++++++++++----
5 files changed, 40 insertions(+), 25 deletions(-)
diff --git a/count-delta.h b/count-delta.h
--- a/count-delta.h
+++ b/count-delta.h
@@ -4,6 +4,7 @@
#ifndef COUNT_DELTA_H
#define COUNT_DELTA_H
-unsigned long count_delta(void *, unsigned long);
+int count_delta(void *, unsigned long,
+ unsigned long *src_copied, unsigned long *literal_added);
#endif
diff --git a/diffcore.h b/diffcore.h
--- a/diffcore.h
+++ b/diffcore.h
@@ -12,8 +12,6 @@
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
-#define RENAME_DST_MATCHED 01
-
struct diff_filespec {
unsigned char sha1[20];
char *path;
diff --git a/count-delta.c b/count-delta.c
--- a/count-delta.c
+++ b/count-delta.c
@@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const
/*
* NOTE. We do not _interpret_ delta fully. As an approximation, we
* just count the number of bytes that are copied from the source, and
- * the number of literal data bytes that are inserted. Number of
- * bytes that are _not_ copied from the source is deletion, and number
- * of inserted literal bytes are addition, so sum of them is what we
- * return. xdelta can express an edit that copies data inside of the
- * destination which originally came from the source. We do not count
- * that in the following routine, so we are undercounting the source
- * material that remains in the final output that way.
+ * the number of literal data bytes that are inserted.
+ *
+ * Number of bytes that are _not_ copied from the source is deletion,
+ * and number of inserted literal bytes are addition, so sum of them
+ * is the extent of damage. xdelta can express an edit that copies
+ * data inside of the destination which originally came from the
+ * source. We do not count that in the following routine, so we are
+ * undercounting the source material that remains in the final output
+ * that way.
*/
-unsigned long count_delta(void *delta_buf, unsigned long delta_size)
+int count_delta(void *delta_buf, unsigned long delta_size,
+ unsigned long *src_copied, unsigned long *literal_added)
{
unsigned long copied_from_source, added_literal;
const unsigned char *data, *top;
@@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_bu
/* the smallest delta size possible is 6 bytes */
if (delta_size < 6)
- return UINT_MAX;
+ return -1;
data = delta_buf;
top = delta_buf + delta_size;
@@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_bu
/* sanity check */
if (data != top || out != dst_size)
- return UINT_MAX;
+ return -1;
/* delete size is what was _not_ copied from source.
* edit size is that and literal additions.
*/
- if (src_size + added_literal < copied_from_source)
- /* we ended up overcounting and underflowed */
- return 0;
- return (src_size - copied_from_source) + added_literal;
+ *src_copied = copied_from_source;
+ *literal_added = added_literal;
+ return 0;
}
diff --git a/diffcore-break.c b/diffcore-break.c
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -23,7 +23,7 @@ static int very_different(struct diff_fi
* want to get the filepair broken.
*/
void *delta;
- unsigned long delta_size, base_size;
+ unsigned long delta_size, base_size, src_copied, literal_added;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0; /* leave symlink rename alone */
@@ -61,10 +61,17 @@ static int very_different(struct diff_fi
return MAX_SCORE;
/* Estimate the edit size by interpreting delta. */
- delta_size = count_delta(delta, delta_size);
+ if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+ free(delta);
+ return 0;
+ }
free(delta);
- if (delta_size == UINT_MAX)
- return 0; /* error in delta computation */
+
+ /* Extent of damage */
+ if (src->size + literal_added < src_copied)
+ delta_size = 0;
+ else
+ delta_size = (src->size - src_copied) + literal_added;
if (base_size < delta_size)
return MAX_SCORE;
diff --git a/diffcore-rename.c b/diffcore-rename.c
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -135,7 +135,7 @@ static int estimate_similarity(struct di
* call into this function in that case.
*/
void *delta;
- unsigned long delta_size, base_size;
+ unsigned long delta_size, base_size, src_copied, literal_added;
int score;
/* We deal only with regular files. Symlink renames are handled
@@ -174,10 +174,17 @@ static int estimate_similarity(struct di
return 0;
/* Estimate the edit size by interpreting delta. */
- delta_size = count_delta(delta, delta_size);
- free(delta);
- if (delta_size == UINT_MAX)
+ if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+ free(delta);
return 0;
+ }
+ free(delta);
+
+ /* Extent of damage */
+ if (src->size + literal_added < src_copied)
+ delta_size = 0;
+ else
+ delta_size = (src->size - src_copied) + literal_added;
/*
* Now we will give some score to it. 100% edit gets 0 points
------------
next prev parent reply other threads:[~2005-06-03 8:34 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-05-30 20:00 I want to release a "git-1.0" Linus Torvalds
2005-05-30 20:33 ` jeff millar
2005-05-30 20:49 ` Nicolas Pitre
2005-06-01 6:52 ` Junio C Hamano
2005-06-01 8:24 ` [PATCH] Add -d flag to git-pull-* family Junio C Hamano
2005-06-01 14:39 ` Nicolas Pitre
2005-06-01 16:00 ` Junio C Hamano
[not found] ` <7v1x7lk8fl.fsf_-_@assigned-by-dhcp.cox.net>
2005-06-02 0:47 ` [PATCH] Handle deltified object correctly in git-*-pull family Nicolas Pitre
[not found] ` <7vpsv5hbm5.fsf@assigned-by-dhcp.cox.net>
2005-06-02 0:51 ` [PATCH] Stop inflating the whole SHA1 file only to check size Nicolas Pitre
2005-06-02 1:32 ` Junio C Hamano
2005-06-02 0:58 ` [PATCH] Handle deltified object correctly in git-*-pull family Linus Torvalds
2005-06-02 1:43 ` Junio C Hamano
2005-05-30 20:59 ` I want to release a "git-1.0" Junio C Hamano
2005-05-30 21:07 ` Junio C Hamano
2005-05-30 22:11 ` David Greaves
2005-05-30 22:12 ` Dave Jones
2005-05-30 22:55 ` Dmitry Torokhov
2005-05-30 23:15 ` Junio C Hamano
2005-05-30 23:23 ` Dmitry Torokhov
2005-05-31 0:52 ` Linus Torvalds
2005-05-30 22:19 ` Ryan Anderson
2005-05-31 0:58 ` Linus Torvalds
2005-05-30 22:32 ` Chris Wedgwood
2005-05-30 23:56 ` Chris Wedgwood
2005-05-31 1:06 ` Linus Torvalds
2005-06-01 2:11 ` Junio C Hamano
2005-06-01 2:25 ` David Lang
2005-06-01 4:53 ` Junio C Hamano
2005-06-01 20:06 ` David Lang
2005-06-01 20:16 ` C. Scott Ananian
2005-06-02 0:43 ` Nicolas Pitre
2005-06-02 1:14 ` Brian O'Mahoney
2005-06-01 23:03 ` Junio C Hamano
2005-05-31 0:19 ` Petr Baudis
2005-05-31 13:45 ` Eric W. Biederman
2005-06-01 3:04 ` Linus Torvalds
2005-06-01 4:06 ` Junio C Hamano
2005-06-02 23:54 ` [PATCH] Fix -B "very-different" logic Junio C Hamano
2005-06-03 0:21 ` Linus Torvalds
2005-06-03 1:33 ` Junio C Hamano
2005-06-03 8:32 ` [PATCH 0/4] " Junio C Hamano
2005-06-03 8:36 ` Junio C Hamano [this message]
2005-06-03 8:36 ` [PATCH 2/4] diff: Fix docs and add -O to diff-helper Junio C Hamano
2005-06-03 8:37 ` [PATCH 3/4] diff: Clean up diff_scoreopt_parse() Junio C Hamano
2005-06-03 8:40 ` [PATCH 4/4] diff: Update -B heuristics Junio C Hamano
2005-06-01 6:28 ` I want to release a "git-1.0" Junio C Hamano
2005-06-01 22:00 ` Daniel Barkalow
2005-06-01 23:05 ` Junio C Hamano
2005-06-03 9:47 ` Petr Baudis
2005-06-03 15:09 ` Daniel Barkalow
2005-06-02 7:15 ` Eric W. Biederman
2005-06-02 8:32 ` Kay Sievers
2005-06-02 14:52 ` Linus Torvalds
2005-06-02 12:02 ` [PATCH] several typos in tutorial Alexey Nezhdanov
2005-06-02 12:41 ` Vincent Hanquez
2005-06-02 12:45 ` Alexey Nezhdanov
2005-06-02 12:51 ` Vincent Hanquez
2005-06-02 12:56 ` Alexey Nezhdanov
2005-06-02 13:00 ` Alexey Nezhdanov
2005-06-02 23:40 ` I want to release a "git-1.0" Adam Kropelin
2005-06-03 0:06 ` Linus Torvalds
2005-06-03 0:47 ` Linus Torvalds
2005-06-03 1:34 ` Adam Kropelin
2005-06-02 19:43 ` CVS migration section to the tutorial Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7vekbjq1l8.fsf_-_@assigned-by-dhcp.cox.net \
--to=junkio@cox.net \
--cc=git@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).