git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Stefan Beller <sbeller@google.com>
To: gitster@pobox.com
Cc: git@vger.kernel.org, jrnieder@gmail.com, l.s.r@web.de,
	peff@peff.net, sbeller@google.com
Subject: [PATCH 2/2] diff.c: get rid of duplicate implementation
Date: Wed, 25 Oct 2017 11:49:12 -0700	[thread overview]
Message-ID: <20171025184912.21657-3-sbeller@google.com> (raw)
In-Reply-To: <20171025184912.21657-1-sbeller@google.com>

The implementations in diff.c to detect moved lines needs to compare
strings and hash strings, which is implemented in that file, as well
as in the xdiff library.

Remove the rather recent implementation in diff.c and rely on the well
exercised code in the xdiff lib.

With this change the hash used for bucketing the strings for the moved
line detection changes from FNV32 (that is provided via the hashmaps
memhash) to DJB2 (which is used internally in xdiff).  Benchmarks found
on the web[1] do not indicate that these hashes are different in
performance for readable strings.

[1] https://softwareengineering.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 diff.c | 82 ++++--------------------------------------------------------------
 1 file changed, 4 insertions(+), 78 deletions(-)

diff --git a/diff.c b/diff.c
index c4a669ffa8..e6814b9e9c 100644
--- a/diff.c
+++ b/diff.c
@@ -707,88 +707,14 @@ struct moved_entry {
 	struct moved_entry *next_line;
 };
 
-static int next_byte(const char **cp, const char **endp,
-		     const struct diff_options *diffopt)
-{
-	int retval;
-
-	if (*cp >= *endp)
-		return -1;
-
-	if (isspace(**cp)) {
-		if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) {
-			while (*cp < *endp && isspace(**cp))
-				(*cp)++;
-			/*
-			 * After skipping a couple of whitespaces,
-			 * we still have to account for one space.
-			 */
-			return (int)' ';
-		}
-
-		if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) {
-			while (*cp < *endp && isspace(**cp))
-				(*cp)++;
-			/*
-			 * return the first non-ws character via the usual
-			 * below, unless we ate all of the bytes
-			 */
-			if (*cp >= *endp)
-				return -1;
-		}
-	}
-
-	retval = (unsigned char)(**cp);
-	(*cp)++;
-	return retval;
-}
-
 static int moved_entry_cmp(const struct diff_options *diffopt,
 			   const struct moved_entry *a,
 			   const struct moved_entry *b,
 			   const void *keydata)
 {
-	const char *ap = a->es->line, *ae = a->es->line + a->es->len;
-	const char *bp = b->es->line, *be = b->es->line + b->es->len;
-
-	if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS))
-		return a->es->len != b->es->len  || memcmp(ap, bp, a->es->len);
-
-	if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) {
-		while (ae > ap && isspace(ae[-1]))
-			ae--;
-		while (be > bp && isspace(be[-1]))
-			be--;
-	}
-
-	while (1) {
-		int ca, cb;
-		ca = next_byte(&ap, &ae, diffopt);
-		cb = next_byte(&bp, &be, diffopt);
-		if (ca != cb)
-			return 1;
-		if (ca < 0)
-			return 0;
-	}
-}
-
-static unsigned get_string_hash(struct emitted_diff_symbol *es, struct diff_options *o)
-{
-	if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
-		static struct strbuf sb = STRBUF_INIT;
-		const char *ap = es->line, *ae = es->line + es->len;
-		int c;
-
-		strbuf_reset(&sb);
-		while (ae > ap && isspace(ae[-1]))
-			ae--;
-		while ((c = next_byte(&ap, &ae, o)) >= 0)
-			strbuf_addch(&sb, c);
-
-		return memhash(sb.buf, sb.len);
-	} else {
-		return memhash(es->line, es->len);
-	}
+	return !xdiff_compare_lines(a->es->line, a->es->len,
+				    b->es->line, b->es->len,
+				    diffopt->xdl_opts);
 }
 
 static struct moved_entry *prepare_entry(struct diff_options *o,
@@ -797,7 +723,7 @@ static struct moved_entry *prepare_entry(struct diff_options *o,
 	struct moved_entry *ret = xmalloc(sizeof(*ret));
 	struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no];
 
-	ret->ent.hash = get_string_hash(l, o);
+	ret->ent.hash = xdiff_hash_string(l->line, l->len, o->xdl_opts);
 	ret->es = l;
 	ret->next_line = NULL;
 
-- 
2.15.0.rc2.6.g953226eb5f


  parent reply	other threads:[~2017-10-25 18:49 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-24 18:59 [PATCH 0/4] (x)diff cleanup: remove duplicate code Stefan Beller
2017-10-24 18:59 ` [PATCH 1/4] hashmap: introduce memhash_feed to access the internals of FNV-1 hash Stefan Beller
2017-10-24 20:23   ` René Scharfe
2017-10-24 20:48     ` Stefan Beller
2017-10-24 18:59 ` [PATCH 2/4] xdiff-interface: export comparing and hashing strings Stefan Beller
2017-10-24 20:23   ` René Scharfe
2017-10-24 20:42     ` Stefan Beller
2017-10-26 17:03       ` René Scharfe
2017-10-24 18:59 ` [PATCH 3/4] xdiff: use stronger hash function internally Stefan Beller
2017-10-24 20:23   ` René Scharfe
2017-10-24 20:46     ` Stefan Beller
2017-10-24 23:04   ` Jonathan Nieder
2017-10-24 18:59 ` [PATCH 4/4] diff.c: get rid of duplicate implementation Stefan Beller
2017-10-24 19:02 ` [PATCH 0/4] (x)diff cleanup: remove duplicate code Stefan Beller
2017-10-24 23:42   ` [PATCHv2 0/2] " Stefan Beller
2017-10-24 23:42     ` [PATCH 1/2] xdiff-interface: export comparing and hashing strings Stefan Beller
2017-10-25  4:26       ` Eric Sunshine
2017-10-24 23:42     ` [PATCH 2/2] diff.c: get rid of duplicate implementation Stefan Beller
2017-10-25  5:11     ` [PATCHv2 0/2] (x)diff cleanup: remove duplicate code Junio C Hamano
2017-10-25 18:47       ` Stefan Beller
2017-10-25 18:49       ` [PATCHv3 " Stefan Beller
2017-10-25 18:49         ` [PATCH 1/2] xdiff-interface: export comparing and hashing strings Stefan Beller
2017-10-26 17:12           ` René Scharfe
2017-10-27  7:12             ` Junio C Hamano
2017-10-27 17:15               ` Stefan Beller
2017-10-25 18:49         ` Stefan Beller [this message]
2017-10-26  2:23           ` [PATCH 2/2] diff.c: get rid of duplicate implementation Junio C Hamano
2017-10-26 17:43           ` René Scharfe
2017-10-30 17:59             ` Jeff King
2017-10-30 19:07               ` Jeff King
2017-10-24 23:45   ` [PATCH 1/5] fnv: migrate code from hashmap to fnv Stefan Beller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171025184912.21657-3-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jrnieder@gmail.com \
    --cc=l.s.r@web.de \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).