git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Phillip Wood via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Phillip Wood <phillip.wood@dunelm.org.uk>,
	Phillip Wood <phillip.wood@dunelm.org.uk>
Subject: [PATCH 1/3] diff histogram: intern strings
Date: Wed, 17 Nov 2021 11:20:23 +0000	[thread overview]
Message-ID: <38c771a74d2a348e6a752555f95b746de029b1d7.1637148025.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1079.git.1637148025.gitgitgadget@gmail.com>

From: Phillip Wood <phillip.wood@dunelm.org.uk>

Histogram is the only diff algorithm not to call
xdl_classify_record(). xdl_classify_record() ensures that the hash
values of two strings that are not equal differ which means that it is
not necessary to use xdl_recmatch() when comparing lines, all that is
necessary is to compare the hash values. This gives a 7% reduction in
the runtime of "git log --patch" when using the histogram diff
algorithm.

Test                                  HEAD^             HEAD
-----------------------------------------------------------------------------
4000.1: log -3000 (baseline)          0.18(0.14+0.04)   0.19(0.17+0.02) +5.6%
4000.2: log --raw -3000 (tree-only)   0.99(0.77+0.21)   0.98(0.78+0.20) -1.0%
4000.3: log -p -3000 (Myers)          4.84(4.31+0.51)   4.81(4.15+0.64) -0.6%
4000.4: log -p -3000 --histogram      6.34(5.86+0.46)   5.87(5.19+0.66) -7.4%
4000.5: log -p -3000 --patience       5.39(4.60+0.76)   5.35(4.60+0.73) -0.7%

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xhistogram.c |  5 ++---
 xdiff/xprepare.c   | 24 ++++++++----------------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index e694bfd9e31..6c1c88a69a1 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -91,9 +91,8 @@ struct region {
 static int cmp_recs(xpparam_t const *xpp,
 	xrecord_t *r1, xrecord_t *r2)
 {
-	return r1->ha == r2->ha &&
-		xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size,
-			    xpp->flags);
+	return r1->ha == r2->ha;
+
 }
 
 #define CMP_ENV(xpp, env, s1, l1, s2, l2) \
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index abeb8fb84e6..7fae0727a02 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -181,15 +181,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 	if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
 		goto abort;
 
-	if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-		hbits = hsize = 0;
-	else {
-		hbits = xdl_hashbits((unsigned int) narec);
-		hsize = 1 << hbits;
-		if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
-			goto abort;
-		memset(rhash, 0, hsize * sizeof(xrecord_t *));
-	}
+	hbits = xdl_hashbits((unsigned int) narec);
+	hsize = 1 << hbits;
+	if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
+		goto abort;
+	memset(rhash, 0, hsize * sizeof(xrecord_t *));
 
 	nrec = 0;
 	if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
@@ -208,9 +204,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 			crec->size = (long) (cur - prev);
 			crec->ha = hav;
 			recs[nrec++] = crec;
-
-			if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-			    xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
+			if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
 				goto abort;
 		}
 	}
@@ -279,8 +273,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	enl1 = xdl_guess_lines(mf1, sample) + 1;
 	enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-	if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
-	    xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
+	if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
 		return -1;
 
 	if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -305,8 +298,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		return -1;
 	}
 
-	if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
-		xdl_free_classifier(&cf);
+	xdl_free_classifier(&cf);
 
 	return 0;
 }
-- 
gitgitgadget


  reply	other threads:[~2021-11-17 11:20 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-17 11:20 [PATCH 0/3] xdiff: speedup histogram diff Phillip Wood via GitGitGadget
2021-11-17 11:20 ` Phillip Wood via GitGitGadget [this message]
2021-11-17 15:55   ` [PATCH 1/3] diff histogram: intern strings Derrick Stolee
2021-11-17 16:46     ` Jeff King
2021-11-17 16:52     ` Phillip Wood
2021-11-18 15:35       ` Johannes Schindelin
2021-11-18 15:42         ` Jeff King
2021-11-19 10:05           ` Phillip Wood
2021-11-19 14:45             ` Jeff King
2021-11-19 21:22               ` Ævar Arnfjörð Bjarmason
2021-11-19 22:19                 ` Jeff King
2021-11-19 15:49             ` Johannes Schindelin
2021-11-17 11:20 ` [PATCH 2/3] xdiff: avoid unnecessary memory allocations Phillip Wood via GitGitGadget
2021-11-17 11:20 ` [PATCH 3/3] xdiff: simplify comparison Phillip Wood via GitGitGadget
2021-11-18 15:40 ` [PATCH 0/3] xdiff: speedup histogram diff Johannes Schindelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=38c771a74d2a348e6a752555f95b746de029b1d7.1637148025.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=phillip.wood@dunelm.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).