git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Stefan Beller <sbeller@google.com>
To: git@vger.kernel.org
Cc: Stefan Beller <sbeller@google.com>
Subject: [PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs
Date: Thu, 19 Jul 2018 11:56:20 -0700	[thread overview]
Message-ID: <20180719185620.124768-4-sbeller@google.com> (raw)
In-Reply-To: <20180719185620.124768-1-sbeller@google.com>

This fixes a memory issue when recursing a lot, which can be reproduced as

    seq 1   100000 >one
    seq 1 4 100000 >two
    git diff --no-index --histogram one two

Before this patch, histogram_diff would call itself recursively before
calling free_index, which would mean a lot of memory is allocated during
the recursion and only freed afterwards. By moving the memory allocation
(and its free call) into find_lcs, the memory is free'd before we recurse,
such that memory is reused in the next step of the recursion instead of
using new memory.

This addresses only the memory pressure, not the run time complexity,
that is also awful for the corner case outlined above.

Helpful in understanding the code (in addition to the sparse history of
this file), was https://stackoverflow.com/a/32367597 which reproduces
most of the code comments of the JGit implementation.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xhistogram.c | 96 +++++++++++++++++++++++++---------------------
 1 file changed, 53 insertions(+), 43 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 5098b6c5021..fc2d3cd95d9 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -251,44 +251,13 @@ static inline void free_index(struct histindex *index)
 	xdl_cha_free(&index->rcha);
 }
 
-static int find_lcs(struct histindex *index, struct region *lcs,
-	int line1, int count1, int line2, int count2) {
-	int b_ptr;
-
-	if (scanA(index, line1, count1))
-		return -1;
-
-	index->cnt = index->max_chain_length + 1;
-
-	for (b_ptr = line2; b_ptr <= LINE_END(2); )
-		b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2);
-
-	return index->has_common && index->max_chain_length < index->cnt;
-}
-
-static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
-	int line1, int count1, int line2, int count2)
+static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
+		    struct region *lcs,
+		    int line1, int count1, int line2, int count2)
 {
+	int b_ptr;
+	int sz, ret = -1;
 	struct histindex index;
-	struct region lcs;
-	int sz;
-	int result = -1;
-
-	if (count1 <= 0 && count2 <= 0)
-		return 0;
-
-	if (LINE_END(1) >= MAX_PTR)
-		return -1;
-
-	if (!count1) {
-		while(count2--)
-			env->xdf2.rchg[line2++ - 1] = 1;
-		return 0;
-	} else if (!count2) {
-		while(count1--)
-			env->xdf1.rchg[line1++ - 1] = 1;
-		return 0;
-	}
 
 	memset(&index, 0, sizeof(index));
 
@@ -326,8 +295,52 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 	index.ptr_shift = line1;
 	index.max_chain_length = 64;
 
+	if (scanA(&index, line1, count1))
+		goto cleanup;
+
+	index.cnt = index.max_chain_length + 1;
+
+	for (b_ptr = line2; b_ptr <= LINE_END(2); )
+		b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2);
+
+	if (index.has_common && index.max_chain_length < index.cnt)
+		ret = 1;
+	else
+		ret = 0;
+
+cleanup:
+	free_index(&index);
+	return ret;
+}
+
+static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
+	int line1, int count1, int line2, int count2)
+{
+	struct region lcs;
+	int lcs_found;
+	int result = -1;
+
+	if (count1 <= 0 && count2 <= 0)
+		return 0;
+
+	if (LINE_END(1) >= MAX_PTR)
+		return -1;
+
+	if (!count1) {
+		while(count2--)
+			env->xdf2.rchg[line2++ - 1] = 1;
+		return 0;
+	} else if (!count2) {
+		while(count1--)
+			env->xdf1.rchg[line1++ - 1] = 1;
+		return 0;
+	}
+
 	memset(&lcs, 0, sizeof(lcs));
-	if (find_lcs(&index, &lcs, line1, count1, line2, count2))
+	lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
+	if (lcs_found < 0)
+		goto out;
+	else if (lcs_found)
 		result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);
 	else {
 		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
@@ -341,18 +354,15 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 						line1, lcs.begin1 - line1,
 						line2, lcs.begin2 - line2);
 			if (result)
-				goto cleanup;
+				goto out;
 			result = histogram_diff(xpp, env,
 						lcs.end1 + 1, LINE_END(1) - lcs.end1,
 						lcs.end2 + 1, LINE_END(2) - lcs.end2);
 			if (result)
-				goto cleanup;
+				goto out;
 		}
 	}
-
-cleanup:
-	free_index(&index);
-
+out:
 	return result;
 }
 
-- 
2.18.0.233.g985f88cf7e-goog


  parent reply	other threads:[~2018-07-19 18:56 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-19 18:56 [PATCH 0/3] histogram diff: Fix memory ballooning in corner case Stefan Beller
2018-07-19 18:56 ` [PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff Stefan Beller
2018-07-19 18:56 ` [PATCH 2/3] xdiff/xhistogram: factor out memory cleanup into free_index() Stefan Beller
2018-07-19 18:56 ` Stefan Beller [this message]
2018-07-19 22:19   ` [PATCH] xdiff/histogram: remove tail recursion Stefan Beller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180719185620.124768-4-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).