git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 0/3] histogram diff: Fix memory ballooning in corner case
@ 2018-07-19 18:56 Stefan Beller
  2018-07-19 18:56 ` [PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff Stefan Beller
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Stefan Beller @ 2018-07-19 18:56 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

Currently if you run

    seq 1   100000 >one
    seq 1 4 100000 >two
    git diff --no-index --histogram one two

you might run into memory issues. After applying the last patch of this series
you only have to wait a bit (as it doesn't fix run time complexity), but
computes the result with less memory pressure.

Thanks,
Stefan

Stefan Beller (3):
  xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff
  xdiff/xhistogram: factor out memory cleanup into free_index()
  xdiff/xhistogram: move index allocation into find_lcs

 xdiff/xhistogram.c | 117 +++++++++++++++++++++++++--------------------
 1 file changed, 66 insertions(+), 51 deletions(-)

-- 
2.18.0.233.g985f88cf7e-goog


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff
  2018-07-19 18:56 [PATCH 0/3] histogram diff: Fix memory ballooning in corner case Stefan Beller
@ 2018-07-19 18:56 ` Stefan Beller
  2018-07-19 18:56 ` [PATCH 2/3] xdiff/xhistogram: factor out memory cleanup into free_index() Stefan Beller
  2018-07-19 18:56 ` [PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs Stefan Beller
  2 siblings, 0 replies; 5+ messages in thread
From: Stefan Beller @ 2018-07-19 18:56 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

By passing the 'xpp' and 'env' argument directly to the function
'fall_back_to_classic_diff', we eliminate an occurrence of the 'index'
in histogram_diff, which will prove useful in a bit.

While at it, move it up in the file. This will make the diff of
one of the next patches more legible.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xhistogram.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 73210cb6f3f..6e20f75fe85 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -233,6 +233,16 @@ static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr,
 	return b_next;
 }
 
+static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
+		int line1, int count1, int line2, int count2)
+{
+	xpparam_t xpparam;
+	xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
+
+	return xdl_fall_back_diff(env, &xpparam,
+				  line1, count1, line2, count2);
+}
+
 static int find_lcs(struct histindex *index, struct region *lcs,
 	int line1, int count1, int line2, int count2) {
 	int b_ptr;
@@ -248,16 +258,6 @@ static int find_lcs(struct histindex *index, struct region *lcs,
 	return index->has_common && index->max_chain_length < index->cnt;
 }
 
-static int fall_back_to_classic_diff(struct histindex *index,
-		int line1, int count1, int line2, int count2)
-{
-	xpparam_t xpp;
-	xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
-
-	return xdl_fall_back_diff(index->env, &xpp,
-				  line1, count1, line2, count2);
-}
-
 static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 	int line1, int count1, int line2, int count2)
 {
@@ -320,7 +320,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 
 	memset(&lcs, 0, sizeof(lcs));
 	if (find_lcs(&index, &lcs, line1, count1, line2, count2))
-		result = fall_back_to_classic_diff(&index, line1, count1, line2, count2);
+		result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);
 	else {
 		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
 			while (count1--)
-- 
2.18.0.233.g985f88cf7e-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] xdiff/xhistogram: factor out memory cleanup into free_index()
  2018-07-19 18:56 [PATCH 0/3] histogram diff: Fix memory ballooning in corner case Stefan Beller
  2018-07-19 18:56 ` [PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff Stefan Beller
@ 2018-07-19 18:56 ` Stefan Beller
  2018-07-19 18:56 ` [PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs Stefan Beller
  2 siblings, 0 replies; 5+ messages in thread
From: Stefan Beller @ 2018-07-19 18:56 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

This will be useful in the next patch as we'll introduce multiple
callers.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xhistogram.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 6e20f75fe85..5098b6c5021 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -243,6 +243,14 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
 				  line1, count1, line2, count2);
 }
 
+static inline void free_index(struct histindex *index)
+{
+	xdl_free(index->records);
+	xdl_free(index->line_map);
+	xdl_free(index->next_ptrs);
+	xdl_cha_free(&index->rcha);
+}
+
 static int find_lcs(struct histindex *index, struct region *lcs,
 	int line1, int count1, int line2, int count2) {
 	int b_ptr;
@@ -343,10 +351,7 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 	}
 
 cleanup:
-	xdl_free(index.records);
-	xdl_free(index.line_map);
-	xdl_free(index.next_ptrs);
-	xdl_cha_free(&index.rcha);
+	free_index(&index);
 
 	return result;
 }
-- 
2.18.0.233.g985f88cf7e-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs
  2018-07-19 18:56 [PATCH 0/3] histogram diff: Fix memory ballooning in corner case Stefan Beller
  2018-07-19 18:56 ` [PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff Stefan Beller
  2018-07-19 18:56 ` [PATCH 2/3] xdiff/xhistogram: factor out memory cleanup into free_index() Stefan Beller
@ 2018-07-19 18:56 ` Stefan Beller
  2018-07-19 22:19   ` [PATCH] xdiff/histogram: remove tail recursion Stefan Beller
  2 siblings, 1 reply; 5+ messages in thread
From: Stefan Beller @ 2018-07-19 18:56 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

This fixes a memory issue when recursing a lot, which can be reproduced as

    seq 1   100000 >one
    seq 1 4 100000 >two
    git diff --no-index --histogram one two

Before this patch, histogram_diff would call itself recursively before
calling free_index, which would mean a lot of memory is allocated during
the recursion and only freed afterwards. By moving the memory allocation
(and its free call) into find_lcs, the memory is free'd before we recurse,
such that memory is reused in the next step of the recursion instead of
using new memory.

This addresses only the memory pressure, not the run time complexity,
that is also awful for the corner case outlined above.

Helpful in understanding the code (in addition to the sparse history of
this file), was https://stackoverflow.com/a/32367597 which reproduces
most of the code comments of the JGit implementation.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xhistogram.c | 96 +++++++++++++++++++++++++---------------------
 1 file changed, 53 insertions(+), 43 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 5098b6c5021..fc2d3cd95d9 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -251,44 +251,13 @@ static inline void free_index(struct histindex *index)
 	xdl_cha_free(&index->rcha);
 }
 
-static int find_lcs(struct histindex *index, struct region *lcs,
-	int line1, int count1, int line2, int count2) {
-	int b_ptr;
-
-	if (scanA(index, line1, count1))
-		return -1;
-
-	index->cnt = index->max_chain_length + 1;
-
-	for (b_ptr = line2; b_ptr <= LINE_END(2); )
-		b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2);
-
-	return index->has_common && index->max_chain_length < index->cnt;
-}
-
-static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
-	int line1, int count1, int line2, int count2)
+static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
+		    struct region *lcs,
+		    int line1, int count1, int line2, int count2)
 {
+	int b_ptr;
+	int sz, ret = -1;
 	struct histindex index;
-	struct region lcs;
-	int sz;
-	int result = -1;
-
-	if (count1 <= 0 && count2 <= 0)
-		return 0;
-
-	if (LINE_END(1) >= MAX_PTR)
-		return -1;
-
-	if (!count1) {
-		while(count2--)
-			env->xdf2.rchg[line2++ - 1] = 1;
-		return 0;
-	} else if (!count2) {
-		while(count1--)
-			env->xdf1.rchg[line1++ - 1] = 1;
-		return 0;
-	}
 
 	memset(&index, 0, sizeof(index));
 
@@ -326,8 +295,52 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 	index.ptr_shift = line1;
 	index.max_chain_length = 64;
 
+	if (scanA(&index, line1, count1))
+		goto cleanup;
+
+	index.cnt = index.max_chain_length + 1;
+
+	for (b_ptr = line2; b_ptr <= LINE_END(2); )
+		b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2);
+
+	if (index.has_common && index.max_chain_length < index.cnt)
+		ret = 1;
+	else
+		ret = 0;
+
+cleanup:
+	free_index(&index);
+	return ret;
+}
+
+static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
+	int line1, int count1, int line2, int count2)
+{
+	struct region lcs;
+	int lcs_found;
+	int result = -1;
+
+	if (count1 <= 0 && count2 <= 0)
+		return 0;
+
+	if (LINE_END(1) >= MAX_PTR)
+		return -1;
+
+	if (!count1) {
+		while(count2--)
+			env->xdf2.rchg[line2++ - 1] = 1;
+		return 0;
+	} else if (!count2) {
+		while(count1--)
+			env->xdf1.rchg[line1++ - 1] = 1;
+		return 0;
+	}
+
 	memset(&lcs, 0, sizeof(lcs));
-	if (find_lcs(&index, &lcs, line1, count1, line2, count2))
+	lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
+	if (lcs_found < 0)
+		goto out;
+	else if (lcs_found)
 		result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);
 	else {
 		if (lcs.begin1 == 0 && lcs.begin2 == 0) {
@@ -341,18 +354,15 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 						line1, lcs.begin1 - line1,
 						line2, lcs.begin2 - line2);
 			if (result)
-				goto cleanup;
+				goto out;
 			result = histogram_diff(xpp, env,
 						lcs.end1 + 1, LINE_END(1) - lcs.end1,
 						lcs.end2 + 1, LINE_END(2) - lcs.end2);
 			if (result)
-				goto cleanup;
+				goto out;
 		}
 	}
-
-cleanup:
-	free_index(&index);
-
+out:
 	return result;
 }
 
-- 
2.18.0.233.g985f88cf7e-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH] xdiff/histogram: remove tail recursion
  2018-07-19 18:56 ` [PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs Stefan Beller
@ 2018-07-19 22:19   ` Stefan Beller
  0 siblings, 0 replies; 5+ messages in thread
From: Stefan Beller @ 2018-07-19 22:19 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

When running the same reproduction script as the previous patch,
it turns out the stack is too small, which can be easily avoided.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xhistogram.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index fc2d3cd95d9..ec85f5992bd 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -318,7 +318,9 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 {
 	struct region lcs;
 	int lcs_found;
-	int result = -1;
+	int result;
+redo:
+	result = -1;
 
 	if (count1 <= 0 && count2 <= 0)
 		return 0;
@@ -355,11 +357,17 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
 						line2, lcs.begin2 - line2);
 			if (result)
 				goto out;
-			result = histogram_diff(xpp, env,
-						lcs.end1 + 1, LINE_END(1) - lcs.end1,
-						lcs.end2 + 1, LINE_END(2) - lcs.end2);
-			if (result)
-				goto out;
+			/*
+			 * result = histogram_diff(xpp, env,
+			 *            lcs.end1 + 1, LINE_END(1) - lcs.end1,
+			 *            lcs.end2 + 1, LINE_END(2) - lcs.end2);
+			 * but let's optimize tail recursion ourself:
+			*/
+			count1 = LINE_END(1) - lcs.end1;
+			line1 = lcs.end1 + 1;
+			count2 = LINE_END(2) - lcs.end2;
+			line2 = lcs.end2 + 1;
+			goto redo;
 		}
 	}
 out:
-- 
2.18.0.233.g985f88cf7e-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-07-19 22:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-19 18:56 [PATCH 0/3] histogram diff: Fix memory ballooning in corner case Stefan Beller
2018-07-19 18:56 ` [PATCH 1/3] xdiff/xhistogram: pass arguments directly to fall_back_to_classic_diff Stefan Beller
2018-07-19 18:56 ` [PATCH 2/3] xdiff/xhistogram: factor out memory cleanup into free_index() Stefan Beller
2018-07-19 18:56 ` [PATCH 3/3] xdiff/xhistogram: move index allocation into find_lcs Stefan Beller
2018-07-19 22:19   ` [PATCH] xdiff/histogram: remove tail recursion Stefan Beller

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).