git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] diff: add --ignore-blank-lines option
@ 2013-05-26 17:58 Antoine Pelisse
  2013-05-26 20:35 ` Johannes Sixt
  2013-06-04 18:26 ` Junio C Hamano
  0 siblings, 2 replies; 26+ messages in thread
From: Antoine Pelisse @ 2013-05-26 17:58 UTC (permalink / raw)
  To: git; +Cc: Antoine Pelisse

The goal of the patch is to introduce the GNU diff
-B/--ignore-blank-lines as closely as possible. The short option is not
available because it's already used for "break-rewrites".

When this option is used, git-diff will not create hunks that simply
adds or removes empty lines, but will still show empty lines
addition/suppression if they are close enough to "valuable" changes.

Signed-off-by: Antoine Pelisse <apelisse@gmail.com>
---
 Documentation/diff-options.txt |    3 ++
 diff.c                         |    2 ++
 t/t4015-diff-whitespace.sh     |   65 ++++++++++++++++++++++++++++++++++++++++
 xdiff/xdiff.h                  |    2 ++
 xdiff/xdiffi.c                 |   29 +++++++++++++++++-
 xdiff/xdiffi.h                 |    1 +
 xdiff/xemit.c                  |   32 ++++++++++++++++----
 xdiff/xemit.h                  |    2 +-
 xdiff/xutils.c                 |   13 ++++++++
 xdiff/xutils.h                 |    1 +
 10 files changed, 143 insertions(+), 7 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 104579d..80f06b7 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -439,6 +439,9 @@ endif::git-format-patch[]
 	differences even if one line has whitespace where the other
 	line has none.

+--ignore-blank-lines::
+	Ignore changes whose lines are all blank.
+
 --inter-hunk-context=<lines>::
 	Show the context between diff hunks, up to the specified number
 	of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index f0b3e7c..208094f 100644
--- a/diff.c
+++ b/diff.c
@@ -3593,6 +3593,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
+	else if (!strcmp(arg, "--ignore-blank-lines"))
+		DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index cc3db13..b3c4fcc 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -142,6 +142,71 @@ EOF
 git diff --ignore-space-at-eol > out
 test_expect_success 'another test, with --ignore-space-at-eol' 'test_cmp expect out'

+test_expect_success 'ignore-blank-lines: only new lines' '
+	seq 5 >x &&
+	git update-index x &&
+	seq 5 | sed "/3/i \\\\" >x &&
+	git diff --ignore-blank-lines >out &&
+	printf "" >expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: only new lines with space' '
+	seq 5 >x &&
+	git update-index x &&
+	seq 5 | sed "/3/i \ " >x &&
+	git diff -w --ignore-blank-lines >out &&
+	printf "" >expect &&
+	test_cmp out expect
+'
+
+
+test_expect_success 'ignore-blank-lines: with changes' '
+	seq 11 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+
+	1
+	2
+	3
+	change
+	4
+	5
+	6
+	7
+
+	8
+	change
+	9
+	10
+	11
+
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	sed -e "1,/^+++ b\/x/d" <out.tmp >out &&
+	cat <<-\EOF >expect &&
+	@@ -1,6 +2,7 @@
+	 1
+	 2
+	 3
+	+change
+	 4
+	 5
+	 6
+	@@ -5,7 +7,9 @@
+	 5
+	 6
+	 7
+	+
+	 8
+	+change
+	 9
+	 10
+	 11
+	EOF
+	test_cmp out expect
+'
+
 test_expect_success 'check mixed spaces and tabs in indent' '

 	# This is indented with SP HT SP.
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 219a3bb..c033991 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -39,6 +39,8 @@ extern "C" {
 #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
 #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)

+#define XDF_IGNORE_BLANK_LINES (1 << 7)
+
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index b2eb6db..3cabc78 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -394,6 +394,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 	xch->i2 = i2;
 	xch->chg1 = chg1;
 	xch->chg2 = chg2;
+	xch->ignore = 0;

 	return xch;
 }
@@ -544,7 +545,9 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	xdchange_t *xch, *xche;

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;
 		if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
 				     xch->i2, xche->i2 + xche->chg2 - xch->i2,
 				     ecb->priv) < 0)
@@ -553,6 +556,27 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	return 0;
 }

+static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+{
+	xdchange_t *xch;
+
+	for (xch = xscr; xch; xch = xch->next) {
+		int ignore = 1;
+		xrecord_t **rec;
+		long i;
+
+		rec = &xe->xdf1.recs[xch->i1];
+		for (i = 0; i < xch->chg1 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, flags);
+
+		rec = &xe->xdf2.recs[xch->i2];
+		for (i = 0; i < xch->chg2 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, flags);
+
+		xch->ignore = ignore;
+	}
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
@@ -571,6 +595,9 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		return -1;
 	}
 	if (xscr) {
+		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
+			xdl_mark_ignorable(xscr, &xe, xpp->flags);
+
 		if (ef(&xe, xscr, ecb, xecfg) < 0) {

 			xdl_free_script(xscr);
diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h
index 7a92ea9..8b81206 100644
--- a/xdiff/xdiffi.h
+++ b/xdiff/xdiffi.h
@@ -41,6 +41,7 @@ typedef struct s_xdchange {
 	struct s_xdchange *next;
 	long i1, i2;
 	long chg1, chg2;
+	int ignore;
 } xdchange_t;


diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index d11dbf9..52dfef8 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -56,14 +56,34 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 /*
  * Starting at the passed change atom, find the latest change atom to be included
  * inside the differential hunk according to the specified configuration.
+ * Also advance xscr if the first changes must be discareded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) {
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) {
 	xdchange_t *xch, *xchp;
 	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
+	long ignorable_context = max_common / 2 - 1;
+	int interesting = 0;

-	for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
-		if (xch->i1 - (xchp->i1 + xchp->chg1) > max_common)
-			break;
+	for (xchp = *xscr, xch = (*xscr)->next; xch; xchp = xch, xch = xch->next) {
+		long thresh;
+		if (xchp->ignore || xch->ignore)
+			thresh = ignorable_context;
+		else
+			thresh = max_common;
+
+		if (!xchp->ignore)
+			interesting = 1;
+
+		if (xch->i1 - (xchp->i1 + xchp->chg1) > thresh) {
+			if (interesting)
+				break;
+			else
+				*xscr = xch;
+		}
+	}
+
+	if (!interesting && xchp->ignore)
+		*xscr = NULL;

 	return xchp;
 }
@@ -139,7 +159,9 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		return xdl_emit_common(xe, xscr, ecb, xecfg);

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;

 		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
 		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
diff --git a/xdiff/xemit.h b/xdiff/xemit.h
index c2e2e83..d297107 100644
--- a/xdiff/xemit.h
+++ b/xdiff/xemit.h
@@ -27,7 +27,7 @@
 typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			   xdemitconf_t const *xecfg);

-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg);

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 9504eae..c047376 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -143,6 +143,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
 	return nl + 1;
 }

+int xdl_blankline(const char *line, long flags)
+{
+	long i;
+
+	if (!(flags & XDF_WHITESPACE_FLAGS))
+		return (*line == '\n');
+
+	for (i = 0; line[i] != '\n' && XDL_ISSPACE(line[i]); i++)
+		;
+
+	return (line[i] == '\n');
+}
+
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
 	int i1, i2;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index ad1428e..b9cceff 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -32,6 +32,7 @@ int xdl_cha_init(chastore_t *cha, long isize, long icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
+int xdl_blankline(const char *line, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
 unsigned int xdl_hashbits(unsigned int size);
--
1.7.9.5

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-05-26 17:58 [PATCH] diff: add --ignore-blank-lines option Antoine Pelisse
@ 2013-05-26 20:35 ` Johannes Sixt
  2013-05-27  7:14   ` Antoine Pelisse
  2013-06-04 18:26 ` Junio C Hamano
  1 sibling, 1 reply; 26+ messages in thread
From: Johannes Sixt @ 2013-05-26 20:35 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Am 26.05.2013 19:58, schrieb Antoine Pelisse:
> The goal of the patch is to introduce the GNU diff
> -B/--ignore-blank-lines as closely as possible. The short option is not
> available because it's already used for "break-rewrites".
> 
> When this option is used, git-diff will not create hunks that simply
> adds or removes empty lines, but will still show empty lines
> addition/suppression if they are close enough to "valuable" changes.

So when an addition or removal of a blank line appears in a hunk that
also has non-blank-line changes, the addition or removal is not treated
specially?

How is a blank line defined? What happens if a line that has only
whitespace is added or removed? I'm thinking of diffs of files with CRLF
line breaks, where the CR would count as whitespace in the line, I think.

> +--ignore-blank-lines::
> +	Ignore changes whose lines are all blank.

I think this is too terse and does not convey what the option really does.

> +test_expect_success 'ignore-blank-lines: only new lines' '
> +	seq 5 >x &&

Please use test_seq instead of seq in all new tests.

-- Hannes

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-05-26 20:35 ` Johannes Sixt
@ 2013-05-27  7:14   ` Antoine Pelisse
  2013-06-01  8:48     ` Antoine Pelisse
  0 siblings, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-05-27  7:14 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git

On Sun, May 26, 2013 at 10:35 PM, Johannes Sixt <j6t@kdbg.org> wrote:
> Am 26.05.2013 19:58, schrieb Antoine Pelisse:
>> The goal of the patch is to introduce the GNU diff
>> -B/--ignore-blank-lines as closely as possible. The short option is not
>> available because it's already used for "break-rewrites".
>>
>> When this option is used, git-diff will not create hunks that simply
>> adds or removes empty lines, but will still show empty lines
>> addition/suppression if they are close enough to "valuable" changes.
>
> So when an addition or removal of a blank line appears in a hunk that
> also has non-blank-line changes, the addition or removal is not treated
> specially?

Exactly.

> How is a blank line defined? What happens if a line that has only
> whitespace is added or removed?

xdl_blankline() is the best description of what I considered a blank line.
If no --ignore-space-* option is given, it's a line that starts and
ends with '\n'.
If any --ignore-space-* option is given, it's a line that has any
number of isspace(3)-defined characters, followed by '\n'.

> I'm thinking of diffs of files with CRLF

Good you did, because I didn't ;-)

> line breaks, where the CR would count as whitespace in the line, I think.

With the current implementation, an empty line with CRLF will not show
as a blank line if no space option is given. As CR is a space
according to isspace(3), the line will be removed with any space
option.

>> +--ignore-blank-lines::
>> +     Ignore changes whose lines are all blank.
>
> I think this is too terse and does not convey what the option really does.

That's the description from GNU diff man page. But indeed it could be
more precise.

>> +test_expect_success 'ignore-blank-lines: only new lines' '
>> +     seq 5 >x &&
>
> Please use test_seq instead of seq in all new tests.

Will fix.

> -- Hannes
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-05-27  7:14   ` Antoine Pelisse
@ 2013-06-01  8:48     ` Antoine Pelisse
  0 siblings, 0 replies; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-01  8:48 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git

On Mon, May 27, 2013 at 9:14 AM, Antoine Pelisse <apelisse@gmail.com> wrote:
> On Sun, May 26, 2013 at 10:35 PM, Johannes Sixt <j6t@kdbg.org> wrote:
>> I'm thinking of diffs of files with CRLF
>
> Good you did, because I didn't ;-)
>
>> line breaks, where the CR would count as whitespace in the line, I think.
>
> With the current implementation, an empty line with CRLF will not show
> as a blank line if no space option is given. As CR is a space
> according to isspace(3), the line will be removed with any space
> option.

Maybe it would be worth adding the diff(1) "--strip-trailing-cr"
option. So that you could remove empty lines if you have dos line
endings while caring about eol space-changes (or other space changes).

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-05-26 17:58 [PATCH] diff: add --ignore-blank-lines option Antoine Pelisse
  2013-05-26 20:35 ` Johannes Sixt
@ 2013-06-04 18:26 ` Junio C Hamano
  2013-06-04 19:08   ` Antoine Pelisse
  1 sibling, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2013-06-04 18:26 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) {
>  	xdchange_t *xch, *xchp;
>  	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
> +	long ignorable_context = max_common / 2 - 1;

Could you explain how this math works?  Also the logic to use it
when either the previous or the current one is "blank only" (as
opposed to having two different settings for "both are blank only"
and "only one of them is, and the other is not", for example)?

The normal case when neither is blank only, we refrain from
collapsing two adjacent xdchanges if the end of xchp (i.e. the
previous one) is before the beginning of xch (i.e. the current one
we are looking at) by more than max_common lines, which makes sense
to me because we count one ctxlen for the trailing context for xchp,
interhunkctxlen to force collapsing, and another ctxlen for the
leading context for xch.

When we have 

    - zero or more "blank only" changes, followed by
    - a meaningful change A, followed by
    - zero or more blank-only changes, followed by
    - a meaningful change C,

we may want to have either two hunks (A with context around it, and
C with context around it) or a single hunk (precontext before A, all
the lines from the beginning of A to the end of C, and postcontext
after C).  In either case, we want to discard the leading "blank
only" changes.

I can sort-of see how the leading "blank only" changes are discarded
in the loop (but not quite---you can ignore everything without any
"thresh", until you set "interesting" to true, i.e. seeing A, no?).

It is not clear to me how you are counting the distance between the
end of A and the beginning of C, which I think is all that matters,
to make the decision to coalesce (or not to coalesce) the above into
a single hunk, without looking ahead to find the next xdchange that
is not marked with xch->ignore flag (that is, when looking at A,
find the beginning of C to see if C.begin-A.end is within the usual
max_common).

Confused...

> +	int interesting = 0;
>
> -	for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
> -		if (xch->i1 - (xchp->i1 + xchp->chg1) > max_common)
> -			break;
> +	for (xchp = *xscr, xch = (*xscr)->next; xch; xchp = xch, xch = xch->next) {
> +		long thresh;
> +		if (xchp->ignore || xch->ignore)
> +			thresh = ignorable_context;
> +		else
> +			thresh = max_common;
> +
> +		if (!xchp->ignore)
> +			interesting = 1;
> +
> +		if (xch->i1 - (xchp->i1 + xchp->chg1) > thresh) {
> +			if (interesting)
> +				break;
> +			else
> +				*xscr = xch;
> +		}
> +	}
> +
> +	if (!interesting && xchp->ignore)
> +		*xscr = NULL;
>
>  	return xchp;
>  }
> @@ -139,7 +159,9 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
>  		return xdl_emit_common(xe, xscr, ecb, xecfg);
>
>  	for (xch = xscr; xch; xch = xche->next) {
> -		xche = xdl_get_hunk(xch, xecfg);
> +		xche = xdl_get_hunk(&xch, xecfg);
> +		if (!xch)
> +			break;
>
>  		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
>  		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
> diff --git a/xdiff/xemit.h b/xdiff/xemit.h
> index c2e2e83..d297107 100644
> --- a/xdiff/xemit.h
> +++ b/xdiff/xemit.h
> @@ -27,7 +27,7 @@
>  typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
>  			   xdemitconf_t const *xecfg);
>
> -xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg);
>  int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
>  		  xdemitconf_t const *xecfg);
>
> diff --git a/xdiff/xutils.c b/xdiff/xutils.c
> index 9504eae..c047376 100644
> --- a/xdiff/xutils.c
> +++ b/xdiff/xutils.c
> @@ -143,6 +143,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
>  	return nl + 1;
>  }
>
> +int xdl_blankline(const char *line, long flags)
> +{
> +	long i;
> +
> +	if (!(flags & XDF_WHITESPACE_FLAGS))
> +		return (*line == '\n');
> +
> +	for (i = 0; line[i] != '\n' && XDL_ISSPACE(line[i]); i++)
> +		;
> +
> +	return (line[i] == '\n');
> +}
> +
>  int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
>  {
>  	int i1, i2;
> diff --git a/xdiff/xutils.h b/xdiff/xutils.h
> index ad1428e..b9cceff 100644
> --- a/xdiff/xutils.h
> +++ b/xdiff/xutils.h
> @@ -32,6 +32,7 @@ int xdl_cha_init(chastore_t *cha, long isize, long icount);
>  void xdl_cha_free(chastore_t *cha);
>  void *xdl_cha_alloc(chastore_t *cha);
>  long xdl_guess_lines(mmfile_t *mf, long sample);
> +int xdl_blankline(const char *line, long flags);
>  int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
>  unsigned long xdl_hash_record(char const **data, char const *top, long flags);
>  unsigned int xdl_hashbits(unsigned int size);
> --
> 1.7.9.5

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-04 18:26 ` Junio C Hamano
@ 2013-06-04 19:08   ` Antoine Pelisse
  2013-06-04 20:46     ` Junio C Hamano
  0 siblings, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-04 19:08 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, Jun 4, 2013 at 8:26 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Antoine Pelisse <apelisse@gmail.com> writes:
>
>> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) {
>>       xdchange_t *xch, *xchp;
>>       long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
>> +     long ignorable_context = max_common / 2 - 1;
>
> Could you explain how this math works?

I think it doesn't, mostly because I misinterpreted the "interhunkctxlen".
I will try to think about that and provide a reroll.

Thanks for the review and analysis.
Antoine,

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-04 19:08   ` Antoine Pelisse
@ 2013-06-04 20:46     ` Junio C Hamano
  2013-06-04 20:51       ` Antoine Pelisse
  2013-06-08 20:44       ` Antoine Pelisse
  0 siblings, 2 replies; 26+ messages in thread
From: Junio C Hamano @ 2013-06-04 20:46 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> On Tue, Jun 4, 2013 at 8:26 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Antoine Pelisse <apelisse@gmail.com> writes:
>>
>>> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) {
>>>       xdchange_t *xch, *xchp;
>>>       long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
>>> +     long ignorable_context = max_common / 2 - 1;
>>
>> Could you explain how this math works?
>
> I think it doesn't, mostly because I misinterpreted the "interhunkctxlen".
> I will try to think about that and provide a reroll.

OK.  Thanks.

I think the logic would be more like:

 1. Start from xscr, find the first xchp that is !xchp->ignore;
    if there is none, we are done. There is no more to show.

 2. Remember the xchp as the beginning.

 3. Tangle ->next pointer to find the next xch that is !xch->ignore;
    if there is none, we are also done.  xdchanges between the
    beginning you remembered in the step 2. and your current xchp
    are the only things we want to show.

 4. Measure the distance between the end of xchp and the beginning
    of xch.

    - If it is larger than max_common, xdchanges between the
      beginning you remembered in the step 2. and your current xchp
      are the only things we want to show.  The next iteration will
      start by skipping the blank-only changes between xchp and xch.

    - If it is short enough, assign xchp = xch and go back to 3. to
      find more interesting hunks (that is why we remembered the
      real "beginning" in step 2.).

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-04 20:46     ` Junio C Hamano
@ 2013-06-04 20:51       ` Antoine Pelisse
  2013-06-08 20:44       ` Antoine Pelisse
  1 sibling, 0 replies; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-04 20:51 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, Jun 4, 2013 at 10:46 PM, Junio C Hamano <gitster@pobox.com> wrote:
> OK.  Thanks.
>
> I think the logic would be more like:
>
>  1. Start from xscr, find the first xchp that is !xchp->ignore;
>     if there is none, we are done. There is no more to show.
>
>  2. Remember the xchp as the beginning.
>
>  3. Tangle ->next pointer to find the next xch that is !xch->ignore;
>     if there is none, we are also done.  xdchanges between the
>     beginning you remembered in the step 2. and your current xchp
>     are the only things we want to show.
>
>  4. Measure the distance between the end of xchp and the beginning
>     of xch.
>
>     - If it is larger than max_common, xdchanges between the
>       beginning you remembered in the step 2. and your current xchp
>       are the only things we want to show.  The next iteration will
>       start by skipping the blank-only changes between xchp and xch.
>
>     - If it is short enough, assign xchp = xch and go back to 3. to
>       find more interesting hunks (that is why we remembered the
>       real "beginning" in step 2.).

Yeah, I'm doing something pretty much like that right now (though I
will have to eventually sleep).
I decided that it would indeed be easier to split the logic rather
than do everything in one loop.

Thanks for the help !

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] diff: add --ignore-blank-lines option
  2013-06-04 20:46     ` Junio C Hamano
  2013-06-04 20:51       ` Antoine Pelisse
@ 2013-06-08 20:44       ` Antoine Pelisse
  2013-06-09  7:33         ` Eric Sunshine
  2013-06-09 20:07         ` Junio C Hamano
  1 sibling, 2 replies; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-08 20:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Antoine Pelisse

The goal of the patch is to introduce the GNU diff
-B/--ignore-blank-lines as closely as possible. The short option is not
available because it's already used for "break-rewrites".

When this option is used, git-diff will not create hunks that simply
adds or removes empty lines, but will still show empty lines
addition/suppression if they are close enough to "valuable" changes.

There are two differences between this option and GNU diff -B option:
- GNU diff doesn't have "--inter-hunk-context", so this must be handled
- The following sequence looks like a bug (context is displayed twice):

    $ seq 5 >file1
    $ cat <<EOF >file2
    change
    1
    2

    3
    4
    5
    change
    EOF
    $ diff -u -B file1 file2
    --- file1	2013-06-08 22:13:04.471517834 +0200
    +++ file2	2013-06-08 22:13:23.275517855 +0200
    @@ -1,5 +1,7 @@
    +change
     1
     2
    +
     3
     4
     5
    @@ -3,3 +5,4 @@
     3
     4
     5
    +change

So here is a more thorough description of the option:
- real changes are interesting
- blank lines that are close enough (less than context size) to
interesting changes are considered interesting (recursive definition)
- "context" lines are used around each hunk of interesting changes
- If two hunks are separated by less than "inter-hunk-context", they
will be merged into one.

The current implementation does the "interesting changes selection" in a
single pass.

Signed-off-by: Antoine Pelisse <apelisse@gmail.com>
---
Hi,

On Tue, Jun 4, 2013 at 10:46 PM, Junio C Hamano <gitster@pobox.com> wrote:
> OK. Thanks.
>
> I think the logic would be more like:
>
>  1. Start from xscr, find the first xchp that is !xchp->ignore;
>     if there is none, we are done. There is no more to show.
>
>  2. Remember the xchp as the beginning.
>
>  3. Tangle ->next pointer to find the next xch that is !xch->ignore;
>     if there is none, we are also done.  xdchanges between the
>     beginning you remembered in the step 2. and your current xchp
>     are the only things we want to show.
>
>  4. Measure the distance between the end of xchp and the beginning
>     of xch.
>
>     - If it is larger than max_common, xdchanges between the
>       beginning you remembered in the step 2. and your current xchp
>       are the only things we want to show.  The next iteration will
>       start by skipping the blank-only changes between xchp and xch.
>
>     - If it is short enough, assign xchp = xch and go back to 3. to
>       find more interesting hunks (that is why we remembered the
>       real "beginning" in step 2.).

Actually it doesn't quite work like that because we don't totally ignore
"blank lines". We want to keep them if they are close enough to other
changes.

I've tried to improve the number of tests as it helped me during
implementation, and to give a better description of the feature.

The initial reroll was meant to simplify xdl_get_hunk() but I'm afraid
it became kind of "voodoo code".  I'm not sure if I should provide some
more comments about it and where.

Please let me know if something is not working as expected.

Cheers, Antoine

 Documentation/diff-options.txt |    3 +
 diff.c                         |    2 +
 t/t4015-diff-whitespace.sh     |  282 ++++++++++++++++++++++++++++++++++++++++
 xdiff/xdiff.h                  |    2 +
 xdiff/xdiffi.c                 |   29 ++++-
 xdiff/xdiffi.h                 |    1 +
 xdiff/xemit.c                  |   47 ++++++-
 xdiff/xemit.h                  |    2 +-
 xdiff/xutils.c                 |   13 ++
 xdiff/xutils.h                 |    1 +
 10 files changed, 374 insertions(+), 8 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index b8a9b86..4e042d9 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -439,6 +439,9 @@ endif::git-format-patch[]
 	differences even if one line has whitespace where the other
 	line has none.

+--ignore-blank-lines::
+	Ignore changes whose lines are all blank.
+
 --inter-hunk-context=<lines>::
 	Show the context between diff hunks, up to the specified number
 	of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index f0b3e7c..208094f 100644
--- a/diff.c
+++ b/diff.c
@@ -3593,6 +3593,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
+	else if (!strcmp(arg, "--ignore-blank-lines"))
+		DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index cc3db13..fc77713 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -142,6 +142,288 @@ EOF
 git diff --ignore-space-at-eol > out
 test_expect_success 'another test, with --ignore-space-at-eol' 'test_cmp expect out'

+test_expect_success 'ignore-blank-lines: only new lines' '
+	test_seq 5 >x &&
+	git update-index x &&
+	test_seq 5 | sed "/3/i \\
+" >x &&
+	git diff --ignore-blank-lines >out &&
+	>expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: only new lines with space' '
+	test_seq 5 >x &&
+	git update-index x &&
+	test_seq 5 | sed "/3/i \ " >x &&
+	git diff -w --ignore-blank-lines >out &&
+	>expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: after change' '
+	test_seq 7 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+
+	5
+	6
+	7
+
+	EOF
+	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,7 +1,10 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	+
+	 5
+	 6
+	 7
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: before change' '
+	test_seq 7 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+
+	1
+	2
+	3
+
+	4
+	5
+
+	6
+	7
+	change
+	EOF
+	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,7 +2,10 @@
+	 1
+	 2
+	 3
+	+
+	 4
+	 5
+	+
+	 6
+	 7
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+
+	6
+	7
+	8
+	9
+
+	10
+	change
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,5 +1,7 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	@@ -7,4 +10,6 @@
+	 7
+	 8
+	 9
+	+
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes (with interhunkctx)' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+
+	6
+	7
+	8
+	9
+
+	10
+	change
+	EOF
+	git diff --inter-hunk-context=2 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,10 +1,15 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	+
+	 6
+	 7
+	 8
+	 9
+	+
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: scattered spaces' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+	3
+
+	4
+
+	5
+
+	6
+
+	7
+
+	8
+	9
+	10
+	change
+	EOF
+	git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,3 +1,4 @@
+	+change
+	 1
+	 2
+	 3
+	@@ -8,3 +14,4 @@
+	 8
+	 9
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: mix changes and blank lines' '
+	test_seq 16 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+	change
+	6
+	7
+	8
+
+	9
+	10
+	11
+	change
+	12
+	13
+	14
+
+	15
+	16
+	change
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,8 +1,11 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	+change
+	 6
+	 7
+	 8
+	@@ -9,8 +13,11 @@
+	 9
+	 10
+	 11
+	+change
+	 12
+	 13
+	 14
+	+
+	 15
+	 16
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
 test_expect_success 'check mixed spaces and tabs in indent' '

 	# This is indented with SP HT SP.
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 219a3bb..c033991 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -39,6 +39,8 @@ extern "C" {
 #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
 #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)

+#define XDF_IGNORE_BLANK_LINES (1 << 7)
+
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index b2eb6db..2358a2d 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -394,6 +394,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 	xch->i2 = i2;
 	xch->chg1 = chg1;
 	xch->chg2 = chg2;
+	xch->ignore = 0;

 	return xch;
 }
@@ -544,7 +545,9 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	xdchange_t *xch, *xche;

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;
 		if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
 				     xch->i2, xche->i2 + xche->chg2 - xch->i2,
 				     ecb->priv) < 0)
@@ -553,6 +556,27 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	return 0;
 }

+static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+{
+	xdchange_t *xch;
+
+	for (xch = xscr; xch; xch = xch->next) {
+		int ignore = 1;
+		xrecord_t **rec;
+		long i;
+
+		rec = &xe->xdf1.recs[xch->i1];
+		for (i = 0; i < xch->chg1 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+		rec = &xe->xdf2.recs[xch->i2];
+		for (i = 0; i < xch->chg2 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+		xch->ignore = ignore;
+	}
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
@@ -571,6 +595,9 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		return -1;
 	}
 	if (xscr) {
+		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
+			xdl_mark_ignorable(xscr, &xe, xpp->flags);
+
 		if (ef(&xe, xscr, ecb, xecfg) < 0) {

 			xdl_free_script(xscr);
diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h
index 7a92ea9..8b81206 100644
--- a/xdiff/xdiffi.h
+++ b/xdiff/xdiffi.h
@@ -41,6 +41,7 @@ typedef struct s_xdchange {
 	struct s_xdchange *next;
 	long i1, i2;
 	long chg1, chg2;
+	int ignore;
 } xdchange_t;


diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index d11dbf9..04b4bb1 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -56,16 +56,49 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 /*
  * Starting at the passed change atom, find the latest change atom to be included
  * inside the differential hunk according to the specified configuration.
+ * Also advance xscr if the first changes must be discareded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) {
-	xdchange_t *xch, *xchp;
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) {
+	xdchange_t *xch, *xchp, *lxch;
 	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
+	long max_ignorable = xecfg->ctxlen;
+	int interesting = 1;

-	for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
-		if (xch->i1 - (xchp->i1 + xchp->chg1) > max_common)
+	/* remove ignorable changes that are too far before other changes */
+	for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
+		xch = xchp->next;
+
+		if (xch == NULL ||
+		    xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
+			*xscr = xch;
+	}
+
+	if (*xscr == NULL)
+		return NULL;
+
+	lxch = *xscr;
+
+	for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
+		long distance = xch->i1 - (xchp->i1 + xchp->chg1);
+		if (distance > max_common)
 			break;

-	return xchp;
+		if (distance < max_ignorable && (!xch->ignore || interesting)) {
+			lxch = xch;
+			interesting = 1;
+		} else if (!interesting) {
+			if (xch->i1 - (lxch->i1 + lxch->chg1) < max_common)
+				continue;
+			else
+				break;
+		} else if (!xch->ignore) {
+			lxch = xch;
+		} else {
+			interesting = 0;
+		}
+	}
+
+	return lxch;
 }


@@ -139,7 +172,9 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		return xdl_emit_common(xe, xscr, ecb, xecfg);

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;

 		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
 		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
diff --git a/xdiff/xemit.h b/xdiff/xemit.h
index c2e2e83..d297107 100644
--- a/xdiff/xemit.h
+++ b/xdiff/xemit.h
@@ -27,7 +27,7 @@
 typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			   xdemitconf_t const *xecfg);

-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg);

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 9504eae..62cb23d 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -143,6 +143,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
 	return nl + 1;
 }

+int xdl_blankline(const char *line, long size, long flags)
+{
+	long i;
+
+	if (!(flags & XDF_WHITESPACE_FLAGS))
+		return (size <= 1);
+
+	for (i = 0; i < size && XDL_ISSPACE(line[i]); i++)
+		;
+
+	return (i == size);
+}
+
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
 	int i1, i2;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index ad1428e..4646ce5 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -32,6 +32,7 @@ int xdl_cha_init(chastore_t *cha, long isize, long icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
+int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
 unsigned int xdl_hashbits(unsigned int size);
--
1.7.9.5

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-08 20:44       ` Antoine Pelisse
@ 2013-06-09  7:33         ` Eric Sunshine
  2013-06-09 20:07         ` Junio C Hamano
  1 sibling, 0 replies; 26+ messages in thread
From: Eric Sunshine @ 2013-06-09  7:33 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: Junio C Hamano, Git List

On Sat, Jun 8, 2013 at 4:44 PM, Antoine Pelisse <apelisse@gmail.com> wrote:
> The goal of the patch is to introduce the GNU diff
> -B/--ignore-blank-lines as closely as possible. The short option is not
> available because it's already used for "break-rewrites".
>
> When this option is used, git-diff will not create hunks that simply
> adds or removes empty lines, but will still show empty lines

s/hunks/a hunk/
...or...
s/adds or removes/add or remove/

> addition/suppression if they are close enough to "valuable" changes.
>
> diff --git a/xdiff/xemit.c b/xdiff/xemit.c
> index d11dbf9..04b4bb1 100644
> --- a/xdiff/xemit.c
> +++ b/xdiff/xemit.c
> @@ -56,16 +56,49 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
>  /*
>   * Starting at the passed change atom, find the latest change atom to be included
>   * inside the differential hunk according to the specified configuration.
> + * Also advance xscr if the first changes must be discareded.
>   */

s/discareded/discarded/

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-08 20:44       ` Antoine Pelisse
  2013-06-09  7:33         ` Eric Sunshine
@ 2013-06-09 20:07         ` Junio C Hamano
  2013-06-09 20:32           ` Antoine Pelisse
  2013-06-10 21:03           ` Antoine Pelisse
  1 sibling, 2 replies; 26+ messages in thread
From: Junio C Hamano @ 2013-06-09 20:07 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> The goal of the patch is to introduce the GNU diff
> -B/--ignore-blank-lines as closely as possible. The short option is not
> available because it's already used for "break-rewrites".
>
> When this option is used, git-diff will not create hunks that simply
> adds or removes empty lines, but will still show empty lines
> addition/suppression if they are close enough to "valuable" changes.
>
> There are two differences between this option and GNU diff -B option:
> - GNU diff doesn't have "--inter-hunk-context", so this must be handled
> - The following sequence looks like a bug (context is displayed twice):
>
>     $ seq 5 >file1
>     $ cat <<EOF >file2
>     change
>     1
>     2
>
>     3
>     4
>     5
>     change
>     EOF
>     $ diff -u -B file1 file2
>     --- file1	2013-06-08 22:13:04.471517834 +0200
>     +++ file2	2013-06-08 22:13:23.275517855 +0200
>     @@ -1,5 +1,7 @@
>     +change
>      1
>      2
>     +
>      3
>      4
>      5
>     @@ -3,3 +5,4 @@
>      3
>      4
>      5
>     +change

Yes, this is a bug in the previous round, and the approach I
outlined in the previous message was also designed to address it by
coalescing adjacent hunks by measuring the distance correctly.

> Actually it doesn't quite work like that because we don't totally ignore
> "blank lines". We want to keep them if they are close enough to other
> changes.

A new test vector in your patch is a good illustration of this.

> +test_expect_success 'ignore-blank-lines: after change' '
> +	test_seq 7 >x &&
> +	git update-index x &&
> +	cat <<-\EOF >x &&
> +	change
> +	1
> +	2
> +
> +	3
> +	4
> +
> +	5
> +	6
> +	7
> +
> +	EOF

The test makes the original with 1 thru 7 to the above shape.  The
argument for the behaviour in this patch is that additions of these
new blank lines are close enough to the real change of inserting the
first line with "change".  

If you are not interested in changes in additions of blank lines (by
the way, do we also handle deletions and do your new tests check
them?), one could however argue that the user would want not to see
the addition of the blank between 4 and 5 or after 7.

At first glance, it seems impossible to express that if we need to
show three lines of context, in other words, this output

	@@ -1,2 +1,3 @@
	+change
         1
         2

cannot be a correct patch output --ignore-blank-lines-change output
because it does not show enough context lines after the real change
(we want 3 lines).
 
However, let's step back and think what other ignore blank options do.

When any ignore blank option is used, there will be lines that
actually has changes (hence should be shown with +/-) but we
deliberately ignore their changes (hence, if they ever appear in the
hunk, they do so as context lines prefixed with SP not +/-).  When
we do so, we show the lines from the postimage in the context.

So in that sense, showing this would actually be acceptable (the
last postcontext line in this hunk is a blank line).

	@@ -1,3 +1,4 @@
	+change
         1
         2
	  

We are showing the new blank line the change added after 2 as a
shared context, following the same principle to show from the
postimage when we turned a line with a real change into a
non-change.

> +	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
> +	cat <<-\EOF >expected &&
> +	diff --git a/x b/x
> +	--- a/x
> +	+++ b/x
> +	@@ -1,7 +1,10 @@
> +	+change
> +	 1
> +	 2
> +	+
> +	 3
> +	 4
> +	+
> +	 5
> +	 6
> +	 7
> +	EOF
> +	compare_diff_patch expected out.tmp
> +'

And from that point of view, this expected output may be excessively
noisy.

So I dunno.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-09 20:07         ` Junio C Hamano
@ 2013-06-09 20:32           ` Antoine Pelisse
  2013-06-09 21:28             ` Junio C Hamano
  2013-06-10 21:03           ` Antoine Pelisse
  1 sibling, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-09 20:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, Jun 9, 2013 at 10:07 PM, Junio C Hamano <gitster@pobox.com> wrote:
> by
> the way, do we also handle deletions and do your new tests check
> them?

As stated in the commit message, yes we should, but we don't have
tests for that.
I will need to add some as I think I found a bug when removing blank lines.

>> +     git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
>> +     cat <<-\EOF >expected &&
>> +     diff --git a/x b/x
>> +     --- a/x
>> +     +++ b/x
>> +     @@ -1,7 +1,10 @@
>> +     +change
>> +      1
>> +      2
>> +     +
>> +      3
>> +      4
>> +     +
>> +      5
>> +      6
>> +      7
>> +     EOF
>> +     compare_diff_patch expected out.tmp
>> +'
>
> And from that point of view, this expected output may be excessively
> noisy.
>
> So I dunno.

It might be kind of noisy, but I think trying to improve the solution
might lead to over-engineering.
How would we compute the "minimal distance between interesting and
blank" so that the blank becomes interesting ?
Using the context size for that is quite convenient, while creating
another variable would probably become overkill..

The original goal is to remove hunks created solely for
addition/suppression, and I think it's what it should do for the
moment.
But of course, I have no strong opinion about that.

And by the way, I have also another bug, so you can expect another
re-roll (sorry about that, it's more complex than I initially
thought).

Thanks a lot,
Antoine

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-09 20:32           ` Antoine Pelisse
@ 2013-06-09 21:28             ` Junio C Hamano
  0 siblings, 0 replies; 26+ messages in thread
From: Junio C Hamano @ 2013-06-09 21:28 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> It might be kind of noisy, but I think trying to improve the solution
> might lead to over-engineering.
> How would we compute the "minimal distance between interesting and
> blank" so that the blank becomes interesting ?
> Using the context size for that is quite convenient, while creating
> another variable would probably become overkill..
>
> The original goal is to remove hunks created solely for
> addition/suppression, and I think it's what it should do for the
> moment.

Something like this on top of your original one is what I had in
mind as a starting point.

 t/t4015-diff-whitespace.sh |  5 +----
 xdiff/xemit.c              | 45 ++++++++++++++++++++++++++-------------------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index b3c4fcc..acc2159 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -185,7 +185,7 @@ test_expect_success 'ignore-blank-lines: with changes' '
 	git diff --ignore-blank-lines >out.tmp &&
 	sed -e "1,/^+++ b\/x/d" <out.tmp >out &&
 	cat <<-\EOF >expect &&
-	@@ -1,6 +2,7 @@
+	@@ -1,11 +2,14 @@
 	 1
 	 2
 	 3
@@ -193,9 +193,6 @@ test_expect_success 'ignore-blank-lines: with changes' '
 	 4
 	 5
 	 6
-	@@ -5,7 +7,9 @@
-	 5
-	 6
 	 7
 	+
 	 8
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 52dfef8..27e1105 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -59,32 +59,39 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
  * Also advance xscr if the first changes must be discareded.
  */
 xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg) {
-	xdchange_t *xch, *xchp;
+	xdchange_t *xch, *xchp = NULL, *xch_start = NULL;
 	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
-	long ignorable_context = max_common / 2 - 1;
-	int interesting = 0;
 
-	for (xchp = *xscr, xch = (*xscr)->next; xch; xchp = xch, xch = xch->next) {
-		long thresh;
-		if (xchp->ignore || xch->ignore)
-			thresh = ignorable_context;
-		else
-			thresh = max_common;
-
-		if (!xchp->ignore)
-			interesting = 1;
+	/* Skip the ones that can be ignored from the beginning */
+	for (xch = *xscr; xch; xch = xch->next) {
+		if (xch->ignore)
+			continue;
+		xch_start = xch;
+		break;
+	}
 
-		if (xch->i1 - (xchp->i1 + xchp->chg1) > thresh) {
-			if (interesting)
+	for (xchp = xch_start; xchp; ) {
+		/* Find the next one that is not ignored */
+		for (xch = xchp->next; xch; xch = xch->next)
+			if (!xch->ignore)
 				break;
-			else
-				*xscr = xch;
+		if (!xch)
+			break; /* show xch_start thru xchp */
+
+		/* are these hunks close enough? */
+		if ((xchp->i1 + xchp->chg1) - xch->i1 < max_common) {
+			xchp = xch;
+			continue;
 		}
-	}
 
-	if (!interesting && xchp->ignore)
-		*xscr = NULL;
+		/*
+		 * otherwise, xchp is the last one (inclusive) we want
+		 * to coalesce into a single output hunk.
+		 */
+		break;
+	}
 
+	*xscr = xch_start;
 	return xchp;
 }
 
-- 
1.8.3-477-gc2fede3

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-09 20:07         ` Junio C Hamano
  2013-06-09 20:32           ` Antoine Pelisse
@ 2013-06-10 21:03           ` Antoine Pelisse
  2013-06-10 21:43             ` Junio C Hamano
  1 sibling, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-10 21:03 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, Jun 9, 2013 at 10:07 PM, Junio C Hamano <gitster@pobox.com> wrote:
> When any ignore blank option is used, there will be lines that
> actually has changes (hence should be shown with +/-) but we
> deliberately ignore their changes (hence, if they ever appear in the
> hunk, they do so as context lines prefixed with SP not +/-).  When
> we do so, we show the lines from the postimage in the context.

Don't we actually use preimage (see below) ? I think using pre-image
allows the patch to be applicable to another tree (but ignoring the
space changes).
If we actually hide new blank lines that are in the context, it means
that we won't be able to apply a patch with 2 new blank lines in the 3
line context.

Anyway, I'm starting to think that "show blank lines changes near
other changes" makes sense more and more sense.
By the way I have a patch I *think* is working, but I will check it
another thousand times before sending.

Cheers,
Antoine

$ git diff
diff --git a/x b/x
index e562137..226e35a 100644
--- a/x
+++ b/x
@@ -4,8 +4,9 @@ change
 3
 4
 5
-6
-7
-8
-9
+   6
+7
+change
+  8
+9
 10

$ git diff -w
diff --git a/x b/x
index e562137..226e35a 100644
--- a/x
+++ b/x
@@ -6,6 +6,7 @@ change
 5
 6
 7
+change
 8
 9
 10

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-10 21:03           ` Antoine Pelisse
@ 2013-06-10 21:43             ` Junio C Hamano
  2013-06-12 13:21               ` Antoine Pelisse
  0 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2013-06-10 21:43 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> On Sun, Jun 9, 2013 at 10:07 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> When any ignore blank option is used, there will be lines that
>> actually has changes (hence should be shown with +/-) but we
>> deliberately ignore their changes (hence, if they ever appear in the
>> hunk, they do so as context lines prefixed with SP not +/-).  When
>> we do so, we show the lines from the postimage in the context.
>
> Don't we actually use preimage (see below) ? I think using pre-image
> allows the patch to be applicable to another tree (but ignoring the
> space changes).

But the result of such patch application is not usually what you
want to use.  If we use postimage (which by the way was a deliberate
design decision we made earlier), at least the review of the patch
is easier because you would see the end result more clearly.

> If we actually hide new blank lines that are in the context, it means
> that we won't be able to apply a patch with 2 new blank lines in the 3
> line context.

Yes, but I do not think the point of --ignore-blank-lines is to
produce a patch that can be applied in the first place.  It is to
allow easier eyeballing.

> Anyway, I'm starting to think that "show blank lines changes near
> other changes" makes sense more and more sense.

Probably.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-10 21:43             ` Junio C Hamano
@ 2013-06-12 13:21               ` Antoine Pelisse
  2013-06-12 17:22                 ` Junio C Hamano
  0 siblings, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-12 13:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, René Scharfe

On Mon, Jun 10, 2013 at 11:43 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Antoine Pelisse <apelisse@gmail.com> writes:
>
>> On Sun, Jun 9, 2013 at 10:07 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>> When any ignore blank option is used, there will be lines that
>>> actually has changes (hence should be shown with +/-) but we
>>> deliberately ignore their changes (hence, if they ever appear in the
>>> hunk, they do so as context lines prefixed with SP not +/-).  When
>>> we do so, we show the lines from the postimage in the context.
>>
>> Don't we actually use preimage (see below) ? I think using pre-image
>> allows the patch to be applicable to another tree (but ignoring the
>> space changes).

Answering to myself: OK, my package version of git is 1.7.9.5 while
the post-image is used since 1.7.10 or something. That explains my
confusion.

> But the result of such patch application is not usually what you
> want to use.  If we use postimage (which by the way was a deliberate
> design decision we made earlier), at least the review of the patch
> is easier because you would see the end result more clearly.

I've found the patch and discussion [1] about that switch from
pre-image to post-image, so I can understand the motives (and see that
you actually considered problems for applying such a patch). I always
felt confident that running "git send-email -w" would send a patch
(that can be applied) without the potential space errors/changes I
would have added.

I think it's unfortunate that Git does generate patches with git-diff
that can't be applied if any space option is used. I'm still not
really convinced by the pre-image to post-image change, and maybe I
would have made it a non-default option. What is done is done, but I'd
rather like not do the same here, if possible.

>> If we actually hide new blank lines that are in the context, it means
>> that we won't be able to apply a patch with 2 new blank lines in the 3
>> line context.
>
> Yes, but I do not think the point of --ignore-blank-lines is to
> produce a patch that can be applied in the first place.  It is to
> allow easier eyeballing.

I think it can not be applied because it's *hard* for a computer to
actually find the correct location, and it may be equally hard for the
reader to evaluate the change with removed/different context.

>> Anyway, I'm starting to think that "show blank lines changes near
>> other changes" makes sense more and more sense.
>
> Probably.

I'm glad to see how convinced you are ;)

I will send my patch and see what makes more sense.

[1]: $gmane/188305

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-12 13:21               ` Antoine Pelisse
@ 2013-06-12 17:22                 ` Junio C Hamano
  2013-06-15 13:01                   ` Antoine Pelisse
  0 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2013-06-12 17:22 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git, René Scharfe

Antoine Pelisse <apelisse@gmail.com> writes:

>>> Anyway, I'm starting to think that "show blank lines changes near
>>> other changes" makes sense more and more sense.
>>
>> Probably.
>
> I'm glad to see how convinced you are ;)

That is not me "not convinced".

It is merely "I do not have a strong conviction that you are wrong;
I'd have to think it (again) after re-reading your patch."

Thanks.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] diff: add --ignore-blank-lines option
  2013-06-12 17:22                 ` Junio C Hamano
@ 2013-06-15 13:01                   ` Antoine Pelisse
  2013-06-17 16:18                     ` Junio C Hamano
  0 siblings, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-15 13:01 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Antoine Pelisse

The goal of the patch is to introduce the GNU diff
-B/--ignore-blank-lines as closely as possible. The short option is not
available because it's already used for "break-rewrites".

When this option is used, git-diff will not create hunks that simply
add or remove empty lines, but will still show empty lines
addition/suppression if they are close enough to "valuable" changes.

There are two differences between this option and GNU diff -B option:
- GNU diff doesn't have "--inter-hunk-context", so this must be handled
- The following sequence looks like a bug (context is displayed twice):

    $ seq 5 >file1
    $ cat <<EOF >file2
    change
    1
    2

    3
    4
    5
    change
    EOF
    $ diff -u -B file1 file2
    --- file1	2013-06-08 22:13:04.471517834 +0200
    +++ file2	2013-06-08 22:13:23.275517855 +0200
    @@ -1,5 +1,7 @@
    +change
     1
     2
    +
     3
     4
     5
    @@ -3,3 +5,4 @@
     3
     4
     5
    +change

So here is a more thorough description of the option:
- real changes are interesting
- blank lines that are close enough (less than context size) to
interesting changes are considered interesting (recursive definition)
- "context" lines are used around each hunk of interesting changes
- If two hunks are separated by less than "inter-hunk-context", they
will be merged into one.

The current implementation does the "interesting changes selection" in a
single pass.

Signed-off-by: Antoine Pelisse <apelisse@gmail.com>
---
Hey,
OK I think this version should be fine (I don't have any bug that comes
to mind).
Please review the "between changes" test as I made a choice there that can
be controversial.

Thanks,
Antoine

 Documentation/diff-options.txt |    3 +
 diff.c                         |    2 +
 t/t4015-diff-whitespace.sh     |  308 ++++++++++++++++++++++++++++++++++++++++
 xdiff/xdiff.h                  |    2 +
 xdiff/xdiffi.c                 |   29 +++-
 xdiff/xdiffi.h                 |    1 +
 xdiff/xemit.c                  |   50 ++++++-
 xdiff/xemit.h                  |    2 +-
 xdiff/xutils.c                 |   13 ++
 xdiff/xutils.h                 |    1 +
 10 files changed, 403 insertions(+), 8 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index b8a9b86..4e042d9 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -439,6 +439,9 @@ endif::git-format-patch[]
 	differences even if one line has whitespace where the other
 	line has none.

+--ignore-blank-lines::
+	Ignore changes whose lines are all blank.
+
 --inter-hunk-context=<lines>::
 	Show the context between diff hunks, up to the specified number
 	of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index f0b3e7c..208094f 100644
--- a/diff.c
+++ b/diff.c
@@ -3593,6 +3593,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
+	else if (!strcmp(arg, "--ignore-blank-lines"))
+		DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index cc3db13..6ed6934 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -142,6 +142,314 @@ EOF
 git diff --ignore-space-at-eol > out
 test_expect_success 'another test, with --ignore-space-at-eol' 'test_cmp expect out'

+test_expect_success 'ignore-blank-lines: only new lines' '
+	test_seq 5 >x &&
+	git update-index x &&
+	test_seq 5 | sed "/3/i \\
+" >x &&
+	git diff --ignore-blank-lines >out &&
+	>expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: only new lines with space' '
+	test_seq 5 >x &&
+	git update-index x &&
+	test_seq 5 | sed "/3/i \ " >x &&
+	git diff -w --ignore-blank-lines >out &&
+	>expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: after change' '
+	cat <<-\EOF >x &&
+	1
+	2
+
+	3
+	4
+	5
+
+	6
+	7
+	EOF
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+
+	1
+	2
+	3
+	4
+	5
+	6
+
+	7
+	EOF
+	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,6 +1,7 @@
+	+change
+	+
+	 1
+	 2
+	-
+	 3
+	 4
+	 5
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: before change' '
+	cat <<-\EOF >x &&
+	1
+	2
+
+	3
+	4
+	5
+	6
+	7
+	EOF
+	git update-index x &&
+	cat <<-\EOF >x &&
+
+	1
+	2
+	3
+	4
+	5
+
+	6
+	7
+	change
+	EOF
+	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -4,5 +4,7 @@
+	 3
+	 4
+	 5
+	+
+	 6
+	 7
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes' '
+	cat <<-\EOF >x &&
+	1
+	2
+	3
+	4
+	5
+
+
+	6
+	7
+	8
+	9
+	10
+	EOF
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+	6
+	7
+	8
+
+	9
+	10
+	change
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,5 +1,7 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	@@ -8,5 +8,7 @@
+	 6
+	 7
+	 8
+	+
+	 9
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes (with interhunkctx)' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+
+	6
+	7
+	8
+	9
+
+	10
+	change
+	EOF
+	git diff --inter-hunk-context=2 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,10 +1,15 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	+
+	 6
+	 7
+	 8
+	 9
+	+
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: scattered spaces' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+	3
+
+	4
+
+	5
+
+	6
+
+
+	7
+
+	8
+	9
+	10
+	change
+	EOF
+	git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,3 +1,4 @@
+	+change
+	 1
+	 2
+	 3
+	@@ -8,3 +15,4 @@
+	 8
+	 9
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: mix changes and blank lines' '
+	test_seq 16 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+	change
+	6
+	7
+	8
+
+	9
+	10
+	11
+	change
+	12
+	13
+	14
+
+	15
+	16
+	change
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,8 +1,11 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	+change
+	 6
+	 7
+	 8
+	@@ -9,8 +13,11 @@
+	 9
+	 10
+	 11
+	+change
+	 12
+	 13
+	 14
+	+
+	 15
+	 16
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
 test_expect_success 'check mixed spaces and tabs in indent' '

 	# This is indented with SP HT SP.
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 219a3bb..c033991 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -39,6 +39,8 @@ extern "C" {
 #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
 #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)

+#define XDF_IGNORE_BLANK_LINES (1 << 7)
+
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index b2eb6db..2358a2d 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -394,6 +394,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 	xch->i2 = i2;
 	xch->chg1 = chg1;
 	xch->chg2 = chg2;
+	xch->ignore = 0;

 	return xch;
 }
@@ -544,7 +545,9 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	xdchange_t *xch, *xche;

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;
 		if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
 				     xch->i2, xche->i2 + xche->chg2 - xch->i2,
 				     ecb->priv) < 0)
@@ -553,6 +556,27 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	return 0;
 }

+static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+{
+	xdchange_t *xch;
+
+	for (xch = xscr; xch; xch = xch->next) {
+		int ignore = 1;
+		xrecord_t **rec;
+		long i;
+
+		rec = &xe->xdf1.recs[xch->i1];
+		for (i = 0; i < xch->chg1 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+		rec = &xe->xdf2.recs[xch->i2];
+		for (i = 0; i < xch->chg2 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+		xch->ignore = ignore;
+	}
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
@@ -571,6 +595,9 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		return -1;
 	}
 	if (xscr) {
+		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
+			xdl_mark_ignorable(xscr, &xe, xpp->flags);
+
 		if (ef(&xe, xscr, ecb, xecfg) < 0) {

 			xdl_free_script(xscr);
diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h
index 7a92ea9..8b81206 100644
--- a/xdiff/xdiffi.h
+++ b/xdiff/xdiffi.h
@@ -41,6 +41,7 @@ typedef struct s_xdchange {
 	struct s_xdchange *next;
 	long i1, i2;
 	long chg1, chg2;
+	int ignore;
 } xdchange_t;


diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index d11dbf9..3998a94e 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -56,16 +56,52 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 /*
  * Starting at the passed change atom, find the latest change atom to be included
  * inside the differential hunk according to the specified configuration.
+ * Also advance xscr if the first changes must be discarded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) {
-	xdchange_t *xch, *xchp;
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
+{
+	xdchange_t *xch, *xchp, *lxch;
 	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
+	long max_ignorable = xecfg->ctxlen;
+	unsigned long changes = ULONG_MAX;
+
+	/* remove ignorable changes that are too far before other changes */
+	for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
+		xch = xchp->next;
+
+		if (xch == NULL ||
+		    xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
+			*xscr = xch;
+	}
+
+	if (*xscr == NULL)
+		return NULL;
+
+	lxch = *xscr;

-	for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
-		if (xch->i1 - (xchp->i1 + xchp->chg1) > max_common)
+	for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
+		long distance = xch->i1 - (xchp->i1 + xchp->chg1);
+		if (distance > max_common)
 			break;

-	return xchp;
+		if (distance < max_ignorable &&
+		    (!xch->ignore || changes == ULONG_MAX)) {
+			lxch = xch;
+			changes = ULONG_MAX;
+		} else if (changes != ULONG_MAX &&
+			   xch->i1 + changes - (lxch->i1 + lxch->chg1) > max_common) {
+			break;
+		} else if (!xch->ignore) {
+			lxch = xch;
+			changes = ULONG_MAX;
+		} else {
+			if (changes == ULONG_MAX)
+				changes = 0;
+			changes += xch->chg2;
+		}
+	}
+
+	return lxch;
 }


@@ -139,7 +175,9 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		return xdl_emit_common(xe, xscr, ecb, xecfg);

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;

 		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
 		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
diff --git a/xdiff/xemit.h b/xdiff/xemit.h
index c2e2e83..d297107 100644
--- a/xdiff/xemit.h
+++ b/xdiff/xemit.h
@@ -27,7 +27,7 @@
 typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			   xdemitconf_t const *xecfg);

-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg);

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 9504eae..62cb23d 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -143,6 +143,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
 	return nl + 1;
 }

+int xdl_blankline(const char *line, long size, long flags)
+{
+	long i;
+
+	if (!(flags & XDF_WHITESPACE_FLAGS))
+		return (size <= 1);
+
+	for (i = 0; i < size && XDL_ISSPACE(line[i]); i++)
+		;
+
+	return (i == size);
+}
+
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
 	int i1, i2;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index ad1428e..4646ce5 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -32,6 +32,7 @@ int xdl_cha_init(chastore_t *cha, long isize, long icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
+int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
 unsigned int xdl_hashbits(unsigned int size);
--
1.7.9.5

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-15 13:01                   ` Antoine Pelisse
@ 2013-06-17 16:18                     ` Junio C Hamano
  2013-06-17 17:58                       ` Antoine Pelisse
  2013-06-17 19:09                       ` Antoine Pelisse
  0 siblings, 2 replies; 26+ messages in thread
From: Junio C Hamano @ 2013-06-17 16:18 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> So here is a more thorough description of the option:

> - real changes are interesting

OK, I think I can understand it.

> - blank lines that are close enough (less than context size) to
>   interesting changes are considered interesting (recursive definition)

OK.

> - "context" lines are used around each hunk of interesting changes

OK.

> - If two hunks are separated by less than "inter-hunk-context", they
>   will be merged into one.

Makes sense.

> The current implementation does the "interesting changes selection" in a
> single pass.

"current" meaning "the code after this patch is applied"?  Is there
a possible future enhancement hinted here?

> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
> +{
> +	xdchange_t *xch, *xchp, *lxch;
>  	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
> +	long max_ignorable = xecfg->ctxlen;
> +	unsigned long changes = ULONG_MAX;
> +
> +	/* remove ignorable changes that are too far before other changes */
> +	for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
> +		xch = xchp->next;
> +
> +		if (xch == NULL ||
> +		    xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
> +			*xscr = xch;
> +	}

This strips leading ignorable ones away until we see an unignorable
one.  Looks sane.

> +	if (*xscr == NULL)
> +		return NULL;
> +
> +	lxch = *xscr;

"lxch" remembers the last one that is "interesting".

> +	for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
> +		long distance = xch->i1 - (xchp->i1 + xchp->chg1);
> +		if (distance > max_common)
>  			break;

If we see large-enough gap, the one we processed last (in xchp) is
the end of the current hunk.  Looks sane.

> +		if (distance < max_ignorable &&
> +		    (!xch->ignore || changes == ULONG_MAX)) {
> +			lxch = xch;
> +			changes = ULONG_MAX;

The current one is made into the "last interesting one we have seen"
and the hunk continues, if either (1) the current one is interesting
by itself, or (2) the last one we saw does not match some
unexplainable criteria to cause changes set to not ULONG_MAX.

Puzzling.

> +		} else if (changes != ULONG_MAX &&
> +			   xch->i1 + changes - (lxch->i1 + lxch->chg1) > max_common) {
> +			break;

If the last one we saw does not match some unexplainable criteria to
cause changes set to not ULONG_MAX, and the distance between this
one and the last "intersting" one is further than the context, this
one will not be a part of the current hunk.

Puzzling.

Could you add comment to the "changes" variable and explain what the
variable means?

> +		} else if (!xch->ignore) {
> +			lxch = xch;
> +			changes = ULONG_MAX;

When this change by itself is interesting, it becomes the "last
interesting one" and the hunk continues.

> +		} else {
> +			if (changes == ULONG_MAX)
> +				changes = 0;
> +			changes += xch->chg2;

Puzzled beyond guessing.  Also it is curious why here and only here
we look at chg2 side of the things, not i1/chg1 in this whole thing.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-17 16:18                     ` Junio C Hamano
@ 2013-06-17 17:58                       ` Antoine Pelisse
  2013-06-17 19:09                       ` Antoine Pelisse
  1 sibling, 0 replies; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-17 17:58 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Mon, Jun 17, 2013 at 6:18 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Antoine Pelisse <apelisse@gmail.com> writes:
>
>> So here is a more thorough description of the option:
>
>> - real changes are interesting
>
> OK, I think I can understand it.
>
>> - blank lines that are close enough (less than context size) to
>>   interesting changes are considered interesting (recursive definition)
>
> OK.
>
>> - "context" lines are used around each hunk of interesting changes
>
> OK.
>
>> - If two hunks are separated by less than "inter-hunk-context", they
>>   will be merged into one.
>
> Makes sense.
>
>> The current implementation does the "interesting changes selection" in a
>> single pass.
>
> "current" meaning "the code after this patch is applied"?  Is there
> a possible future enhancement hinted here?
>
>> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
>> +{
>> +     xdchange_t *xch, *xchp, *lxch;
>>       long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
>> +     long max_ignorable = xecfg->ctxlen;
>> +     unsigned long changes = ULONG_MAX;
>> +
>> +     /* remove ignorable changes that are too far before other changes */
>> +     for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
>> +             xch = xchp->next;
>> +
>> +             if (xch == NULL ||
>> +                 xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
>> +                     *xscr = xch;
>> +     }
>
> This strips leading ignorable ones away until we see an unignorable
> one.  Looks sane.
>
>> +     if (*xscr == NULL)
>> +             return NULL;
>> +
>> +     lxch = *xscr;
>
> "lxch" remembers the last one that is "interesting".
>
>> +     for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
>> +             long distance = xch->i1 - (xchp->i1 + xchp->chg1);
>> +             if (distance > max_common)
>>                       break;
>
> If we see large-enough gap, the one we processed last (in xchp) is
> the end of the current hunk.  Looks sane.
>
>> +             if (distance < max_ignorable &&
>> +                 (!xch->ignore || changes == ULONG_MAX)) {
>> +                     lxch = xch;
>> +                     changes = ULONG_MAX;
>
> The current one is made into the "last interesting one we have seen"
> and the hunk continues, if either (1) the current one is interesting
> by itself, or (2) the last one we saw does not match some
> unexplainable criteria to cause changes set to not ULONG_MAX.
>
> Puzzling.
>
>> +             } else if (changes != ULONG_MAX &&
>> +                        xch->i1 + changes - (lxch->i1 + lxch->chg1) > max_common) {
>> +                     break;
>
> If the last one we saw does not match some unexplainable criteria to
> cause changes set to not ULONG_MAX, and the distance between this
> one and the last "intersting" one is further than the context, this
> one will not be a part of the current hunk.
>
> Puzzling.
>
> Could you add comment to the "changes" variable and explain what the
> variable means?
>
>> +             } else if (!xch->ignore) {
>> +                     lxch = xch;
>> +                     changes = ULONG_MAX;
>
> When this change by itself is interesting, it becomes the "last
> interesting one" and the hunk continues.
>
>> +             } else {
>> +                     if (changes == ULONG_MAX)
>> +                             changes = 0;
>> +                     changes += xch->chg2;
>
> Puzzled beyond guessing.  Also it is curious why here and only here
> we look at chg2 side of the things, not i1/chg1 in this whole thing.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-17 16:18                     ` Junio C Hamano
  2013-06-17 17:58                       ` Antoine Pelisse
@ 2013-06-17 19:09                       ` Antoine Pelisse
  2013-06-17 19:52                         ` Junio C Hamano
  1 sibling, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-17 19:09 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Mon, Jun 17, 2013 at 6:18 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Antoine Pelisse <apelisse@gmail.com> writes:
>
>> So here is a more thorough description of the option:
>
>> - real changes are interesting
>
> OK, I think I can understand it.
>
>> - blank lines that are close enough (less than context size) to
>>   interesting changes are considered interesting (recursive definition)
>
> OK.
>
>> - "context" lines are used around each hunk of interesting changes
>
> OK.
>
>> - If two hunks are separated by less than "inter-hunk-context", they
>>   will be merged into one.
>
> Makes sense.
>
>> The current implementation does the "interesting changes selection" in a
>> single pass.
>
> "current" meaning "the code after this patch is applied"?  Is there
> a possible future enhancement hinted here?

No. There might be, but I'm not sure it should be discussed right now
(In case you're curious, I'm thinking about interaction with combined
diff). I will take the hint and rephrase.

>> +xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
>> +{
>> +     xdchange_t *xch, *xchp, *lxch;
>>       long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
>> +     long max_ignorable = xecfg->ctxlen;
>> +     unsigned long changes = ULONG_MAX;

Let me explain what "changes" means, as I know it will help the rest
of the message:
It counts the number of *added* blank lines we have ignored since
"lxch" (needed to calculate the distance between lxch and xch)
It also has the meaning of what was called "interesting" before.
If changes == ULONG_MAX, we are still in interesting zone, otherwise
it means we have ignored "changes" *added* blank lines (0 being a
valid value).
(Actually, After rereading this part, it looks like I could check that
lxch == xchp rather than setting changes to ULONG_MAX).

>> +
>> +     /* remove ignorable changes that are too far before other changes */
>> +     for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
>> +             xch = xchp->next;
>> +
>> +             if (xch == NULL ||
>> +                 xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
>> +                     *xscr = xch;
>> +     }
>
> This strips leading ignorable ones away until we see an unignorable
> one.  Looks sane.
>
>> +     if (*xscr == NULL)
>> +             return NULL;
>> +
>> +     lxch = *xscr;
>
> "lxch" remembers the last one that is "interesting".
>
>> +     for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
>> +             long distance = xch->i1 - (xchp->i1 + xchp->chg1);
>> +             if (distance > max_common)
>>                       break;
>
> If we see large-enough gap, the one we processed last (in xchp) is
> the end of the current hunk.  Looks sane.
>
>> +             if (distance < max_ignorable &&
>> +                 (!xch->ignore || changes == ULONG_MAX)) {
>> +                     lxch = xch;
>> +                     changes = ULONG_MAX;
>
> The current one is made into the "last interesting one we have seen"
> and the hunk continues, if either (1) the current one is interesting
> by itself, or (2) the last one we saw does not match some
> unexplainable criteria to cause changes set to not ULONG_MAX.
>
> Puzzling.

- If we are still in interesting zone, we take it, even if it's
ignorable change. Because it's close enough.
- Otherwise, only take real changes. We are close to another change,
and we are still in the loop, so it must be interesting.

>> +             } else if (changes != ULONG_MAX &&
>> +                        xch->i1 + changes - (lxch->i1 + lxch->chg1) > max_common) {
>> +                     break;
>
> If the last one we saw does not match some unexplainable criteria to
> cause changes set to not ULONG_MAX, and the distance between this
> one and the last "intersting" one is further than the context, this
> one will not be a part of the current hunk.
>
> Puzzling.

If we are no longer in "interesting zone" (changes != ULONG_MAX), it
means we will stop if the distance is too big.
"changes" is used in the calculation to consider the changes we have
already ignored (xch->i1 - (lxch->i1 + lxch->chg1) will only work if
xch and lxch are consecutive, we need to add the blank lines we
ignored).

> Could you add comment to the "changes" variable and explain what the
> variable means?
>
>> +             } else if (!xch->ignore) {
>> +                     lxch = xch;
>> +                     changes = ULONG_MAX;
>
> When this change by itself is interesting, it becomes the "last
> interesting one" and the hunk continues.

Exactly, and changes goes back to "interesting".

>> +             } else {
>> +                     if (changes == ULONG_MAX)
>> +                             changes = 0;
>> +                     changes += xch->chg2;
>
> Puzzled beyond guessing.  Also it is curious why here and only here
> we look at chg2 side of the things, not i1/chg1 in this whole thing.

chg2 being the number of blank line *additions*.
I don't want to coalesce two hunks because some blank lines have been
removed between the two, so we must not change the distance
calculation because of a blank line removal. That behavior can be seen
in "ignore-blank-lines: between changes" test.

Hope that makes things clearer,
Thanks again for the thorough reading,

Antoine

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-17 19:09                       ` Antoine Pelisse
@ 2013-06-17 19:52                         ` Junio C Hamano
  2013-06-17 21:33                           ` Antoine Pelisse
  0 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2013-06-17 19:52 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

>>> +     unsigned long changes = ULONG_MAX;
>
> Let me explain what "changes" means, as I know it will help the rest
> of the message:
> It counts the number of *added* blank lines we have ignored since
> "lxch" (needed to calculate the distance between lxch and xch)
> It also has the meaning of what was called "interesting" before.
> If changes == ULONG_MAX, we are still in interesting zone, otherwise
> it means we have ignored "changes" *added* blank lines (0 being a
> valid value).

OK.  That deserves a comment next to this variable.

> (Actually, After rereading this part, it looks like I could check that
> lxch == xchp rather than setting changes to ULONG_MAX).

Yeah, I think so.

>>> +             if (distance < max_ignorable &&
>>> +                 (!xch->ignore || changes == ULONG_MAX)) {
>>> +                     lxch = xch;
>>> +                     changes = ULONG_MAX;
>>
> - If we are still in interesting zone, we take it, even if it's
> ignorable change. Because it's close enough.
> - Otherwise, only take real changes. We are close to another change,
> and we are still in the loop, so it must be interesting.

OK.

>>> +             } else if (changes != ULONG_MAX &&
>>> +                        xch->i1 + changes - (lxch->i1 + lxch->chg1) > max_common) {
>>> +                     break;
>
> If we are no longer in "interesting zone" (changes != ULONG_MAX), it
> means we will stop if the distance is too big.
> "changes" is used in the calculation to consider the changes we have
> already ignored (xch->i1 - (lxch->i1 + lxch->chg1) will only work if
> xch and lxch are consecutive, we need to add the blank lines we
> ignored).

And this uses max_common that is much larger than max_ignorable
because...?

The last interesting change, with its post context and inter hunk
gap, together with precontext for this one, is close enough to the
beginning of this one.  So it is understandable if xch by itself is
intereseting to use max_common.  Even an interesting one, if that is
so far from the last interesting one, should not be part of this
hunk.

However, if the current one is by itself uninteresting, should we
still use the max_common, or should this be compared with
max_ignorable?
    
>> Could you add comment to the "changes" variable and explain what the
>> variable means?
>>
>>> +             } else if (!xch->ignore) {
>>> +                     lxch = xch;
>>> +                     changes = ULONG_MAX;
>>
>> When this change by itself is interesting, it becomes the "last
>> interesting one" and the hunk continues.
>
> Exactly, and changes goes back to "interesting".
>
>>> +             } else {
>>> +                     if (changes == ULONG_MAX)
>>> +                             changes = 0;
>>> +                     changes += xch->chg2;
>>
>> Puzzled beyond guessing.  Also it is curious why here and only here
>> we look at chg2 side of the things, not i1/chg1 in this whole thing.
>
> chg2 being the number of blank line *additions*.

This is on the else side of if (!xch->ignore), so we are looking at
ignored hunk, which means there is only blank line change.  Can chg2
be 0 while chg1 is not zero, i.e. xch being a blank line removal?

What should happen in that case?  Don't we want to show it, for the
same reason we want to keep removal, as long as it is close enough
to the interesting zone?

> Hope that makes things clearer,

Yes, it helped quite a bit.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-17 19:52                         ` Junio C Hamano
@ 2013-06-17 21:33                           ` Antoine Pelisse
  2013-06-17 23:27                             ` Junio C Hamano
  0 siblings, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-17 21:33 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

>>>> +             } else if (changes != ULONG_MAX &&
>>>> +                        xch->i1 + changes - (lxch->i1 + lxch->chg1) > max_common) {
>>>> +                     break;
>>
>> If we are no longer in "interesting zone" (changes != ULONG_MAX), it
>> means we will stop if the distance is too big.
>> "changes" is used in the calculation to consider the changes we have
>> already ignored (xch->i1 - (lxch->i1 + lxch->chg1) will only work if
>> xch and lxch are consecutive, we need to add the blank lines we
>> ignored).
>
> And this uses max_common that is much larger than max_ignorable
> because...?
>
> The last interesting change, with its post context and inter hunk
> gap, together with precontext for this one, is close enough to the
> beginning of this one.  So it is understandable if xch by itself is
> intereseting to use max_common.  Even an interesting one, if that is
> so far from the last interesting one, should not be part of this
> hunk.
>
> However, if the current one is by itself uninteresting, should we
> still use the max_common, or should this be compared with
> max_ignorable?

Because of the "recursive definition", we don't know yet if an
ignorable change will be interesting or not.
We need to make sure it will be close to another interesting change first.
If it is, it will fall in the first if part, and lxch will catch-up.
If not, we will eventually be too far and break.

Re-reading note: OK, This last sentence ("If not we will eventually be
too far and break") is actually a bug. We might break before we find
something interesting while we should keep going. For example in such
a case, we should display like this, but won't:

@@ -x,x +x,x @@
+change   <--- That is lxch
 1
 2
 3
+       <--- Here we leave "interesting"
 4
 5
+       <--- We are too far and quit searching
 6
 7
+
 8
 9
+
 10
 11
+change

>>>> +             } else {
>>>> +                     if (changes == ULONG_MAX)
>>>> +                             changes = 0;
>>>> +                     changes += xch->chg2;
>>>
>>> Puzzled beyond guessing.  Also it is curious why here and only here
>>> we look at chg2 side of the things, not i1/chg1 in this whole thing.
>>
>> chg2 being the number of blank line *additions*.
>
> This is on the else side of if (!xch->ignore), so we are looking at
> ignored hunk, which means there is only blank line change.  Can chg2
> be 0 while chg1 is not zero, i.e. xch being a blank line removal?

Exactly. It can be a blank line removal. But I don't want to consider
it in the calculation.
Here's why:
We have:
1
2
3




4
5
6

and change it to:
change
1
2
3
4
5
6
change

What should be the output of diff --ignore-blank-lines ?

I chose this alternative:
@@ -1,3 +1,4 @@
+change
 1
 2
 3
@@ -7,3 +5,4 @@
 4
 5
 6
+change

While one could have chosen:
@@ -1,10 +1,8 @@
+change
 1
 2
 3
-
-
-
-
 4
 5
 6
+change

> What should happen in that case?  Don't we want to show it, for the
> same reason we want to keep removal, as long as it is close enough
> to the interesting zone?

Nothing is interesting here, we just leave the interesting zone (if
not already left) because everything else failed.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-17 21:33                           ` Antoine Pelisse
@ 2013-06-17 23:27                             ` Junio C Hamano
  2013-06-19 18:46                               ` Antoine Pelisse
  0 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2013-06-17 23:27 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> Re-reading note: OK, This last sentence ("If not we will eventually be
> too far and break") is actually a bug. We might break before we find
> something interesting while we should keep going. For example in such
> a case, we should display like this, but won't:

Glad to see that my question has helped ;-)

>> This is on the else side of if (!xch->ignore), so we are looking at
>> ignored hunk, which means there is only blank line change.  Can chg2
>> be 0 while chg1 is not zero, i.e. xch being a blank line removal?
>
> Exactly. It can be a blank line removal. But I don't want to consider
> it in the calculation.
> Here's why:
> ...
> What should be the output of diff --ignore-blank-lines ?
>
> I chose this alternative:
> @@ -1,3 +1,4 @@
> +change
>  1
>  2
>  3
> @@ -7,3 +5,4 @@
>  4
>  5
>  6
> +change
>
> While one could have chosen:
> @@ -1,10 +1,8 @@
> +change
>  1
>  2
>  3
> -
> -
> -
> -
>  4
>  5
>  6
> +change
> ...
> Nothing is interesting here, we just leave the interesting zone (if
> not already left) because everything else failed.

Yes, that asymmetry is what I was wondering if we want to have.  If
we show additional blanks as a significant event, I am not so sure
we can say "Nothing is interesting here".

I do not feel strongly either way, but it just felt somewhat
inconsistent.

Thanks.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] diff: add --ignore-blank-lines option
  2013-06-17 23:27                             ` Junio C Hamano
@ 2013-06-19 18:46                               ` Antoine Pelisse
  2013-06-19 22:23                                 ` Junio C Hamano
  0 siblings, 1 reply; 26+ messages in thread
From: Antoine Pelisse @ 2013-06-19 18:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Antoine Pelisse

The goal of the patch is to introduce the GNU diff
-B/--ignore-blank-lines as closely as possible. The short option is not
available because it's already used for "break-rewrites".

When this option is used, git-diff will not create hunks that simply
add or remove empty lines, but will still show empty lines
addition/suppression if they are close enough to "valuable" changes.

There are two differences between this option and GNU diff -B option:
- GNU diff doesn't have "--inter-hunk-context", so this must be handled
- The following sequence looks like a bug (context is displayed twice):

    $ seq 5 >file1
    $ cat <<EOF >file2
    change
    1
    2

    3
    4
    5
    change
    EOF
    $ diff -u -B file1 file2
    --- file1	2013-06-08 22:13:04.471517834 +0200
    +++ file2	2013-06-08 22:13:23.275517855 +0200
    @@ -1,5 +1,7 @@
    +change
     1
     2
    +
     3
     4
     5
    @@ -3,3 +5,4 @@
     3
     4
     5
    +change

So here is a more thorough description of the option:
- real changes are interesting
- blank lines that are close enough (less than context size) to
interesting changes are considered interesting (recursive definition)
- "context" lines are used around each hunk of interesting changes
- If two hunks are separated by less than "inter-hunk-context", they
will be merged into one.

The implementation does the "interesting changes selection" in a single
pass.

Signed-off-by: Antoine Pelisse <apelisse@gmail.com>
---
Changes since last version:
- "changes" variable has been renamed to "ignored"
- "lxch == xchp" is used instead of setting "changes" to a specific
value (ULONG_MAX)
- Fixed commit message
- Fixed bug we discussed earlier:
I added a new "else if" branch to handle that specific case (short-cut the bug)
I'm unfortunately duplicating some logic, but I don't see any way out of this.
- Added a test for the bug

 Documentation/diff-options.txt |    3 +
 diff.c                         |    2 +
 t/t4015-diff-whitespace.sh     |  345 ++++++++++++++++++++++++++++++++++++++++
 xdiff/xdiff.h                  |    2 +
 xdiff/xdiffi.c                 |   29 +++-
 xdiff/xdiffi.h                 |    1 +
 xdiff/xemit.c                  |   49 +++++-
 xdiff/xemit.h                  |    2 +-
 xdiff/xutils.c                 |   13 ++
 xdiff/xutils.h                 |    1 +
 10 files changed, 439 insertions(+), 8 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index b8a9b86..4e042d9 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -439,6 +439,9 @@ endif::git-format-patch[]
 	differences even if one line has whitespace where the other
 	line has none.

+--ignore-blank-lines::
+	Ignore changes whose lines are all blank.
+
 --inter-hunk-context=<lines>::
 	Show the context between diff hunks, up to the specified number
 	of lines, thereby fusing hunks that are close to each other.
diff --git a/diff.c b/diff.c
index f0b3e7c..208094f 100644
--- a/diff.c
+++ b/diff.c
@@ -3593,6 +3593,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_CHANGE);
 	else if (!strcmp(arg, "--ignore-space-at-eol"))
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
+	else if (!strcmp(arg, "--ignore-blank-lines"))
+		DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index cc3db13..3fb4b97 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -142,6 +142,351 @@ EOF
 git diff --ignore-space-at-eol > out
 test_expect_success 'another test, with --ignore-space-at-eol' 'test_cmp expect out'

+test_expect_success 'ignore-blank-lines: only new lines' '
+	test_seq 5 >x &&
+	git update-index x &&
+	test_seq 5 | sed "/3/i \\
+" >x &&
+	git diff --ignore-blank-lines >out &&
+	>expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: only new lines with space' '
+	test_seq 5 >x &&
+	git update-index x &&
+	test_seq 5 | sed "/3/i \ " >x &&
+	git diff -w --ignore-blank-lines >out &&
+	>expect &&
+	test_cmp out expect
+'
+
+test_expect_success 'ignore-blank-lines: after change' '
+	cat <<-\EOF >x &&
+	1
+	2
+
+	3
+	4
+	5
+
+	6
+	7
+	EOF
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+
+	1
+	2
+	3
+	4
+	5
+	6
+
+	7
+	EOF
+	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,6 +1,7 @@
+	+change
+	+
+	 1
+	 2
+	-
+	 3
+	 4
+	 5
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: before change' '
+	cat <<-\EOF >x &&
+	1
+	2
+
+	3
+	4
+	5
+	6
+	7
+	EOF
+	git update-index x &&
+	cat <<-\EOF >x &&
+
+	1
+	2
+	3
+	4
+	5
+
+	6
+	7
+	change
+	EOF
+	git diff --inter-hunk-context=100 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -4,5 +4,7 @@
+	 3
+	 4
+	 5
+	+
+	 6
+	 7
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes' '
+	cat <<-\EOF >x &&
+	1
+	2
+	3
+	4
+	5
+
+
+	6
+	7
+	8
+	9
+	10
+	EOF
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+	6
+	7
+	8
+
+	9
+	10
+	change
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,5 +1,7 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	@@ -8,5 +8,7 @@
+	 6
+	 7
+	 8
+	+
+	 9
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: between changes (with interhunkctx)' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+
+	6
+	7
+	8
+	9
+
+	10
+	change
+	EOF
+	git diff --inter-hunk-context=2 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,10 +1,15 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	+
+	 6
+	 7
+	 8
+	 9
+	+
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: scattered spaces' '
+	test_seq 10 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+	3
+
+	4
+
+	5
+
+	6
+
+
+	7
+
+	8
+	9
+	10
+	change
+	EOF
+	git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,3 +1,4 @@
+	+change
+	 1
+	 2
+	 3
+	@@ -8,3 +15,4 @@
+	 8
+	 9
+	 10
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: spaces coalesce' '
+	test_seq 6 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+	3
+
+	4
+
+	5
+
+	6
+	change
+	EOF
+	git diff --inter-hunk-context=4 --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,6 +1,11 @@
+	+change
+	 1
+	 2
+	 3
+	+
+	 4
+	+
+	 5
+	+
+	 6
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
+test_expect_success 'ignore-blank-lines: mix changes and blank lines' '
+	test_seq 16 >x &&
+	git update-index x &&
+	cat <<-\EOF >x &&
+	change
+	1
+	2
+
+	3
+	4
+	5
+	change
+	6
+	7
+	8
+
+	9
+	10
+	11
+	change
+	12
+	13
+	14
+
+	15
+	16
+	change
+	EOF
+	git diff --ignore-blank-lines >out.tmp &&
+	cat <<-\EOF >expected &&
+	diff --git a/x b/x
+	--- a/x
+	+++ b/x
+	@@ -1,8 +1,11 @@
+	+change
+	 1
+	 2
+	+
+	 3
+	 4
+	 5
+	+change
+	 6
+	 7
+	 8
+	@@ -9,8 +13,11 @@
+	 9
+	 10
+	 11
+	+change
+	 12
+	 13
+	 14
+	+
+	 15
+	 16
+	+change
+	EOF
+	compare_diff_patch expected out.tmp
+'
+
 test_expect_success 'check mixed spaces and tabs in indent' '

 	# This is indented with SP HT SP.
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 219a3bb..c033991 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -39,6 +39,8 @@ extern "C" {
 #define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
 #define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)

+#define XDF_IGNORE_BLANK_LINES (1 << 7)
+
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index b2eb6db..2358a2d 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -394,6 +394,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 	xch->i2 = i2;
 	xch->chg1 = chg1;
 	xch->chg2 = chg2;
+	xch->ignore = 0;

 	return xch;
 }
@@ -544,7 +545,9 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	xdchange_t *xch, *xche;

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;
 		if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
 				     xch->i2, xche->i2 + xche->chg2 - xch->i2,
 				     ecb->priv) < 0)
@@ -553,6 +556,27 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 	return 0;
 }

+static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+{
+	xdchange_t *xch;
+
+	for (xch = xscr; xch; xch = xch->next) {
+		int ignore = 1;
+		xrecord_t **rec;
+		long i;
+
+		rec = &xe->xdf1.recs[xch->i1];
+		for (i = 0; i < xch->chg1 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+		rec = &xe->xdf2.recs[xch->i2];
+		for (i = 0; i < xch->chg2 && ignore; i++)
+			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
+
+		xch->ignore = ignore;
+	}
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
@@ -571,6 +595,9 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		return -1;
 	}
 	if (xscr) {
+		if (xpp->flags & XDF_IGNORE_BLANK_LINES)
+			xdl_mark_ignorable(xscr, &xe, xpp->flags);
+
 		if (ef(&xe, xscr, ecb, xecfg) < 0) {

 			xdl_free_script(xscr);
diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h
index 7a92ea9..8b81206 100644
--- a/xdiff/xdiffi.h
+++ b/xdiff/xdiffi.h
@@ -41,6 +41,7 @@ typedef struct s_xdchange {
 	struct s_xdchange *next;
 	long i1, i2;
 	long chg1, chg2;
+	int ignore;
 } xdchange_t;


diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index d11dbf9..3854fc7 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -56,16 +56,51 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 /*
  * Starting at the passed change atom, find the latest change atom to be included
  * inside the differential hunk according to the specified configuration.
+ * Also advance xscr if the first changes must be discarded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg) {
-	xdchange_t *xch, *xchp;
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
+{
+	xdchange_t *xch, *xchp, *lxch;
 	long max_common = 2 * xecfg->ctxlen + xecfg->interhunkctxlen;
+	long max_ignorable = xecfg->ctxlen;
+	unsigned long ignored = 0; /* number of ignored blank lines */
+
+	/* remove ignorable changes that are too far before other changes */
+	for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
+		xch = xchp->next;
+
+		if (xch == NULL ||
+		    xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
+			*xscr = xch;
+	}
+
+	if (*xscr == NULL)
+		return NULL;
+
+	lxch = *xscr;

-	for (xchp = xscr, xch = xscr->next; xch; xchp = xch, xch = xch->next)
-		if (xch->i1 - (xchp->i1 + xchp->chg1) > max_common)
+	for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
+		long distance = xch->i1 - (xchp->i1 + xchp->chg1);
+		if (distance > max_common)
 			break;

-	return xchp;
+		if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) {
+			lxch = xch;
+			ignored = 0;
+		} else if (distance < max_ignorable && xch->ignore) {
+			ignored += xch->chg2;
+		} else if (lxch != xchp &&
+			   xch->i1 + ignored - (lxch->i1 + lxch->chg1) > max_common) {
+			break;
+		} else if (!xch->ignore) {
+			lxch = xch;
+			ignored = 0;
+		} else {
+			ignored += xch->chg2;
+		}
+	}
+
+	return lxch;
 }


@@ -139,7 +174,9 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		return xdl_emit_common(xe, xscr, ecb, xecfg);

 	for (xch = xscr; xch; xch = xche->next) {
-		xche = xdl_get_hunk(xch, xecfg);
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;

 		s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
 		s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
diff --git a/xdiff/xemit.h b/xdiff/xemit.h
index c2e2e83..d297107 100644
--- a/xdiff/xemit.h
+++ b/xdiff/xemit.h
@@ -27,7 +27,7 @@
 typedef int (*emit_func_t)(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			   xdemitconf_t const *xecfg);

-xdchange_t *xdl_get_hunk(xdchange_t *xscr, xdemitconf_t const *xecfg);
+xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg);
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg);

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 9504eae..62cb23d 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -143,6 +143,19 @@ long xdl_guess_lines(mmfile_t *mf, long sample) {
 	return nl + 1;
 }

+int xdl_blankline(const char *line, long size, long flags)
+{
+	long i;
+
+	if (!(flags & XDF_WHITESPACE_FLAGS))
+		return (size <= 1);
+
+	for (i = 0; i < size && XDL_ISSPACE(line[i]); i++)
+		;
+
+	return (i == size);
+}
+
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
 	int i1, i2;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index ad1428e..4646ce5 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -32,6 +32,7 @@ int xdl_cha_init(chastore_t *cha, long isize, long icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
+int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
 unsigned int xdl_hashbits(unsigned int size);
--
1.7.9.5

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] diff: add --ignore-blank-lines option
  2013-06-19 18:46                               ` Antoine Pelisse
@ 2013-06-19 22:23                                 ` Junio C Hamano
  0 siblings, 0 replies; 26+ messages in thread
From: Junio C Hamano @ 2013-06-19 22:23 UTC (permalink / raw)
  To: Antoine Pelisse; +Cc: git

Antoine Pelisse <apelisse@gmail.com> writes:

> So here is a more thorough description of the option:
> - real changes are interesting
> - blank lines that are close enough (less than context size) to
> interesting changes are considered interesting (recursive definition)
> - "context" lines are used around each hunk of interesting changes
> - If two hunks are separated by less than "inter-hunk-context", they
> will be merged into one.

Thanks; will replace what is queued in 'pu'.  Let's advance this
version to 'next' soonish.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2013-06-19 22:23 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-26 17:58 [PATCH] diff: add --ignore-blank-lines option Antoine Pelisse
2013-05-26 20:35 ` Johannes Sixt
2013-05-27  7:14   ` Antoine Pelisse
2013-06-01  8:48     ` Antoine Pelisse
2013-06-04 18:26 ` Junio C Hamano
2013-06-04 19:08   ` Antoine Pelisse
2013-06-04 20:46     ` Junio C Hamano
2013-06-04 20:51       ` Antoine Pelisse
2013-06-08 20:44       ` Antoine Pelisse
2013-06-09  7:33         ` Eric Sunshine
2013-06-09 20:07         ` Junio C Hamano
2013-06-09 20:32           ` Antoine Pelisse
2013-06-09 21:28             ` Junio C Hamano
2013-06-10 21:03           ` Antoine Pelisse
2013-06-10 21:43             ` Junio C Hamano
2013-06-12 13:21               ` Antoine Pelisse
2013-06-12 17:22                 ` Junio C Hamano
2013-06-15 13:01                   ` Antoine Pelisse
2013-06-17 16:18                     ` Junio C Hamano
2013-06-17 17:58                       ` Antoine Pelisse
2013-06-17 19:09                       ` Antoine Pelisse
2013-06-17 19:52                         ` Junio C Hamano
2013-06-17 21:33                           ` Antoine Pelisse
2013-06-17 23:27                             ` Junio C Hamano
2013-06-19 18:46                               ` Antoine Pelisse
2013-06-19 22:23                                 ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).