[RFC PATCH v5] Implement --first-parent for git rev-list --bisect

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
@ 2018-06-22 12:39 Tiago Botelho
  2018-06-25 17:33 ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Tiago Botelho @ 2018-06-22 12:39 UTC (permalink / raw)
  To: git
  Cc: christian.couder, johannes.schindelin, haraldnordgren, gitster,
	Tiago Botelho

This will enable users to implement bisecting on first parents
which can be useful for when the commits from a feature branch
that we want to merge are not always tested.

Signed-off-by: Tiago Botelho <tiagonbotelho@hotmail.com>
---

This patch resolves every issue with the unit tests. 

The bug detected by Junio regarding do_find_bisection() propagating 
a weight of -1 over to the child nodes ended up not being an 
issue since it cannot happen in practice, there will always be at 
least one UNINTERESTING parent which will propagate either a weight 
of 0 or 1 throughout the child nodes.

 bisect.c                   | 43 ++++++++++++++++++++++------------
 bisect.h                   |  1 +
 builtin/rev-list.c         |  3 +++
 revision.c                 |  3 ---
 t/t6002-rev-list-bisect.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/bisect.c b/bisect.c
index 4eafc8262..cb80f29c5 100644
--- a/bisect.c
+++ b/bisect.c
@@ -82,15 +82,16 @@ static inline void weight_set(struct commit_list *elem, int weight)
 	*((int*)(elem->item->util)) = weight;
 }
 
-static int count_interesting_parents(struct commit *commit)
+static int count_interesting_parents(struct commit *commit, unsigned bisect_flags)
 {
 	struct commit_list *p;
 	int count;
 
 	for (count = 0, p = commit->parents; p; p = p->next) {
-		if (p->item->object.flags & UNINTERESTING)
-			continue;
-		count++;
+		if (!(p->item->object.flags & UNINTERESTING))
+		    count++;
+		if (bisect_flags & BISECT_FIRST_PARENT)
+			break;
 	}
 	return count;
 }
@@ -117,10 +118,10 @@ static inline int halfway(struct commit_list *p, int nr)
 }
 
 #if !DEBUG_BISECT
-#define show_list(a,b,c,d) do { ; } while (0)
+#define show_list(a,b,c,d,e) do { ; } while (0)
 #else
 static void show_list(const char *debug, int counted, int nr,
-		      struct commit_list *list)
+		      struct commit_list *list, unsigned bisect_flags)
 {
 	struct commit_list *p;
 
@@ -146,10 +147,14 @@ static void show_list(const char *debug, int counted, int nr,
 		else
 			fprintf(stderr, "---");
 		fprintf(stderr, " %.*s", 8, oid_to_hex(&commit->object.oid));
-		for (pp = commit->parents; pp; pp = pp->next)
+		for (pp = commit->parents; pp; pp = pp->next) {
 			fprintf(stderr, " %.*s", 8,
 				oid_to_hex(&pp->item->object.oid));
 
+			if (bisect_flags & BISECT_FIRST_PARENT)
+				break;
+		}
+
 		subject_len = find_commit_subject(buf, &subject_start);
 		if (subject_len)
 			fprintf(stderr, " %.*s", subject_len, subject_start);
@@ -267,13 +272,13 @@ static struct commit_list *do_find_bisection(struct commit_list *list,
 		unsigned flags = commit->object.flags;
 
 		p->item->util = &weights[n++];
-		switch (count_interesting_parents(commit)) {
+		switch (count_interesting_parents(commit, bisect_flags)) {
 		case 0:
 			if (!(flags & TREESAME)) {
 				weight_set(p, 1);
 				counted++;
 				show_list("bisection 2 count one",
-					  counted, nr, list);
+					  counted, nr, list, bisect_flags);
 			}
 			/*
 			 * otherwise, it is known not to reach any
@@ -289,7 +294,7 @@ static struct commit_list *do_find_bisection(struct commit_list *list,
 		}
 	}
 
-	show_list("bisection 2 initialize", counted, nr, list);
+	show_list("bisection 2 initialize", counted, nr, list, bisect_flags);
 
 	/*
 	 * If you have only one parent in the resulting set
@@ -319,7 +324,7 @@ static struct commit_list *do_find_bisection(struct commit_list *list,
 		counted++;
 	}
 
-	show_list("bisection 2 count_distance", counted, nr, list);
+	show_list("bisection 2 count_distance", counted, nr, list, bisect_flags);
 
 	while (counted < nr) {
 		for (p = list; p; p = p->next) {
@@ -329,6 +334,11 @@ static struct commit_list *do_find_bisection(struct commit_list *list,
 			if (0 <= weight(p))
 				continue;
 			for (q = p->item->parents; q; q = q->next) {
+				if ((bisect_flags & BISECT_FIRST_PARENT)) {
+					if (weight(q) < 0)
+						q = NULL;
+					break;
+				}
 				if (q->item->object.flags & UNINTERESTING)
 					continue;
 				if (0 <= weight(q))
@@ -346,7 +356,7 @@ static struct commit_list *do_find_bisection(struct commit_list *list,
 				weight_set(p, weight(q)+1);
 				counted++;
 				show_list("bisection 2 count one",
-					  counted, nr, list);
+					  counted, nr, list, bisect_flags);
 			}
 			else
 				weight_set(p, weight(q));
@@ -357,7 +367,7 @@ static struct commit_list *do_find_bisection(struct commit_list *list,
 		}
 	}
 
-	show_list("bisection 2 counted all", counted, nr, list);
+	show_list("bisection 2 counted all", counted, nr, list, bisect_flags);
 
 	if (!find_all)
 		return best_bisection(list, nr);
@@ -372,7 +382,7 @@ void find_bisection(struct commit_list **commit_list, int *reaches,
 	struct commit_list *list, *p, *best, *next, *last;
 	int *weights;
 
-	show_list("bisection 2 entry", 0, 0, *commit_list);
+	show_list("bisection 2 entry", 0, 0, *commit_list, bisect_flags);
 
 	/*
 	 * Count the number of total and tree-changing items on the
@@ -395,7 +405,7 @@ void find_bisection(struct commit_list **commit_list, int *reaches,
 		on_list++;
 	}
 	list = last;
-	show_list("bisection 2 sorted", 0, nr, list);
+	show_list("bisection 2 sorted", 0, nr, list, bisect_flags);
 
 	*all = nr;
 	weights = xcalloc(on_list, sizeof(*weights));
@@ -962,6 +972,9 @@ int bisect_next_all(const char *prefix, int no_checkout)
 	if (skipped_revs.nr)
 		bisect_flags |= BISECT_FIND_ALL;
 
+	if (revs.first_parent_only)
+		bisect_flags |= BISECT_FIRST_PARENT;
+
 	find_bisection(&revs.commits, &reaches, &all, bisect_flags);
 	revs.commits = managed_skipped(revs.commits, &tried);
 
diff --git a/bisect.h b/bisect.h
index 1d40a33ad..e445567c2 100644
--- a/bisect.h
+++ b/bisect.h
@@ -2,6 +2,7 @@
 #define BISECT_H
 
 #define BISECT_FIND_ALL		(1u<<0)
+#define BISECT_FIRST_PARENT    	(1u<<1)
 
 /*
  * Find bisection. If something is found, `reaches` will be the number of
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 8752f5bbe..66439e1b3 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -538,6 +538,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 	if (bisect_list) {
 		int reaches, all;
 
+		if (revs.first_parent_only)
+			bisect_flags |= BISECT_FIRST_PARENT;
+
 		find_bisection(&revs.commits, &reaches, &all, bisect_flags);
 
 		if (bisect_show_vars)
diff --git a/revision.c b/revision.c
index 4e0e193e5..b9d063805 100644
--- a/revision.c
+++ b/revision.c
@@ -2474,9 +2474,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 	if (!revs->reflog_info && revs->grep_filter.use_reflog_filter)
 		die("cannot use --grep-reflog without --walk-reflogs");
 
-	if (revs->first_parent_only && revs->bisect)
-		die(_("--first-parent is incompatible with --bisect"));
-
 	if (revs->expand_tabs_in_log < 0)
 		revs->expand_tabs_in_log = revs->expand_tabs_in_log_default;
 
diff --git a/t/t6002-rev-list-bisect.sh b/t/t6002-rev-list-bisect.sh
index a66140803..c9cd349de 100755
--- a/t/t6002-rev-list-bisect.sh
+++ b/t/t6002-rev-list-bisect.sh
@@ -263,4 +263,62 @@ test_expect_success 'rev-parse --bisect can default to good/bad refs' '
 	test_cmp expect.sorted actual.sorted
 '
 
+# We generate the following commit graph:
+#
+#   B ------ C
+#  /          \
+# A            FX
+#  \          /
+#   D - CC - EX
+
+test_expect_success 'setup' '
+  test_commit A &&
+  test_commit B &&
+  test_commit C &&
+  git reset --hard A &&
+  test_commit D &&
+  test_commit CC &&
+  test_commit EX &&
+  test_merge FX C
+'
+
+test_output_expect_success "--bisect --first-parent" 'git rev-list --bisect --first-parent FX ^A' <<EOF
+$(git rev-parse CC)
+EOF
+
+test_output_expect_success "--first-parent" 'git rev-list --first-parent FX ^A' <<EOF
+$(git rev-parse FX)
+$(git rev-parse EX)
+$(git rev-parse CC)
+$(git rev-parse D)
+EOF
+
+test_output_expect_success "--bisect-vars --first-parent" 'git rev-list --bisect-vars --first-parent FX ^A' <<EOF
+bisect_rev='$(git rev-parse CC)'
+bisect_nr=1
+bisect_good=1
+bisect_bad=1
+bisect_all=4
+bisect_steps=1
+EOF
+
+test_expect_success "--bisect-all --first-parent" '
+cat >expect1 <<EOF &&
+$(git rev-parse CC) (dist=2)
+$(git rev-parse EX) (dist=1)
+$(git rev-parse D) (dist=1)
+$(git rev-parse FX) (dist=0)
+EOF
+
+cat >expect2 <<EOF &&
+$(git rev-parse CC) (dist=2)
+$(git rev-parse D) (dist=1)
+$(git rev-parse EX) (dist=1)
+$(git rev-parse FX) (dist=0)
+EOF
+
+git rev-list --bisect-all --first-parent FX ^A >actual &&
+  ( test_cmp expect1 actual || test_cmp expect2 actual )
+'
+
 test_done
-- 
2.16.3


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-22 12:39 [RFC PATCH v5] Implement --first-parent for git rev-list --bisect Tiago Botelho
@ 2018-06-25 17:33 ` Junio C Hamano
  2018-06-26 11:59   ` Christian Couder
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2018-06-25 17:33 UTC (permalink / raw)
  To: Tiago Botelho
  Cc: git, christian.couder, johannes.schindelin, haraldnordgren,
	Tiago Botelho

Tiago Botelho <tiagonbotelho@gmail.com> writes:

> +test_expect_success "--bisect-all --first-parent" '
> +cat >expect1 <<EOF &&
> +$(git rev-parse CC) (dist=2)
> +$(git rev-parse EX) (dist=1)
> +$(git rev-parse D) (dist=1)
> +$(git rev-parse FX) (dist=0)
> +EOF
> +
> +cat >expect2 <<EOF &&
> +$(git rev-parse CC) (dist=2)
> +$(git rev-parse D) (dist=1)
> +$(git rev-parse EX) (dist=1)
> +$(git rev-parse FX) (dist=0)
> +EOF
> +
> +git rev-list --bisect-all --first-parent FX ^A >actual &&
> +  ( test_cmp expect1 actual || test_cmp expect2 actual )
> +'

I hate to say this, but the above looks like a typical
unmaintainable mess.

What happens when you or somebody else later needs to update the
graph to be tested to add one more commit (or even more)?  Would it
be enough to add another "rev-parse" plus "dist=X" line in both
expects?  Or do we see a trap for combinatorial explosion that
requires us to add new expect$N?





^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-25 17:33 ` Junio C Hamano
@ 2018-06-26 11:59   ` Christian Couder
  2018-06-26 14:10     ` Johannes Schindelin
  0 siblings, 1 reply; 18+ messages in thread
From: Christian Couder @ 2018-06-26 11:59 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Tiago Botelho, git, Johannes Schindelin, Harald Nordgren,
	Tiago Botelho

On Mon, Jun 25, 2018 at 7:33 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Tiago Botelho <tiagonbotelho@gmail.com> writes:
>
>> +test_expect_success "--bisect-all --first-parent" '
>> +cat >expect1 <<EOF &&
>> +$(git rev-parse CC) (dist=2)
>> +$(git rev-parse EX) (dist=1)
>> +$(git rev-parse D) (dist=1)
>> +$(git rev-parse FX) (dist=0)
>> +EOF
>> +
>> +cat >expect2 <<EOF &&
>> +$(git rev-parse CC) (dist=2)
>> +$(git rev-parse D) (dist=1)
>> +$(git rev-parse EX) (dist=1)
>> +$(git rev-parse FX) (dist=0)
>> +EOF
>> +
>> +git rev-list --bisect-all --first-parent FX ^A >actual &&
>> +  ( test_cmp expect1 actual || test_cmp expect2 actual )
>> +'
>
> I hate to say this, but the above looks like a typical
> unmaintainable mess.
>
> What happens when you or somebody else later needs to update the
> graph to be tested to add one more commit (or even more)?  Would it
> be enough to add another "rev-parse" plus "dist=X" line in both
> expects?  Or do we see a trap for combinatorial explosion that
> requires us to add new expect$N?

What about the following then:

test_dist_order () {
    file="$1"
    n="$2"
    while read -r hash dist
    do
        d=$(echo "$dist" | sed -e "s/(dist=\(.*\))/\1/")
        case "$d" in
            ''|*[!0-9]*) return 1 ;;
            *) ;;
        esac
        test "$d" -le "$n" || return 1
        n="$d"
    done <"$file"
}

test_expect_success "--bisect-all --first-parent" '
    cat >expect <<EOF &&
$(git rev-parse CC) (dist=2)
$(git rev-parse EX) (dist=1)
$(git rev-parse D) (dist=1)
$(git rev-parse FX) (dist=0)
EOF
    sort expect >expect_sorted &&
    git rev-list --bisect-all --first-parent FX ^A >actual &&
    sort actual >actual_sorted &&
    test_cmp expect_sorted actual_sorted &&
    test_dist_order actual 2
'

This feels overkill to me, but it should scale if we ever make more
complex tests.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-26 11:59   ` Christian Couder
@ 2018-06-26 14:10     ` Johannes Schindelin
  2018-06-26 15:41       ` Christian Couder
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2018-06-26 14:10 UTC (permalink / raw)
  To: Christian Couder
  Cc: Junio C Hamano, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Hi Chris,

On Tue, 26 Jun 2018, Christian Couder wrote:

> On Mon, Jun 25, 2018 at 7:33 PM, Junio C Hamano <gitster@pobox.com> wrote:
> > Tiago Botelho <tiagonbotelho@gmail.com> writes:
> >
> >> +test_expect_success "--bisect-all --first-parent" '
> >> +cat >expect1 <<EOF &&
> >> +$(git rev-parse CC) (dist=2)
> >> +$(git rev-parse EX) (dist=1)
> >> +$(git rev-parse D) (dist=1)
> >> +$(git rev-parse FX) (dist=0)
> >> +EOF
> >> +
> >> +cat >expect2 <<EOF &&
> >> +$(git rev-parse CC) (dist=2)
> >> +$(git rev-parse D) (dist=1)
> >> +$(git rev-parse EX) (dist=1)
> >> +$(git rev-parse FX) (dist=0)
> >> +EOF
> >> +
> >> +git rev-list --bisect-all --first-parent FX ^A >actual &&
> >> +  ( test_cmp expect1 actual || test_cmp expect2 actual )
> >> +'
> >
> > I hate to say this, but the above looks like a typical
> > unmaintainable mess.
> >
> > What happens when you or somebody else later needs to update the
> > graph to be tested to add one more commit (or even more)?  Would it
> > be enough to add another "rev-parse" plus "dist=X" line in both
> > expects?  Or do we see a trap for combinatorial explosion that
> > requires us to add new expect$N?
> 
> What about the following then:
> 
> test_dist_order () {
>     file="$1"
>     n="$2"
>     while read -r hash dist
>     do
>         d=$(echo "$dist" | sed -e "s/(dist=\(.*\))/\1/")
>         case "$d" in
>             ''|*[!0-9]*) return 1 ;;
>             *) ;;
>         esac
>         test "$d" -le "$n" || return 1
>         n="$d"
>     done <"$file"
> }
> 
> test_expect_success "--bisect-all --first-parent" '
>     cat >expect <<EOF &&
> $(git rev-parse CC) (dist=2)
> $(git rev-parse EX) (dist=1)
> $(git rev-parse D) (dist=1)
> $(git rev-parse FX) (dist=0)
> EOF
>     sort expect >expect_sorted &&
>     git rev-list --bisect-all --first-parent FX ^A >actual &&
>     sort actual >actual_sorted &&
>     test_cmp expect_sorted actual_sorted &&
>     test_dist_order actual 2
> '
> 
> This feels overkill to me, but it should scale if we ever make more
> complex tests.

I *think* that you misunderstand Junio. At least when I read this:

> $(git rev-parse CC) (dist=2)
> $(git rev-parse EX) (dist=1)
> $(git rev-parse D) (dist=1)
> $(git rev-parse FX) (dist=0)

I go: Huh?!?!?!?!?!

What's CC? Is it Christian Couder? Creative Commons? Crudely Complex?

The point, for me, is: if this test fails, at some stage in the future,
for any reason, it will be a major pain to even dissect what the test is
supposed to do. This is no good. And you can do better. A lot better. You
can write the test in a way that the head of a reader does not hurt, and
at the same time it is still obvious what it does, and obvious that it
does the right thing.

One thing that makes the brain stumble for certain is when you deviate
from the conventions, especially when it is for no good reason at all. In
this case (and I am not sure why you, as a long-time contributor, did not
spot this before public review):

- the titles of the test cases leave a lot of room for improvement,

- the lines are too long,

- the convention to end the `test_expect_success` line with an opening
  single quote is not heeded,

- the graph is definitely written in an unnecessarily different format
  than in the same test script!!!

- speaking of the graph: there is a perfectly fine commit graph already.
  Why on earth is it not reused?

In this particular case it even feels as if this test is not even testing
what it should test at all:

- it should verify that all of the commits in the first parent lineage are
  part of the list

- it should verify that none of the other commits are in the list

And that is really all there is to test. You can even write that in a much
easier way:

-- snip --
test_expect_success '--first-parent --bisect-all lists correct revs' '
	git rev-list --first-parent --bisect-all F..E >revs &&
        # E and e1..e8 need to be shown, F and f1..f4 not
	test_line_count = 9 revs &&
	for rev in E e1 e2 e3 e4 e5 e6 e7 e8
	do
		grep "^$(git rev-parse $rev) " revs || {
			echo "$rev not shown" >&2 &&
			return 1
		}
	done
'
-- snap --

To test more precisely for the order or the distance would be both
overkill and likely to be unreadable.

To test `--bisect-vars` here would be excessive, as the part that handles
that particular option is not even touched. All that is touched is the
logic in the bisect algorithm in conjunction with --first-parent. And that
is all that should be tested here.

With a test like the one I outlined above, I only have one more gripe
about the patch: the commit message does nothing to explain this part of
the diff:

+                               if ((bisect_flags & BISECT_FIRST_PARENT)) {
+                                       if (weight(q) < 0)
+                                               q = NULL;
+                                       break;
+                               }

And I would really, really like that to be explained in the commit
message. Because to me, it is completely opaque why this needs to be here.
The rest of the diff is pretty obvious, though.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-26 14:10     ` Johannes Schindelin
@ 2018-06-26 15:41       ` Christian Couder
  2018-06-26 21:16         ` Johannes Schindelin
  2018-06-26 22:20         ` Junio C Hamano
  0 siblings, 2 replies; 18+ messages in thread
From: Christian Couder @ 2018-06-26 15:41 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Junio C Hamano, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Hi Dscho,

On Tue, Jun 26, 2018 at 4:10 PM, Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> On Tue, 26 Jun 2018, Christian Couder wrote:
>
>> On Mon, Jun 25, 2018 at 7:33 PM, Junio C Hamano <gitster@pobox.com> wrote:
>>
>> > I hate to say this, but the above looks like a typical
>> > unmaintainable mess.
>> >
>> > What happens when you or somebody else later needs to update the
>> > graph to be tested to add one more commit (or even more)?  Would it
>> > be enough to add another "rev-parse" plus "dist=X" line in both
>> > expects?  Or do we see a trap for combinatorial explosion that
>> > requires us to add new expect$N?
>>
>> What about the following then:
>>
>> test_dist_order () {
>>     file="$1"
>>     n="$2"
>>     while read -r hash dist
>>     do
>>         d=$(echo "$dist" | sed -e "s/(dist=\(.*\))/\1/")
>>         case "$d" in
>>             ''|*[!0-9]*) return 1 ;;
>>             *) ;;
>>         esac
>>         test "$d" -le "$n" || return 1
>>         n="$d"
>>     done <"$file"
>> }
>>
>> test_expect_success "--bisect-all --first-parent" '
>>     cat >expect <<EOF &&
>> $(git rev-parse CC) (dist=2)
>> $(git rev-parse EX) (dist=1)
>> $(git rev-parse D) (dist=1)
>> $(git rev-parse FX) (dist=0)
>> EOF
>>     sort expect >expect_sorted &&
>>     git rev-list --bisect-all --first-parent FX ^A >actual &&
>>     sort actual >actual_sorted &&
>>     test_cmp expect_sorted actual_sorted &&
>>     test_dist_order actual 2
>> '
>>
>> This feels overkill to me, but it should scale if we ever make more
>> complex tests.
>
> I *think* that you misunderstand Junio. At least when I read this:
>
>> $(git rev-parse CC) (dist=2)
>> $(git rev-parse EX) (dist=1)
>> $(git rev-parse D) (dist=1)
>> $(git rev-parse FX) (dist=0)
>
> I go: Huh?!?!?!?!?!
>
> What's CC? Is it Christian Couder? Creative Commons? Crudely Complex?

I agree that the name of the commit could be improved.

> The point, for me, is: if this test fails, at some stage in the future,
> for any reason, it will be a major pain to even dissect what the test is
> supposed to do.

I think the test is quite simple and there is an ascii picture, so it
is not so difficult to understand.

> This is no good. And you can do better. A lot better. You
> can write the test in a way that the head of a reader does not hurt, and
> at the same time it is still obvious what it does, and obvious that it
> does the right thing.

Obviousness is often not the same for everybody.

> One thing that makes the brain stumble for certain is when you deviate
> from the conventions, especially when it is for no good reason at all. In
> this case (and I am not sure why you, as a long-time contributor, did not
> spot this before public review):

Please note that I never committed myself (like most of us who are not
maintainer actually) to reviewing everything bisect related (or
everything that Tiago works on).

> - the titles of the test cases leave a lot of room for improvement,
>
> - the lines are too long,
>
> - the convention to end the `test_expect_success` line with an opening
>   single quote is not heeded,

If you take a look at the beginning of the script you will see that
there are those problems there too.

I know that we should try to do better, but here I am mostly
interested in moving forward a feature that people have requested for
ages, not cleaning up those tests. If someone else like you or Junio
thinks that it would be a good time to clean things up a bit, then I
am ok with it, but that's not my main goal here.

> - the graph is definitely written in an unnecessarily different format
>   than in the same test script!!!

Just above you complain about things that are similar to the previous
tests and now you complain about things that are different...

> - speaking of the graph: there is a perfectly fine commit graph already.
>   Why on earth is it not reused?

Perhaps because it is more complex than needed to test this feature
and/or to understand what happens. And I don't think we require
everywhere only one commit graph per test script.

> In this particular case it even feels as if this test is not even testing
> what it should test at all:
>
> - it should verify that all of the commits in the first parent lineage are
>   part of the list

It does that.

> - it should verify that none of the other commits are in the list

It does that too.

> And that is really all there is to test.

I don't agree. I think that when possible, especially when testing
plumbing commands like rev-list, it is a good thing to test as many
things as possible at once.

> You can even write that in a much
> easier way:
>
> -- snip --
> test_expect_success '--first-parent --bisect-all lists correct revs' '
>         git rev-list --first-parent --bisect-all F..E >revs &&
>         # E and e1..e8 need to be shown, F and f1..f4 not
>         test_line_count = 9 revs &&
>         for rev in E e1 e2 e3 e4 e5 e6 e7 e8
>         do
>                 grep "^$(git rev-parse $rev) " revs || {
>                         echo "$rev not shown" >&2 &&
>                         return 1
>                 }
>         done
> '
> -- snap --
>
> To test more precisely for the order or the distance would be both
> overkill and likely to be unreadable.

I don't think the previous version was either overkill or unreadable.
Yeah it had other (potential in my opinion) problems, but I was trying
with my suggestion to find a good balance between the different
requirements (readability, complexity, maintainability, testing many
things).

By the way with only the 2 requirements you list above, I think the
simplest would be sorting things like I do it in my suggestion without
checking that the (dist=X) are in the right order with just:

test_expect_success "--bisect-all --first-parent" '
    cat >expect <<EOF &&
$(git rev-parse CC) (dist=2)
$(git rev-parse EX) (dist=1)
$(git rev-parse D) (dist=1)
$(git rev-parse FX) (dist=0)
EOF
    sort expect >expect_sorted &&
    git rev-list --bisect-all --first-parent FX ^A >actual &&
    sort actual >actual_sorted &&
    test_cmp expect_sorted actual_sorted
'

> To test `--bisect-vars` here would be excessive, as the part that handles
> that particular option is not even touched. All that is touched is the
> logic in the bisect algorithm in conjunction with --first-parent. And that
> is all that should be tested here.

I don't agree with that. I think that as we now enable --first-parent
with all the --bisect* options, we should test at least once all the
--bisect* options with --first-parent.

> With a test like the one I outlined above, I only have one more gripe
> about the patch: the commit message does nothing to explain this part of
> the diff:
>
> +                               if ((bisect_flags & BISECT_FIRST_PARENT)) {
> +                                       if (weight(q) < 0)
> +                                               q = NULL;
> +                                       break;
> +                               }
>
> And I would really, really like that to be explained in the commit
> message. Because to me, it is completely opaque why this needs to be here.

This was suggested by Junio in a previous iteration and I agree that a
comment would be welcome.

> The rest of the diff is pretty obvious, though.

Thanks for taking a look anyway,
Christian.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-26 15:41       ` Christian Couder
@ 2018-06-26 21:16         ` Johannes Schindelin
  2018-06-26 22:20         ` Junio C Hamano
  1 sibling, 0 replies; 18+ messages in thread
From: Johannes Schindelin @ 2018-06-26 21:16 UTC (permalink / raw)
  To: Christian Couder
  Cc: Junio C Hamano, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Hi Chris,

On Tue, 26 Jun 2018, Christian Couder wrote:

> On Tue, Jun 26, 2018 at 4:10 PM, Johannes Schindelin
> <Johannes.Schindelin@gmx.de> wrote:
> >
> > The point, for me, is: if this test fails, at some stage in the
> > future, for any reason, it will be a major pain to even dissect what
> > the test is supposed to do.
> 
> I think the test is quite simple and there is an ascii picture, so it is
> not so difficult to understand.

So I tell you that I find it hard to understand and you try to refute this
review by... saying that it is quite simple to understand?

That's not really a basis for discussion there, if you do not even
acknowledge that both Junio and I pointed out a serious flaw in the patch.
I have no idea how you think we can reach consensus if you just deny that
there is a problem *when I just demonstrated that there is a
problem*?!?!?!

> I know that we should try to do better, but here I am mostly
> interested in moving forward a feature that people have requested for
> ages, not cleaning up those tests. If someone else like you or Junio
> thinks that it would be a good time to clean things up a bit, then I
> am ok with it, but that's not my main goal here.

This test is not good. It is not good because if even a long-time
contributor such as myself finds it too hard to understand.

So you cannot just force it forward.

> > - the graph is definitely written in an unnecessarily different format
> > than in the same test script!!!
> 
> Just above you complain about things that are similar to the previous
> tests and now you complain about things that are different...

You misunderstood. I complained that things are *not* similar to previous
tests. I said the exact opposite of what you understood.

> > - speaking of the graph: there is a perfectly fine commit graph already.
> >   Why on earth is it not reused?
> 
> Perhaps because it is more complex than needed to test this feature
> and/or to understand what happens.

What? Have you even *bothered* to look at the graphs?

If at all, the newly introduced is *less* complex (except for the
pointlessly confusing commit names). Both graphs have a branch point, a
tip commit, and a first-parent and a second-parent lineage.

So the new test introduces new commits for no good reason whatsoever, with
a diagram that is unnecessarily different from the one that is already in
the file (horizontal layout instead of vertical, and the horizontal layout
disagrees even with the existing other tests' horizontal graph diagram).

> > In this particular case it even feels as if this test is not even testing
> > what it should test at all:
> >
> > - it should verify that all of the commits in the first parent lineage are
> >   part of the list
> 
> It does that.

Yes, it does that, and you can verify that it does by, uhm, spending 10
minutes pouring over the diagram.

In my proposed test case, you see the same after reading the simple loop
and from the comment and the line count test, i.e. after spending 10
seconds.

So your "It does that" is a bit inappropriate. Of course your test does
that. In a very convoluted way. And I demonstrated that it does not have
to be convoluted at all.

> > - it should verify that none of the other commits are in the list
> 
> It does that too.

Same here. You miss the point. It is obvious in my proposed test case that
it does that. In your proposed test case it is everything but obvious.

> > And that is really all there is to test.
> 
> I don't agree. I think that when possible, especially when testing
> plumbing commands like rev-list, it is a good thing to test as many
> things as possible at once.

Then you do not understand the main purpose of regression tests, which is
to not only catch, but also to fix regressions. And your test code seems
to be designed to make the latter very, very hard. Just like your response
to my review almost appears to be designed to be defiant rather than
constructive.

If you want to accept my help in making your code better, we can continue.
Otherwise I am afraid that your patch is stuck in the "not good enough"
pile.

> > You can even write that in a much
> > easier way:
> >
> > -- snip --
> > test_expect_success '--first-parent --bisect-all lists correct revs' '
> >         git rev-list --first-parent --bisect-all F..E >revs &&
> >         # E and e1..e8 need to be shown, F and f1..f4 not
> >         test_line_count = 9 revs &&
> >         for rev in E e1 e2 e3 e4 e5 e6 e7 e8
> >         do
> >                 grep "^$(git rev-parse $rev) " revs || {
> >                         echo "$rev not shown" >&2 &&
> >                         return 1
> >                 }
> >         done
> > '
> > -- snap --
> >
> > To test more precisely for the order or the distance would be both
> > overkill and likely to be unreadable.
> 
> I don't think the previous version was either overkill or unreadable.

Okay, this is too tedious. Seriously, why do you make it so unpleasant to
help you improve the patch.

> Yeah it had other (potential in my opinion) problems, but I was trying
> with my suggestion to find a good balance between the different
> requirements (readability, complexity, maintainability, testing many
> things).
> 
> By the way with only the 2 requirements you list above, I think the
> simplest would be sorting things like I do it in my suggestion without
> checking that the (dist=X) are in the right order with just:
> 
> test_expect_success "--bisect-all --first-parent" '
>     cat >expect <<EOF &&
> $(git rev-parse CC) (dist=2)
> $(git rev-parse EX) (dist=1)
> $(git rev-parse D) (dist=1)
> $(git rev-parse FX) (dist=0)
> EOF
>     sort expect >expect_sorted &&
>     git rev-list --bisect-all --first-parent FX ^A >actual &&
>     sort actual >actual_sorted &&
>     test_cmp expect_sorted actual_sorted
> '
> 
> > To test `--bisect-vars` here would be excessive, as the part that handles
> > that particular option is not even touched. All that is touched is the
> > logic in the bisect algorithm in conjunction with --first-parent. And that
> > is all that should be tested here.
> 
> I don't agree with that. I think that as we now enable --first-parent
> with all the --bisect* options, we should test at least once all the
> --bisect* options with --first-parent.

But why? Why, why, why? Do you touch code that changes all of those code
paths? No you don't. Not at all. You change a code path that is shared by
those options, so if there is a regression, it affects all of those
options.

Your proposed test is too extensive and therefore just *wastes space and
time for no good reason*.

> > With a test like the one I outlined above, I only have one more gripe
> > about the patch: the commit message does nothing to explain this part of
> > the diff:
> >
> > +                               if ((bisect_flags & BISECT_FIRST_PARENT)) {
> > +                                       if (weight(q) < 0)
> > +                                               q = NULL;
> > +                                       break;
> > +                               }
> >
> > And I would really, really like that to be explained in the commit
> > message. Because to me, it is completely opaque why this needs to be here.
> 
> This was suggested by Junio in a previous iteration and I agree that a
> comment would be welcome.

That is a very roundabout way of saying that this explanation is required.

> > The rest of the diff is pretty obvious, though.
> 
> Thanks for taking a look anyway,

There is absolutely no need to be flippant and disrespectful about this.

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-26 15:41       ` Christian Couder
  2018-06-26 21:16         ` Johannes Schindelin
@ 2018-06-26 22:20         ` Junio C Hamano
  2018-06-27 11:48           ` Johannes Schindelin
  1 sibling, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2018-06-26 22:20 UTC (permalink / raw)
  To: Christian Couder
  Cc: Johannes Schindelin, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Christian Couder <christian.couder@gmail.com> writes:

> Obviousness is often not the same for everybody.

... which you just learned---what you thought obvious turns out to
be not so obvious after all, so you adjust to help your readers.

>> In this particular case it even feels as if this test is not even testing
>> what it should test at all:
>>
>> - it should verify that all of the commits in the first parent lineage are
>>   part of the list
>
> It does that.
>
>> - it should verify that none of the other commits are in the list
>
> It does that too.

But the point is it does a lot more by insisting exact output.  For
example, the version I reviewed had a two "expected output", and
said that the actual output must match either one of them.  I guess
it was because there were two entries with the same distance and we
cannot rely on which order they appear in the result?  If a test
history gained another entry with the same distance, then would we
need 6 possible expected output because we cannot rely on the order
in which these three come out?

That was the only thing I was complaining about.  Dscho gave me too
much credit and read a lot more good things than what I actually
meant to say ;-).

>> And that is really all there is to test.

Another is that "rev-list --bisect-all" promises that the entries
are ordered by the distance value.  So taking the above three
points, perhaps

	cat >expect <<EOF &&
	... as written in one of the expect list in Tiago's patch
	EOF

	# Make sure we have the same entries, nothing more, nothing less
	git rev-list --bisect-all $other_args >actual &&
	sort actual >actual.sorted &&
	sort expect >expect.sorted &&
	test_cmp expect.sorted actual.sorted

	# Make sure the entries are sorted in the dist order
	sed -e 's/.*(dist=\([1-9]*[0-9]\)).*/\1/' actual >actual.dists &&
	sort actual.dists >actual.dists.sorted &&
	test_cmp actual.dists.sorted actual.dists

is what I would have expected.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-26 22:20         ` Junio C Hamano
@ 2018-06-27 11:48           ` Johannes Schindelin
  2018-06-27 16:26             ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2018-06-27 11:48 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Christian Couder, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Hi Junio,

On Tue, 26 Jun 2018, Junio C Hamano wrote:

> Christian Couder <christian.couder@gmail.com> writes:
> 
> > Obviousness is often not the same for everybody.
> 
> ... which you just learned---what you thought obvious turns out to
> be not so obvious after all, so you adjust to help your readers.

Indeed. And trying to tell the reader that they should find it obvious is
not exactly productive. It only causes bad feelings and can be easily
avoided.

> >> In this particular case it even feels as if this test is not even
> >> testing what it should test at all:
> >>
> >> - it should verify that all of the commits in the first parent
> >> lineage are part of the list
> >
> > It does that.
> >
> >> - it should verify that none of the other commits are in the list
> >
> > It does that too.
> 
> But the point is it does a lot more by insisting exact output.

Indeed. One thing it does in addition is to make the test code a lot less
obvious for readers in general.

Let me summarize again what good regression tests have to deliver, because
I think it cannot be stressed enough, especially in this context where we
run the danger of adding poor regression tests:

- a regression test needs to catch regressions (d'oh)

- a regression test needs to be *quick*. Otherwise developers will skip
  running them, which is worse than having no tests at all (because the
  effort to develop them is wasted)

- a regression test must make it as easy as possible to fix regressions

There are quite a few corollaries to that last point, some of which are:

- a regression test that fails for anything but an obvious reason is
  *useless*

- a regression test that tries to test for *everything* (including dogs and
  cats) not only breaks the quickness requirement, but it also makes it
  confusing where to start fixing things. "The test talked about --bisect
  but now I am stuck somewhere in XYZ.c, what has *that* to do with
  --bisect?" is *not* something you want to risk any developer yelling at
  the test code that you authored

- less is more. If you can use commits that were already generated in the
  common "setup" step, there is literally a negative value in generating a
  new set of commits instead

- less is more. If you can catch the same regressions in three, concise
  *and* understandable lines, avoid using thirty lines instead

- regression tests should not need to be adjusted when the logic changes
  in an intended way. It is a strong sign that a regression test was
  written badly if it starts failing for any reason other than a
  regression

The overzealous "I want this output to be exactly this" stance of the
tests we are discussing here is a very obvious violation here.

Regression tests should fail only to indicate regressions.

Really. Let me repeat that because it is so obvious when you think about
it, but it is easy to forget when writing regression tests:

Regression tests should fail only to indicate regressions.

> For example, the version I reviewed had a two "expected output", and
> said that the actual output must match either one of them.  I guess it
> was because there were two entries with the same distance and we cannot
> rely on which order they appear in the result?  If a test history gained
> another entry with the same distance, then would we need 6 possible
> expected output because we cannot rely on the order in which these three
> come out?

It is totally unnecessary to go there, as it would make those regression
tests a lot less valuable than they could otherwise be. Let me elaborate
further below.

> That was the only thing I was complaining about.  Dscho gave me too
> much credit and read a lot more good things than what I actually
> meant to say ;-).

Why don't you just accept my praise gracefully? ;-)

It's not that I gave you a lot of praise recently, even if you clearly
deserve it.

> >> And that is really all there is to test.
> 
> Another is that "rev-list --bisect-all" promises that the entries
> are ordered by the distance value.

Yes! And you know what we can do there? We can test *precisely* that!

	# verify that the output is sorted by `dist` (descending)
	sed "s/.*dist=\([0-9]*\).*/\1/" <revs >dist &&
	sort -n -r <dist >dist.sorted &&
	test_cmp dist dist.sorted

This extracts the distance numbers into their own file, then verifies that
they were already sorted.

The really big advantage here is that any future change that might result
in a different order of entries with the same "dist" value *will not cause
this regression test to fail*. And for a good reason: because ordering
identical "dist" values differently is not a regression at all.

> So taking the above three points, perhaps
> 
> 	cat >expect <<EOF &&
> 	... as written in one of the expect list in Tiago's patch
> 	EOF

Please no. Please let's *not* generate more commits when we already have a
perfectly fine set of commits generated by the setup phase of the very
same script. Please let's not use confusing names for those commits.
Please let's not add a diagram whos layout deviates for no good reason
from the layout used by the existing diagram in the same file.

> 	# Make sure we have the same entries, nothing more, nothing less
> 	git rev-list --bisect-all $other_args >actual &&
> 	sort actual >actual.sorted &&
> 	sort expect >expect.sorted &&
> 	test_cmp expect.sorted actual.sorted

With plenty experience in investigating test failures of Git's test suite
under my belt, let me tell you how much faster it is for me to start
debugging when reading this code instead:

	git rev-list --bisect-all --first-parent F..E >revs &&
	# only E, e1..e8 should be listed, nothing else
	test_line_count = 9 revs &&
	for rev in E e1 e2 e3 e4 e5 e6 e7 e8
	do
		grep "^$(git rev-parse $rev) " revs || return
	done

I am faster by... a lot. Like, seconds instead of minutes.

As a bonus, it is also shorter. Quicker to read. Easier to grok. Quicker
to validate manually.

> 	# Make sure the entries are sorted in the dist order
> 	sed -e 's/.*(dist=\([1-9]*[0-9]\)).*/\1/' actual >actual.dists &&
> 	sort actual.dists >actual.dists.sorted &&
> 	test_cmp actual.dists.sorted actual.dists
> 
> is what I would have expected.

... but without the [1-9] because the distance can be 0. And with a star
after the [0-9] instead.

And then you have essentially the very same thing I suggested above. A
readable regression test, that only triggers failures upon the regression
we want to prevent, is fast to grok, actionable, and leads to fast
debugging.

Just to make sure: I am utterly disinterested in pushing "my" code
through. I am very much interested in having a good patch with a good
regression test and a good commit message. Otherwise I would not put up
with what it takes to review it.

So Chris, if you feel that you must "win" against me, just take Junio's
code already. In that case, however, do change his code to avoid
generating those unnecessary commits.

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-27 11:48           ` Johannes Schindelin
@ 2018-06-27 16:26             ` Junio C Hamano
  2018-06-28 13:08               ` Johannes Schindelin
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2018-06-27 16:26 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Christian Couder, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> 	git rev-list --bisect-all --first-parent F..E >revs &&
> 	# only E, e1..e8 should be listed, nothing else
> 	test_line_count = 9 revs &&
> 	for rev in E e1 e2 e3 e4 e5 e6 e7 e8
> 	do
> 		grep "^$(git rev-parse $rev) " revs || return
> 	done
>
> I am faster by... a lot. Like, seconds instead of minutes.

I'm fine either way.  I just thought you would not want 9 separate
invocations of grep ;-)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-27 16:26             ` Junio C Hamano
@ 2018-06-28 13:08               ` Johannes Schindelin
  2018-06-28 16:12                 ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2018-06-28 13:08 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Christian Couder, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Hi Junio,

On Wed, 27 Jun 2018, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > 	git rev-list --bisect-all --first-parent F..E >revs &&
> > 	# only E, e1..e8 should be listed, nothing else
> > 	test_line_count = 9 revs &&
> > 	for rev in E e1 e2 e3 e4 e5 e6 e7 e8
> > 	do
> > 		grep "^$(git rev-parse $rev) " revs || return
> > 	done
> >
> > I am faster by... a lot. Like, seconds instead of minutes.
> 
> I'm fine either way.  I just thought you would not want 9 separate
> invocations of grep ;-)

I don't.

I like the unnecessary test_commit calls even less ;-)

And yes, we could avoid those `grep` calls, I know. By something as
convoluted as

	revs="$(cat revs)" &&
	for rev in $(git rev-parse E e1 e2 e3 e4 e5 e6 e7 e8)
	do
		case "$LF$revs" in
		*"$LF$rev "*) ;; # okay
		*) echo "Missing: $rev">&2; return 1;;
		esac
	done

Is this really what you would call an easy-to-understand test? Maybe for
you, a Unix oldtimer and shell scripting king.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-28 13:08               ` Johannes Schindelin
@ 2018-06-28 16:12                 ` Junio C Hamano
  2018-06-29 11:20                   ` Johannes Schindelin
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2018-06-28 16:12 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Christian Couder, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> On Wed, 27 Jun 2018, Junio C Hamano wrote:
>
>> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>> 
>> > 	git rev-list --bisect-all --first-parent F..E >revs &&
>> > 	# only E, e1..e8 should be listed, nothing else
>> > 	test_line_count = 9 revs &&
>> > 	for rev in E e1 e2 e3 e4 e5 e6 e7 e8
>> > 	do
>> > 		grep "^$(git rev-parse $rev) " revs || return
>> > 	done
>> >
>> > I am faster by... a lot. Like, seconds instead of minutes.
>> 
>> I'm fine either way.  I just thought you would not want 9 separate
>> invocations of grep ;-)
>
> I don't.
>
> I like the unnecessary test_commit calls even less ;-)
>
> And yes, we could avoid those `grep` calls, I know. By something as
> convoluted as

I now think you are reading me wrong ;-)  

I think you already established pretty well that it is a good idea
to avoid introducing different history to run the new test on, when
there is perfectly usable one available.  That part, i.e. test_commit,
I do not think we have anything to disagree about.

What I meant by "many separte grep calls" was to contrast these two
approaches:

 * Have one typical output spelled out as "expect", take an output
   from an actual run into "actual", make them comparable and then
   do a compare (which does not use grep; it uses sort in the
   "making comparable" phase)

 * Not have any typical output, take an output from an actual run,
   and have _code_ that inspects the output encode the rule over the
   output (e.g. "these must exist" is inspected with many grep
   invocations)

Two things the "output must have 9 entries, and these 9 must be
mentioned" we see at the beginning of this message does not verify
are (1) exact dist value given to each of these entries and (2) the
order in which these entries appear in the output.  The latter is
something we document, and the test should cover.  The former does
not have to be cast in stone (i.e. I do not think it does not make a
difference to label the edge commits with dist=1 or dist=0 as long
as everything is consistent), but if there is no strong reason to
keep it possible for us to later change how the numbers are assigned,
I am OK if the test cast it in stone.

Another reason (other than "many separate invocation of grep") to
favor the former approach (i.e. use real-looking expected output,
munge it and the real output into comparable forms and then compare)
is that it is easier to see what aspect of the output we care (and
we do not care) about.

It is harder to see the omission of exact dist value and ordering of
entries in the outpu in the latter approach, and more importantly,
know if the omission was deliberate (e.g. it was merely an example)
or a mere mistake.

With "using a real-looking expected output, make it and the actual
output comparable and then compare" approach, the aspects in the
output we choose not to care about will show in the "make them
comparable" munging.  If we do not care the exact dist values, there
would be something like s/dist=[0-9]*/dist=X/ to mask the exact
value before making the two comparable to see that the expect and
the actual files have the same entries.  If we still do care about
the entries are ordered by the dist values, there would be something
that sorts the entries with the actual dist values before doing that
masking to ensure if the order is correct.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-28 16:12                 ` Junio C Hamano
@ 2018-06-29 11:20                   ` Johannes Schindelin
  2018-07-03 21:33                     ` Tiago Botelho
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2018-06-29 11:20 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Christian Couder, Tiago Botelho, git, Harald Nordgren,
	Tiago Botelho

Hi Junio,

On Thu, 28 Jun 2018, Junio C Hamano wrote:

> What I meant by "many separte grep calls" was to contrast these two
> approaches:
> 
>  * Have one typical output spelled out as "expect", take an output
>    from an actual run into "actual", make them comparable and then
>    do a compare (which does not use grep; it uses sort in the
>    "making comparable" phase)
> 
>  * Not have any typical output, take an output from an actual run,
>    and have _code_ that inspects the output encode the rule over the
>    output (e.g. "these must exist" is inspected with many grep
>    invocations)
> 
> Two things the "output must have 9 entries, and these 9 must be
> mentioned" we see at the beginning of this message does not verify
> are (1) exact dist value given to each of these entries and (2) the
> order in which these entries appear in the output.  The latter is
> something we document, and the test should cover.  The former does
> not have to be cast in stone (i.e. I do not think it does not make a
> difference to label the edge commits with dist=1 or dist=0 as long
> as everything is consistent), but if there is no strong reason to
> keep it possible for us to later change how the numbers are assigned,
> I am OK if the test cast it in stone.
> 
> Another reason (other than "many separate invocation of grep") to
> favor the former approach (i.e. use real-looking expected output,
> munge it and the real output into comparable forms and then compare)
> is that it is easier to see what aspect of the output we care (and
> we do not care) about.
> 
> It is harder to see the omission of exact dist value and ordering of
> entries in the outpu in the latter approach, and more importantly,
> know if the omission was deliberate (e.g. it was merely an example)
> or a mere mistake.
> 
> With "using a real-looking expected output, make it and the actual
> output comparable and then compare" approach, the aspects in the
> output we choose not to care about will show in the "make them
> comparable" munging.  If we do not care the exact dist values, there
> would be something like s/dist=[0-9]*/dist=X/ to mask the exact
> value before making the two comparable to see that the expect and
> the actual files have the same entries.  If we still do care about
> the entries are ordered by the dist values, there would be something
> that sorts the entries with the actual dist values before doing that
> masking to ensure if the order is correct.

The problem here is of course that you *cannot* set the exact output
in stone, because of sorting instabilities.

So you have to play tricks to sort (twice, with different keys) the
expected output and the actual output, to verify that all the expected
commits are listed (and only those) and to verify that they are ordered by
the distance, in descending order.

And this trick, while it still makes the test correct and stable and yadda
yadda, *also* makes this trick a lot less readable than my version. And
therefore makes it more difficult to verify that your test actually does
what it is supposed to do.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-06-29 11:20                   ` Johannes Schindelin
@ 2018-07-03 21:33                     ` Tiago Botelho
  2018-07-03 22:36                       ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Tiago Botelho @ 2018-07-03 21:33 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Junio C Hamano, Christian Couder, git, Harald Nordgren,
	Tiago Botelho

On Fri, 29 Jun 2018 at 12:21, Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi Junio,
>
> On Thu, 28 Jun 2018, Junio C Hamano wrote:
>
> > What I meant by "many separte grep calls" was to contrast these two
> > approaches:
> >
> >  * Have one typical output spelled out as "expect", take an output
> >    from an actual run into "actual", make them comparable and then
> >    do a compare (which does not use grep; it uses sort in the
> >    "making comparable" phase)
> >
> >  * Not have any typical output, take an output from an actual run,
> >    and have _code_ that inspects the output encode the rule over the
> >    output (e.g. "these must exist" is inspected with many grep
> >    invocations)
> >
> > Two things the "output must have 9 entries, and these 9 must be
> > mentioned" we see at the beginning of this message does not verify
> > are (1) exact dist value given to each of these entries and (2) the
> > order in which these entries appear in the output.  The latter is
> > something we document, and the test should cover.  The former does
> > not have to be cast in stone (i.e. I do not think it does not make a
> > difference to label the edge commits with dist=1 or dist=0 as long
> > as everything is consistent), but if there is no strong reason to
> > keep it possible for us to later change how the numbers are assigned,
> > I am OK if the test cast it in stone.
> >
> > Another reason (other than "many separate invocation of grep") to
> > favor the former approach (i.e. use real-looking expected output,
> > munge it and the real output into comparable forms and then compare)
> > is that it is easier to see what aspect of the output we care (and
> > we do not care) about.
> >
> > It is harder to see the omission of exact dist value and ordering of
> > entries in the outpu in the latter approach, and more importantly,
> > know if the omission was deliberate (e.g. it was merely an example)
> > or a mere mistake.
> >
> > With "using a real-looking expected output, make it and the actual
> > output comparable and then compare" approach, the aspects in the
> > output we choose not to care about will show in the "make them
> > comparable" munging.  If we do not care the exact dist values, there
> > would be something like s/dist=[0-9]*/dist=X/ to mask the exact
> > value before making the two comparable to see that the expect and
> > the actual files have the same entries.  If we still do care about
> > the entries are ordered by the dist values, there would be something
> > that sorts the entries with the actual dist values before doing that
> > masking to ensure if the order is correct.
>
> The problem here is of course that you *cannot* set the exact output
> in stone, because of sorting instabilities.
>
> So you have to play tricks to sort (twice, with different keys) the
> expected output and the actual output, to verify that all the expected
> commits are listed (and only those) and to verify that they are ordered by
> the distance, in descending order.
>
> And this trick, while it still makes the test correct and stable and yadda
> yadda, *also* makes this trick a lot less readable than my version. And
> therefore makes it more difficult to verify that your test actually does
> what it is supposed to do.
>
> Ciao,
> Dscho

Hello,

first of all I would like to thank all the feedback provided in this patch it
has truly helped me progress on my first contribution to git.

After looking through Junio's and Johannes's suggestions I believe that
the *only* test we should add will be something like:

-- snip --
test_expect_success '--first-parent --bisect-all lists correct revs' '
git rev-list --first-parent --bisect-all F..E >revs &&
test_line_count = 9 revs &&
for rev in E e1 e2 e3 e4 e5 e6 e7 e8
do
  grep "^$(git rev-parse $rev) " revs ||
  {
    echo "$rev not shown" >&2 &&
    return 1
  }
done &&
sed -e "s/.*(dist=\([0-9]*\)).*/\1/" revs >actual.dists &&
sort -r actual.dists >actual.dists.sorted &&
test_cmp actual.dists.sorted actual.dists
'
-- snap --

The only change I had to make was use -r in sort to
revert the sorting in `sort` otherwise we get it in
ascending order but the revs are provided in descending order.

Kind regards,
Tiago

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-07-03 21:33                     ` Tiago Botelho
@ 2018-07-03 22:36                       ` Junio C Hamano
  2018-07-04 10:26                         ` Johannes Schindelin
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2018-07-03 22:36 UTC (permalink / raw)
  To: Tiago Botelho
  Cc: Johannes Schindelin, Christian Couder, git, Harald Nordgren,
	Tiago Botelho

Tiago Botelho <tiagonbotelho@gmail.com> writes:

> git rev-list --first-parent --bisect-all F..E >revs &&
> test_line_count = 9 revs &&
> for rev in E e1 e2 e3 e4 e5 e6 e7 e8
> do
>   grep "^$(git rev-parse $rev) " revs ||
>   {
>     echo "$rev not shown" >&2 &&
>     return 1
>   }
> done &&
> sed -e "s/.*(dist=\([0-9]*\)).*/\1/" revs >actual.dists &&
> sort -r actual.dists >actual.dists.sorted &&
> test_cmp actual.dists.sorted actual.dists

The distance in the current graph might be all lower single-digits,
but you need "sort -n -r" to be future-proof, as dist=10 would sort
next to dist=1, perhaps in between dist=1 and dist=2.

Another thing you missed in my message is that the above does not
say what distance value each commit should be assigned in the
history.  Even though the grep loop makes sure that each of E, e1,
... e8 appears at least once, and line-count before that ensures
that the output has 9 entries (and taken together, it guarantees
that each of these appears not just "at least once", but also
exactly once), nothing guarantees if they are getting relative
distance correctly, as the sed strips a bit too much (and that
relates to my earlier point why starting from a concrete expected
output and explicitly discard the aspect of the output we do not
care about before comparison---that way, we can easily tell when the
code is _designed to_ discard too much).

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-07-03 22:36                       ` Junio C Hamano
@ 2018-07-04 10:26                         ` Johannes Schindelin
  2018-07-06 18:14                           ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2018-07-04 10:26 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Tiago Botelho, Christian Couder, git, Harald Nordgren,
	Tiago Botelho

Hi Junio,

On Tue, 3 Jul 2018, Junio C Hamano wrote:

> Tiago Botelho <tiagonbotelho@gmail.com> writes:
> 
> > git rev-list --first-parent --bisect-all F..E >revs &&
> > test_line_count = 9 revs &&
> > for rev in E e1 e2 e3 e4 e5 e6 e7 e8
> > do
> >   grep "^$(git rev-parse $rev) " revs ||
> >   {
> >     echo "$rev not shown" >&2 &&
> >     return 1
> >   }
> > done &&
> > sed -e "s/.*(dist=\([0-9]*\)).*/\1/" revs >actual.dists &&
> > sort -r actual.dists >actual.dists.sorted &&
> > test_cmp actual.dists.sorted actual.dists
> 
> The distance in the current graph might be all lower single-digits,
> but you need "sort -n -r" to be future-proof, as dist=10 would sort
> next to dist=1, perhaps in between dist=1 and dist=2.
> 
> Another thing you missed in my message is that the above does not
> say what distance value each commit should be assigned in the
> history.  Even though the grep loop makes sure that each of E, e1,
> ... e8 appears at least once, and line-count before that ensures
> that the output has 9 entries (and taken together, it guarantees
> that each of these appears not just "at least once", but also
> exactly once), nothing guarantees if they are getting relative
> distance correctly, as the sed strips a bit too much (and that
> relates to my earlier point why starting from a concrete expected
> output and explicitly discard the aspect of the output we do not
> care about before comparison---that way, we can easily tell when the
> code is _designed to_ discard too much).

From my point of view, this indicates that you want to set those exact
dist values in stone. I am not sure I follow that reasoning, as this
particular test case's purpose is to ensure that the `--first-parent`
option works with `--bisect`, not that the distance values match a fixed
expectation.

In short: I disagree that the *exact* values (beyond testing the order)
should be tested *here*.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-07-04 10:26                         ` Johannes Schindelin
@ 2018-07-06 18:14                           ` Junio C Hamano
  2018-07-06 20:33                             ` Johannes Schindelin
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2018-07-06 18:14 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Tiago Botelho, Christian Couder, git, Harald Nordgren,
	Tiago Botelho

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> > git rev-list --first-parent --bisect-all F..E >revs &&
>> > test_line_count = 9 revs &&
>> > for rev in E e1 e2 e3 e4 e5 e6 e7 e8
>> > do
>> >   grep "^$(git rev-parse $rev) " revs ||
>> >   {
>> >     echo "$rev not shown" >&2 &&
>> >     return 1
>> >   }
>> > done &&
>> > sed -e "s/.*(dist=\([0-9]*\)).*/\1/" revs >actual.dists &&
>> > sort -r actual.dists >actual.dists.sorted &&
>> > test_cmp actual.dists.sorted actual.dists
>> 
> From my point of view, this indicates that you want to set those exact
> dist values in stone.

As I already said, I do not think it is absolutely necessary to
declare that the minimum dist is 0 or 1, or how big one step of dist
is.  For those reading from the sidelines, the history we are
testing this new feature over looks like this

#     E		dist=0
#    / \
#   e1  |	dist=1
#   |   |
#   e2  |	dist=2
#   |   |       ...
#   |   |       ...
#   e7  |	dist=2
#   |   |
#   e8  |	dist=1
#    \ /
#     F		dist=0

Current code will say dist=0 for E and F, dist=1 for e1 and e8,
etc., and I am fine if the code suddenly start saying that E and F
(i.e. those at the boundary of the graph) have dist=1 and one hop
weighs 10 so dist=11 for e1 and e8 (i.e. those at one hop from the
boundary).

But I am not fine if E and F get larger dist than e1 and e8, or e1
and e8 get different ones.  I do not think the code quoted upfront
would catch such future breakages.

And I also do not see a reason why somebody wants to make the dist
computation to be 1-based (iow, changing the minimum from 0 to 1) or
one step not to be 1 (iow, giving 11 to e1 and e8), so while I agree
it is not strictly necessary to cast the concrete distance value in
stone, I do not see much harm doing so *if* it helps to make it
simpler the test that is necessary to make sure relative dist values
assigned to these commits are in correct order.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-07-06 18:14                           ` Junio C Hamano
@ 2018-07-06 20:33                             ` Johannes Schindelin
  2018-07-06 21:43                               ` Junio C Hamano
  0 siblings, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2018-07-06 20:33 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Tiago Botelho, Christian Couder, git, Harald Nordgren,
	Tiago Botelho

Hi Junio,

On Fri, 6 Jul 2018, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> >> > git rev-list --first-parent --bisect-all F..E >revs &&
> >> > test_line_count = 9 revs &&
> >> > for rev in E e1 e2 e3 e4 e5 e6 e7 e8
> >> > do
> >> >   grep "^$(git rev-parse $rev) " revs ||
> >> >   {
> >> >     echo "$rev not shown" >&2 &&
> >> >     return 1
> >> >   }
> >> > done &&
> >> > sed -e "s/.*(dist=\([0-9]*\)).*/\1/" revs >actual.dists &&
> >> > sort -r actual.dists >actual.dists.sorted &&
> >> > test_cmp actual.dists.sorted actual.dists
> >> 
> > From my point of view, this indicates that you want to set those exact
> > dist values in stone.
> 
> As I already said, I do not think it is absolutely necessary to
> declare that the minimum dist is 0 or 1, or how big one step of dist
> is.  For those reading from the sidelines, the history we are
> testing this new feature over looks like this
> 
> #     E		dist=0
> #    / \
> #   e1  |	dist=1
> #   |   |
> #   e2  |	dist=2
> #   |   |       ...
> #   |   |       ...
> #   e7  |	dist=2
> #   |   |
> #   e8  |	dist=1
> #    \ /
> #     F		dist=0
> 
> Current code will say dist=0 for E and F, dist=1 for e1 and e8,
> etc., and I am fine if the code suddenly start saying that E and F
> (i.e. those at the boundary of the graph) have dist=1 and one hop
> weighs 10 so dist=11 for e1 and e8 (i.e. those at one hop from the
> boundary).
> 
> But I am not fine if E and F get larger dist than e1 and e8, or e1
> and e8 get different ones.  I do not think the code quoted upfront
> would catch such future breakages.
> 
> And I also do not see a reason why somebody wants to make the dist
> computation to be 1-based (iow, changing the minimum from 0 to 1) or
> one step not to be 1 (iow, giving 11 to e1 and e8), so while I agree
> it is not strictly necessary to cast the concrete distance value in
> stone, I do not see much harm doing so *if* it helps to make it
> simpler the test that is necessary to make sure relative dist values
> assigned to these commits are in correct order.

I guess that you still want to misunderstand me.

Because it is not really hard to understand what I said: a good regression
test will verify precisely what you want to ensure, not precisely what the
command's output is.

So in this case, quite obviously what you want to do is to verify that E
and F get larger dist than e1 and e8. So that is what you test for. Not
some fixed text that might require adjusting in the future for any other
reason than a real bug.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC PATCH v5] Implement --first-parent for git rev-list --bisect
  2018-07-06 20:33                             ` Johannes Schindelin
@ 2018-07-06 21:43                               ` Junio C Hamano
  0 siblings, 0 replies; 18+ messages in thread
From: Junio C Hamano @ 2018-07-06 21:43 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Tiago Botelho, Christian Couder, git, Harald Nordgren,
	Tiago Botelho

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> And I also do not see a reason why somebody wants to make the dist
>> computation to be 1-based (iow, changing the minimum from 0 to 1) or
>> one step not to be 1 (iow, giving 11 to e1 and e8), so while I agree
>> it is not strictly necessary to cast the concrete distance value in
>> stone, I do not see much harm doing so *if* it helps to make it
>> simpler the test that is necessary to make sure relative dist values
>> assigned to these commits are in correct order.
>
> I guess that you still want to misunderstand me.

Not at all.  

> So in this case, quite obviously what you want to do is to verify that E
> and F get larger dist than e1 and e8. So that is what you test for. Not
> some fixed text that might require adjusting in the future for any other
> reason than a real bug.

The most important part of the above quote is "*if* it helps..."
part, and I do think the downside of insisting on dist being exactly
0 for E and F is acceptable than having to write the "E and F must
get the same dist value, and e1 and e8 must get the same dist value
that is larger than the value that E and F gets, ..." test without
doing so *and* still keep the resulting test readable.  It is a
tradeoff and we are drawing the line differently (I am being more
practical here than insisting on requiring absolute minimum and
nothing more).

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-07-06 21:43 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-22 12:39 [RFC PATCH v5] Implement --first-parent for git rev-list --bisect Tiago Botelho
2018-06-25 17:33 ` Junio C Hamano
2018-06-26 11:59   ` Christian Couder
2018-06-26 14:10     ` Johannes Schindelin
2018-06-26 15:41       ` Christian Couder
2018-06-26 21:16         ` Johannes Schindelin
2018-06-26 22:20         ` Junio C Hamano
2018-06-27 11:48           ` Johannes Schindelin
2018-06-27 16:26             ` Junio C Hamano
2018-06-28 13:08               ` Johannes Schindelin
2018-06-28 16:12                 ` Junio C Hamano
2018-06-29 11:20                   ` Johannes Schindelin
2018-07-03 21:33                     ` Tiago Botelho
2018-07-03 22:36                       ` Junio C Hamano
2018-07-04 10:26                         ` Johannes Schindelin
2018-07-06 18:14                           ` Junio C Hamano
2018-07-06 20:33                             ` Johannes Schindelin
2018-07-06 21:43                               ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).