git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 04/16] upload-pack: make reachable() more generic
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
@ 2018-06-19 20:25 ` Derrick Stolee via GitGitGadget
  2018-06-19 20:35 ` [PATCH 05/16] upload-pack: refactor ok_to_give_up() Derrick Stolee via GitGitGadget
                   ` (16 subsequent siblings)
  17 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-06-19 20:25 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

In anticipation of moving the reachable() method to commit-reach.c,
generalize its prototype: take the flag to search for and the flag to
assign as parameters instead of hard-coding THEY_HAVE and COMMON_KNOWN,
which are known only inside upload-pack.c. Also rename 'want' to 'from'
so that the code reads clearly outside of the context of haves/wants
negotiation.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 upload-pack.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 4ca052d0b..0ed470713 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -336,17 +336,17 @@ static int got_oid(const char *hex, struct object_id *oid)
 	return 0;
 }
 
-static int reachable(struct commit *want)
+static int reachable(struct commit *from, int with_flag, int assign_flag)
 {
 	struct prio_queue work = { compare_commits_by_commit_date };
 
-	prio_queue_put(&work, want);
+	prio_queue_put(&work, from);
 	while (work.nr) {
 		struct commit_list *list;
 		struct commit *commit = prio_queue_get(&work);
 
-		if (commit->object.flags & THEY_HAVE) {
-			want->object.flags |= COMMON_KNOWN;
+		if (commit->object.flags & with_flag) {
+			from->object.flags |= assign_flag;
 			break;
 		}
 		if (!commit->object.parsed)
@@ -362,10 +362,10 @@ static int reachable(struct commit *want)
 				prio_queue_put(&work, parent);
 		}
 	}
-	want->object.flags |= REACHABLE;
-	clear_commit_marks(want, REACHABLE);
+	from->object.flags |= REACHABLE;
+	clear_commit_marks(from, REACHABLE);
 	clear_prio_queue(&work);
-	return (want->object.flags & COMMON_KNOWN);
+	return (from->object.flags & assign_flag);
 }
 
 static int ok_to_give_up(void)
@@ -390,7 +390,7 @@ static int ok_to_give_up(void)
 			want_obj.objects[i].item->flags |= COMMON_KNOWN;
 			continue;
 		}
-		if (!reachable((struct commit *)want))
+		if (!reachable((struct commit *)want, THEY_HAVE, COMMON_KNOWN))
 			return 0;
 	}
 	return 1;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 05/16] upload-pack: refactor ok_to_give_up()
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
  2018-06-19 20:25 ` [PATCH 04/16] upload-pack: make reachable() more generic Derrick Stolee via GitGitGadget
@ 2018-06-19 20:35 ` Derrick Stolee via GitGitGadget
  2018-06-25 17:16 ` [PATCH 01/16] commit-reach: move walk methods from commit.c Derrick Stolee via GitGitGadget
                   ` (15 subsequent siblings)
  17 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-06-19 20:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

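In anticipation of consolidating all commit reachability algorithms,
refactor ok_to_give_up(): move its loop over want_obj into a new helper,
can_all_from_reach_with_flag(), which takes the object array and the
flags as parameters. This allows the helper to be split out into an
external method later.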

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 upload-pack.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 0ed470713..e7ad7f24b 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -368,34 +368,45 @@ static int reachable(struct commit *from, int with_flag, int assign_flag)
 	return (from->object.flags & assign_flag);
 }
 
-static int ok_to_give_up(void)
+/*
+ * Determine if every commit in 'from' can reach at least one commit
+ * that is marked with 'with_flag'. Set 'assign_flag' on each commit
+ * found to do so (and on non-commit objects) so later calls skip them.
+ */
+static int can_all_from_reach_with_flag(struct object_array *from,
+					int with_flag, int assign_flag)
 {
 	int i;
 
-	if (!have_obj.nr)
-		return 0;
-
-	for (i = 0; i < want_obj.nr; i++) {
-		struct object *want = want_obj.objects[i].item;
+	for (i = 0; i < from->nr; i++) {
+		struct object *from_one = from->objects[i].item;
 
-		if (want->flags & COMMON_KNOWN)
+		if (from_one->flags & assign_flag)
 			continue;
-		want = deref_tag(the_repository, want, "a want line", 0);
-		if (!want || want->type != OBJ_COMMIT) {
+		from_one = deref_tag(the_repository, from_one, "a from object", 0);
+		if (!from_one || from_one->type != OBJ_COMMIT) {
 			/* no way to tell if this is reachable by
 			 * looking at the ancestry chain alone, so
 			 * leave a note to ourselves not to worry about
 			 * this object anymore.
 			 */
-			want_obj.objects[i].item->flags |= COMMON_KNOWN;
+			from->objects[i].item->flags |= assign_flag;
 			continue;
 		}
-		if (!reachable((struct commit *)want, THEY_HAVE, COMMON_KNOWN))
+		if (!reachable((struct commit *)from_one, with_flag, assign_flag))
 			return 0;
 	}
 	return 1;
 }
 
+static int ok_to_give_up(void)
+{
+	if (!have_obj.nr)
+		return 0;
+
+	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, COMMON_KNOWN);
+}
+
 static int get_common_commits(void)
 {
 	struct object_id oid;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 01/16] commit-reach: move walk methods from commit.c
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
  2018-06-19 20:25 ` [PATCH 04/16] upload-pack: make reachable() more generic Derrick Stolee via GitGitGadget
  2018-06-19 20:35 ` [PATCH 05/16] upload-pack: refactor ok_to_give_up() Derrick Stolee via GitGitGadget
@ 2018-06-25 17:16 ` Derrick Stolee via GitGitGadget
  2018-07-16 18:57   ` Stefan Beller
  2018-07-16 21:31   ` Jonathan Tan
  2018-06-25 17:35 ` [PATCH 02/16] commit-reach: move ref_newer from remote.c Derrick Stolee via GitGitGadget
                   ` (14 subsequent siblings)
  17 siblings, 2 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-06-25 17:16 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Makefile       |   1 +
 commit-reach.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h |  41 ++++++
 commit.c       | 358 ------------------------------------------------
 4 files changed, 401 insertions(+), 358 deletions(-)
 create mode 100644 commit-reach.c
 create mode 100644 commit-reach.h

diff --git a/Makefile b/Makefile
index bb8bd6720..59781f4bc 100644
--- a/Makefile
+++ b/Makefile
@@ -829,6 +829,7 @@ LIB_OBJS += column.o
 LIB_OBJS += combine-diff.o
 LIB_OBJS += commit.o
 LIB_OBJS += commit-graph.o
+LIB_OBJS += commit-reach.o
 LIB_OBJS += compat/obstack.o
 LIB_OBJS += compat/terminal.o
 LIB_OBJS += config.o
diff --git a/commit-reach.c b/commit-reach.c
new file mode 100644
index 000000000..f2e2f7461
--- /dev/null
+++ b/commit-reach.c
@@ -0,0 +1,359 @@
+#include "cache.h"
+#include "prio-queue.h"
+#include "commit-reach.h"
+
+/* Remember to update object flag allocation in object.h */
+#define PARENT1		(1u<<16)
+#define PARENT2		(1u<<17)
+#define STALE		(1u<<18)
+#define RESULT		(1u<<19)
+
+static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
+
+static int queue_has_nonstale(struct prio_queue *queue)
+{
+	int i;
+	for (i = 0; i < queue->nr; i++) {
+		struct commit *commit = queue->array[i].data;
+		if (!(commit->object.flags & STALE))
+			return 1;
+	}
+	return 0;
+}
+
+/* all input commits in one and twos[] must have been parsed! */
+static struct commit_list *paint_down_to_common(struct commit *one, int n,
+						struct commit **twos,
+						int min_generation)
+{
+	struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
+	struct commit_list *result = NULL;
+	int i;
+	uint32_t last_gen = GENERATION_NUMBER_INFINITY;
+
+	one->object.flags |= PARENT1;
+	if (!n) {
+		commit_list_append(one, &result);
+		return result;
+	}
+	prio_queue_put(&queue, one);
+
+	for (i = 0; i < n; i++) {
+		twos[i]->object.flags |= PARENT2;
+		prio_queue_put(&queue, twos[i]);
+	}
+
+	while (queue_has_nonstale(&queue)) {
+		struct commit *commit = prio_queue_get(&queue);
+		struct commit_list *parents;
+		int flags;
+
+		if (commit->generation > last_gen)
+			BUG("bad generation skip %8x > %8x at %s",
+			    commit->generation, last_gen,
+			    oid_to_hex(&commit->object.oid));
+		last_gen = commit->generation;
+
+		if (commit->generation < min_generation)
+			break;
+
+		flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
+		if (flags == (PARENT1 | PARENT2)) {
+			if (!(commit->object.flags & RESULT)) {
+				commit->object.flags |= RESULT;
+				commit_list_insert_by_date(commit, &result);
+			}
+			/* Mark parents of a found merge stale */
+			flags |= STALE;
+		}
+		parents = commit->parents;
+		while (parents) {
+			struct commit *p = parents->item;
+			parents = parents->next;
+			if ((p->object.flags & flags) == flags)
+				continue;
+			if (parse_commit(p))
+				return NULL;
+			p->object.flags |= flags;
+			prio_queue_put(&queue, p);
+		}
+	}
+
+	clear_prio_queue(&queue);
+	return result;
+}
+
+static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos)
+{
+	struct commit_list *list = NULL;
+	struct commit_list *result = NULL;
+	int i;
+
+	for (i = 0; i < n; i++) {
+		if (one == twos[i])
+			/*
+			 * We do not mark this even with RESULT so we do not
+			 * have to clean it up.
+			 */
+			return commit_list_insert(one, &result);
+	}
+
+	if (parse_commit(one))
+		return NULL;
+	for (i = 0; i < n; i++) {
+		if (parse_commit(twos[i]))
+			return NULL;
+	}
+
+	list = paint_down_to_common(one, n, twos, 0);
+
+	while (list) {
+		struct commit *commit = pop_commit(&list);
+		if (!(commit->object.flags & STALE))
+			commit_list_insert_by_date(commit, &result);
+	}
+	return result;
+}
+
+struct commit_list *get_octopus_merge_bases(struct commit_list *in)
+{
+	struct commit_list *i, *j, *k, *ret = NULL;
+
+	if (!in)
+		return ret;
+
+	commit_list_insert(in->item, &ret);
+
+	for (i = in->next; i; i = i->next) {
+		struct commit_list *new_commits = NULL, *end = NULL;
+
+		for (j = ret; j; j = j->next) {
+			struct commit_list *bases;
+			bases = get_merge_bases(i->item, j->item);
+			if (!new_commits)
+				new_commits = bases;
+			else
+				end->next = bases;
+			for (k = bases; k; k = k->next)
+				end = k;
+		}
+		ret = new_commits;
+	}
+	return ret;
+}
+
+static int remove_redundant(struct commit **array, int cnt)
+{
+	/*
+	 * Some commit in the array may be an ancestor of
+	 * another commit.  Move such commit to the end of
+	 * the array, and return the number of commits that
+	 * are independent from each other.
+	 */
+	struct commit **work;
+	unsigned char *redundant;
+	int *filled_index;
+	int i, j, filled;
+
+	work = xcalloc(cnt, sizeof(*work));
+	redundant = xcalloc(cnt, 1);
+	ALLOC_ARRAY(filled_index, cnt - 1);
+
+	for (i = 0; i < cnt; i++)
+		parse_commit(array[i]);
+	for (i = 0; i < cnt; i++) {
+		struct commit_list *common;
+		uint32_t min_generation = array[i]->generation;
+
+		if (redundant[i])
+			continue;
+		for (j = filled = 0; j < cnt; j++) {
+			if (i == j || redundant[j])
+				continue;
+			filled_index[filled] = j;
+			work[filled++] = array[j];
+
+			if (array[j]->generation < min_generation)
+				min_generation = array[j]->generation;
+		}
+		common = paint_down_to_common(array[i], filled, work,
+					      min_generation);
+		if (array[i]->object.flags & PARENT2)
+			redundant[i] = 1;
+		for (j = 0; j < filled; j++)
+			if (work[j]->object.flags & PARENT1)
+				redundant[filled_index[j]] = 1;
+		clear_commit_marks(array[i], all_flags);
+		clear_commit_marks_many(filled, work, all_flags);
+		free_commit_list(common);
+	}
+
+	/* Now collect the result */
+	COPY_ARRAY(work, array, cnt);
+	for (i = filled = 0; i < cnt; i++)
+		if (!redundant[i])
+			array[filled++] = work[i];
+	for (j = filled, i = 0; i < cnt; i++)
+		if (redundant[i])
+			array[j++] = work[i];
+	free(work);
+	free(redundant);
+	free(filled_index);
+	return filled;
+}
+
+static struct commit_list *get_merge_bases_many_0(struct commit *one,
+						  int n,
+						  struct commit **twos,
+						  int cleanup)
+{
+	struct commit_list *list;
+	struct commit **rslt;
+	struct commit_list *result;
+	int cnt, i;
+
+	result = merge_bases_many(one, n, twos);
+	for (i = 0; i < n; i++) {
+		if (one == twos[i])
+			return result;
+	}
+	if (!result || !result->next) {
+		if (cleanup) {
+			clear_commit_marks(one, all_flags);
+			clear_commit_marks_many(n, twos, all_flags);
+		}
+		return result;
+	}
+
+	/* There are more than one */
+	cnt = commit_list_count(result);
+	rslt = xcalloc(cnt, sizeof(*rslt));
+	for (list = result, i = 0; list; list = list->next)
+		rslt[i++] = list->item;
+	free_commit_list(result);
+
+	clear_commit_marks(one, all_flags);
+	clear_commit_marks_many(n, twos, all_flags);
+
+	cnt = remove_redundant(rslt, cnt);
+	result = NULL;
+	for (i = 0; i < cnt; i++)
+		commit_list_insert_by_date(rslt[i], &result);
+	free(rslt);
+	return result;
+}
+
+struct commit_list *get_merge_bases_many(struct commit *one,
+					 int n,
+					 struct commit **twos)
+{
+	return get_merge_bases_many_0(one, n, twos, 1);
+}
+
+struct commit_list *get_merge_bases_many_dirty(struct commit *one,
+					       int n,
+					       struct commit **twos)
+{
+	return get_merge_bases_many_0(one, n, twos, 0);
+}
+
+struct commit_list *get_merge_bases(struct commit *one, struct commit *two)
+{
+	return get_merge_bases_many_0(one, 1, &two, 1);
+}
+
+/*
+ * Is "commit" a descendant of one of the elements on the "with_commit" list?
+ */
+int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
+{
+	if (!with_commit)
+		return 1;
+	while (with_commit) {
+		struct commit *other;
+
+		other = with_commit->item;
+		with_commit = with_commit->next;
+		if (in_merge_bases(other, commit))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Is "commit" an ancestor of one of the "references"?
+ */
+int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference)
+{
+	struct commit_list *bases;
+	int ret = 0, i;
+	uint32_t min_generation = GENERATION_NUMBER_INFINITY;
+
+	if (parse_commit(commit))
+		return ret;
+	for (i = 0; i < nr_reference; i++) {
+		if (parse_commit(reference[i]))
+			return ret;
+		if (reference[i]->generation < min_generation)
+			min_generation = reference[i]->generation;
+	}
+
+	if (commit->generation > min_generation)
+		return ret;
+
+	bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
+	if (commit->object.flags & PARENT2)
+		ret = 1;
+	clear_commit_marks(commit, all_flags);
+	clear_commit_marks_many(nr_reference, reference, all_flags);
+	free_commit_list(bases);
+	return ret;
+}
+
+/*
+ * Is "commit" an ancestor of (i.e. reachable from) the "reference"?
+ */
+int in_merge_bases(struct commit *commit, struct commit *reference)
+{
+	return in_merge_bases_many(commit, 1, &reference);
+}
+
+struct commit_list *reduce_heads(struct commit_list *heads)
+{
+	struct commit_list *p;
+	struct commit_list *result = NULL, **tail = &result;
+	struct commit **array;
+	int num_head, i;
+
+	if (!heads)
+		return NULL;
+
+	/* Uniquify */
+	for (p = heads; p; p = p->next)
+		p->item->object.flags &= ~STALE;
+	for (p = heads, num_head = 0; p; p = p->next) {
+		if (p->item->object.flags & STALE)
+			continue;
+		p->item->object.flags |= STALE;
+		num_head++;
+	}
+	array = xcalloc(num_head, sizeof(*array));
+	for (p = heads, i = 0; p; p = p->next) {
+		if (p->item->object.flags & STALE) {
+			array[i++] = p->item;
+			p->item->object.flags &= ~STALE;
+		}
+	}
+	num_head = remove_redundant(array, num_head);
+	for (i = 0; i < num_head; i++)
+		tail = &commit_list_insert(array[i], tail)->next;
+	free(array);
+	return result;
+}
+
+void reduce_heads_replace(struct commit_list **heads)
+{
+	struct commit_list *result = reduce_heads(*heads);
+	free_commit_list(*heads);
+	*heads = result;
+}
diff --git a/commit-reach.h b/commit-reach.h
new file mode 100644
index 000000000..244f48c5f
--- /dev/null
+++ b/commit-reach.h
@@ -0,0 +1,41 @@
+#ifndef __COMMIT_REACH_H__
+#define __COMMIT_REACH_H__
+
+#include "commit.h"
+
+struct commit_list *get_merge_bases_many(struct commit *one,
+					 int n,
+					 struct commit **twos);
+struct commit_list *get_merge_bases_many_dirty(struct commit *one,
+					       int n,
+					       struct commit **twos);
+struct commit_list *get_merge_bases(struct commit *one, struct commit *two);
+struct commit_list *get_octopus_merge_bases(struct commit_list *in);
+
+/* To be used only when object flags after this call no longer matter */
+struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos);
+
+int is_descendant_of(struct commit *commit, struct commit_list *with_commit);
+int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference);
+int in_merge_bases(struct commit *commit, struct commit *reference);
+
+
+/*
+ * Takes a list of commits and returns a new list where those
+ * have been removed that can be reached from other commits in
+ * the list. It is useful for, e.g., reducing the commits
+ * randomly thrown at the git-merge command and removing
+ * redundant commits that the user shouldn't have given to it.
+ *
+ * This function destroys the STALE bit of the commit objects'
+ * flags.
+ */
+struct commit_list *reduce_heads(struct commit_list *heads);
+
+/*
+ * Like `reduce_heads()`, except it replaces the list. Use this
+ * instead of `foo = reduce_heads(foo);` to avoid memory leaks.
+ */
+void reduce_heads_replace(struct commit_list **heads);
+
+#endif
diff --git a/commit.c b/commit.c
index 39b80bd21..32d1234bd 100644
--- a/commit.c
+++ b/commit.c
@@ -843,364 +843,6 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so
 		clear_author_date_slab(&author_date);
 }
 
-/* merge-base stuff */
-
-/* Remember to update object flag allocation in object.h */
-#define PARENT1		(1u<<16)
-#define PARENT2		(1u<<17)
-#define STALE		(1u<<18)
-#define RESULT		(1u<<19)
-
-static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
-
-static int queue_has_nonstale(struct prio_queue *queue)
-{
-	int i;
-	for (i = 0; i < queue->nr; i++) {
-		struct commit *commit = queue->array[i].data;
-		if (!(commit->object.flags & STALE))
-			return 1;
-	}
-	return 0;
-}
-
-/* all input commits in one and twos[] must have been parsed! */
-static struct commit_list *paint_down_to_common(struct commit *one, int n,
-						struct commit **twos,
-						int min_generation)
-{
-	struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
-	struct commit_list *result = NULL;
-	int i;
-	uint32_t last_gen = GENERATION_NUMBER_INFINITY;
-
-	one->object.flags |= PARENT1;
-	if (!n) {
-		commit_list_append(one, &result);
-		return result;
-	}
-	prio_queue_put(&queue, one);
-
-	for (i = 0; i < n; i++) {
-		twos[i]->object.flags |= PARENT2;
-		prio_queue_put(&queue, twos[i]);
-	}
-
-	while (queue_has_nonstale(&queue)) {
-		struct commit *commit = prio_queue_get(&queue);
-		struct commit_list *parents;
-		int flags;
-
-		if (commit->generation > last_gen)
-			BUG("bad generation skip %8x > %8x at %s",
-			    commit->generation, last_gen,
-			    oid_to_hex(&commit->object.oid));
-		last_gen = commit->generation;
-
-		if (commit->generation < min_generation)
-			break;
-
-		flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
-		if (flags == (PARENT1 | PARENT2)) {
-			if (!(commit->object.flags & RESULT)) {
-				commit->object.flags |= RESULT;
-				commit_list_insert_by_date(commit, &result);
-			}
-			/* Mark parents of a found merge stale */
-			flags |= STALE;
-		}
-		parents = commit->parents;
-		while (parents) {
-			struct commit *p = parents->item;
-			parents = parents->next;
-			if ((p->object.flags & flags) == flags)
-				continue;
-			if (parse_commit(p))
-				return NULL;
-			p->object.flags |= flags;
-			prio_queue_put(&queue, p);
-		}
-	}
-
-	clear_prio_queue(&queue);
-	return result;
-}
-
-static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos)
-{
-	struct commit_list *list = NULL;
-	struct commit_list *result = NULL;
-	int i;
-
-	for (i = 0; i < n; i++) {
-		if (one == twos[i])
-			/*
-			 * We do not mark this even with RESULT so we do not
-			 * have to clean it up.
-			 */
-			return commit_list_insert(one, &result);
-	}
-
-	if (parse_commit(one))
-		return NULL;
-	for (i = 0; i < n; i++) {
-		if (parse_commit(twos[i]))
-			return NULL;
-	}
-
-	list = paint_down_to_common(one, n, twos, 0);
-
-	while (list) {
-		struct commit *commit = pop_commit(&list);
-		if (!(commit->object.flags & STALE))
-			commit_list_insert_by_date(commit, &result);
-	}
-	return result;
-}
-
-struct commit_list *get_octopus_merge_bases(struct commit_list *in)
-{
-	struct commit_list *i, *j, *k, *ret = NULL;
-
-	if (!in)
-		return ret;
-
-	commit_list_insert(in->item, &ret);
-
-	for (i = in->next; i; i = i->next) {
-		struct commit_list *new_commits = NULL, *end = NULL;
-
-		for (j = ret; j; j = j->next) {
-			struct commit_list *bases;
-			bases = get_merge_bases(i->item, j->item);
-			if (!new_commits)
-				new_commits = bases;
-			else
-				end->next = bases;
-			for (k = bases; k; k = k->next)
-				end = k;
-		}
-		ret = new_commits;
-	}
-	return ret;
-}
-
-static int remove_redundant(struct commit **array, int cnt)
-{
-	/*
-	 * Some commit in the array may be an ancestor of
-	 * another commit.  Move such commit to the end of
-	 * the array, and return the number of commits that
-	 * are independent from each other.
-	 */
-	struct commit **work;
-	unsigned char *redundant;
-	int *filled_index;
-	int i, j, filled;
-
-	work = xcalloc(cnt, sizeof(*work));
-	redundant = xcalloc(cnt, 1);
-	ALLOC_ARRAY(filled_index, cnt - 1);
-
-	for (i = 0; i < cnt; i++)
-		parse_commit(array[i]);
-	for (i = 0; i < cnt; i++) {
-		struct commit_list *common;
-		uint32_t min_generation = array[i]->generation;
-
-		if (redundant[i])
-			continue;
-		for (j = filled = 0; j < cnt; j++) {
-			if (i == j || redundant[j])
-				continue;
-			filled_index[filled] = j;
-			work[filled++] = array[j];
-
-			if (array[j]->generation < min_generation)
-				min_generation = array[j]->generation;
-		}
-		common = paint_down_to_common(array[i], filled, work,
-					      min_generation);
-		if (array[i]->object.flags & PARENT2)
-			redundant[i] = 1;
-		for (j = 0; j < filled; j++)
-			if (work[j]->object.flags & PARENT1)
-				redundant[filled_index[j]] = 1;
-		clear_commit_marks(array[i], all_flags);
-		clear_commit_marks_many(filled, work, all_flags);
-		free_commit_list(common);
-	}
-
-	/* Now collect the result */
-	COPY_ARRAY(work, array, cnt);
-	for (i = filled = 0; i < cnt; i++)
-		if (!redundant[i])
-			array[filled++] = work[i];
-	for (j = filled, i = 0; i < cnt; i++)
-		if (redundant[i])
-			array[j++] = work[i];
-	free(work);
-	free(redundant);
-	free(filled_index);
-	return filled;
-}
-
-static struct commit_list *get_merge_bases_many_0(struct commit *one,
-						  int n,
-						  struct commit **twos,
-						  int cleanup)
-{
-	struct commit_list *list;
-	struct commit **rslt;
-	struct commit_list *result;
-	int cnt, i;
-
-	result = merge_bases_many(one, n, twos);
-	for (i = 0; i < n; i++) {
-		if (one == twos[i])
-			return result;
-	}
-	if (!result || !result->next) {
-		if (cleanup) {
-			clear_commit_marks(one, all_flags);
-			clear_commit_marks_many(n, twos, all_flags);
-		}
-		return result;
-	}
-
-	/* There are more than one */
-	cnt = commit_list_count(result);
-	rslt = xcalloc(cnt, sizeof(*rslt));
-	for (list = result, i = 0; list; list = list->next)
-		rslt[i++] = list->item;
-	free_commit_list(result);
-
-	clear_commit_marks(one, all_flags);
-	clear_commit_marks_many(n, twos, all_flags);
-
-	cnt = remove_redundant(rslt, cnt);
-	result = NULL;
-	for (i = 0; i < cnt; i++)
-		commit_list_insert_by_date(rslt[i], &result);
-	free(rslt);
-	return result;
-}
-
-struct commit_list *get_merge_bases_many(struct commit *one,
-					 int n,
-					 struct commit **twos)
-{
-	return get_merge_bases_many_0(one, n, twos, 1);
-}
-
-struct commit_list *get_merge_bases_many_dirty(struct commit *one,
-					       int n,
-					       struct commit **twos)
-{
-	return get_merge_bases_many_0(one, n, twos, 0);
-}
-
-struct commit_list *get_merge_bases(struct commit *one, struct commit *two)
-{
-	return get_merge_bases_many_0(one, 1, &two, 1);
-}
-
-/*
- * Is "commit" a descendant of one of the elements on the "with_commit" list?
- */
-int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
-{
-	if (!with_commit)
-		return 1;
-	while (with_commit) {
-		struct commit *other;
-
-		other = with_commit->item;
-		with_commit = with_commit->next;
-		if (in_merge_bases(other, commit))
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Is "commit" an ancestor of one of the "references"?
- */
-int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference)
-{
-	struct commit_list *bases;
-	int ret = 0, i;
-	uint32_t min_generation = GENERATION_NUMBER_INFINITY;
-
-	if (parse_commit(commit))
-		return ret;
-	for (i = 0; i < nr_reference; i++) {
-		if (parse_commit(reference[i]))
-			return ret;
-		if (reference[i]->generation < min_generation)
-			min_generation = reference[i]->generation;
-	}
-
-	if (commit->generation > min_generation)
-		return ret;
-
-	bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
-	if (commit->object.flags & PARENT2)
-		ret = 1;
-	clear_commit_marks(commit, all_flags);
-	clear_commit_marks_many(nr_reference, reference, all_flags);
-	free_commit_list(bases);
-	return ret;
-}
-
-/*
- * Is "commit" an ancestor of (i.e. reachable from) the "reference"?
- */
-int in_merge_bases(struct commit *commit, struct commit *reference)
-{
-	return in_merge_bases_many(commit, 1, &reference);
-}
-
-struct commit_list *reduce_heads(struct commit_list *heads)
-{
-	struct commit_list *p;
-	struct commit_list *result = NULL, **tail = &result;
-	struct commit **array;
-	int num_head, i;
-
-	if (!heads)
-		return NULL;
-
-	/* Uniquify */
-	for (p = heads; p; p = p->next)
-		p->item->object.flags &= ~STALE;
-	for (p = heads, num_head = 0; p; p = p->next) {
-		if (p->item->object.flags & STALE)
-			continue;
-		p->item->object.flags |= STALE;
-		num_head++;
-	}
-	array = xcalloc(num_head, sizeof(*array));
-	for (p = heads, i = 0; p; p = p->next) {
-		if (p->item->object.flags & STALE) {
-			array[i++] = p->item;
-			p->item->object.flags &= ~STALE;
-		}
-	}
-	num_head = remove_redundant(array, num_head);
-	for (i = 0; i < num_head; i++)
-		tail = &commit_list_insert(array[i], tail)->next;
-	free(array);
-	return result;
-}
-
-void reduce_heads_replace(struct commit_list **heads)
-{
-	struct commit_list *result = reduce_heads(*heads);
-	free_commit_list(*heads);
-	*heads = result;
-}
-
 static const char gpg_sig_header[] = "gpgsig";
 static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1;
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 02/16] commit-reach: move ref_newer from remote.c
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (2 preceding siblings ...)
  2018-06-25 17:16 ` [PATCH 01/16] commit-reach: move walk methods from commit.c Derrick Stolee via GitGitGadget
@ 2018-06-25 17:35 ` Derrick Stolee via GitGitGadget
  2018-07-16 19:10   ` Stefan Beller
  2018-06-25 18:01 ` [PATCH 03/16] commit-reach: move commit_contains from ref-filter Derrick Stolee via GitGitGadget
                   ` (13 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-06-25 17:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/remote.c |  1 +
 commit-reach.c   | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h   |  2 ++
 http-push.c      |  1 +
 remote.c         | 50 +-------------------------------------------
 remote.h         |  1 -
 6 files changed, 59 insertions(+), 50 deletions(-)

diff --git a/builtin/remote.c b/builtin/remote.c
index c74ee8869..79b032644 100644
--- a/builtin/remote.c
+++ b/builtin/remote.c
@@ -10,6 +10,7 @@
 #include "refspec.h"
 #include "object-store.h"
 #include "argv-array.h"
+#include "commit-reach.h"
 
 static const char * const builtin_remote_usage[] = {
 	N_("git remote [-v | --verbose]"),
diff --git a/commit-reach.c b/commit-reach.c
index f2e2f7461..a6bc4781a 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -1,5 +1,10 @@
 #include "cache.h"
+#include "commit.h"
+#include "decorate.h"
 #include "prio-queue.h"
+#include "tree.h"
+#include "revision.h"
+#include "tag.h"
 #include "commit-reach.h"
 
 /* Remember to update object flag allocation in object.h */
@@ -357,3 +362,52 @@ void reduce_heads_replace(struct commit_list **heads)
 	free_commit_list(*heads);
 	*heads = result;
 }
+
+static void unmark_and_free(struct commit_list *list, unsigned int mark)
+{
+	while (list) {
+		struct commit *commit = pop_commit(&list);
+		commit->object.flags &= ~mark;
+	}
+}
+
+int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
+{
+	struct object *o;
+	struct commit *old_commit, *new_commit;
+	struct commit_list *list, *used;
+	int found = 0;
+
+	/*
+	 * Both new_commit and old_commit must be commit-ish and new_commit is descendant of
+	 * old_commit.  Otherwise we require --force.
+	 */
+	o = deref_tag(the_repository, parse_object(the_repository, old_oid),
+		      NULL, 0);
+	if (!o || o->type != OBJ_COMMIT)
+		return 0;
+	old_commit = (struct commit *) o;
+
+	o = deref_tag(the_repository, parse_object(the_repository, new_oid),
+		      NULL, 0);
+	if (!o || o->type != OBJ_COMMIT)
+		return 0;
+	new_commit = (struct commit *) o;
+
+	if (parse_commit(new_commit) < 0)
+		return 0;
+
+	used = list = NULL;
+	commit_list_insert(new_commit, &list);
+	while (list) {
+		new_commit = pop_most_recent_commit(&list, TMP_MARK);
+		commit_list_insert(new_commit, &used);
+		if (new_commit == old_commit) {
+			found = 1;
+			break;
+		}
+	}
+	unmark_and_free(list, TMP_MARK);
+	unmark_and_free(used, TMP_MARK);
+	return found;
+}
diff --git a/commit-reach.h b/commit-reach.h
index 244f48c5f..35ec9f0dd 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -38,4 +38,6 @@ struct commit_list *reduce_heads(struct commit_list *heads);
  */
 void reduce_heads_replace(struct commit_list **heads);
 
+int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
+
 #endif
diff --git a/http-push.c b/http-push.c
index 5eaf551b5..e007cb5a6 100644
--- a/http-push.c
+++ b/http-push.c
@@ -14,6 +14,7 @@
 #include "argv-array.h"
 #include "packfile.h"
 #include "object-store.h"
+#include "commit-reach.h"
 
 
 #ifdef EXPAT_NEEDS_XMLPARSE_H
diff --git a/remote.c b/remote.c
index 26b1fbd9a..f0c23bae4 100644
--- a/remote.c
+++ b/remote.c
@@ -12,6 +12,7 @@
 #include "string-list.h"
 #include "mergesort.h"
 #include "argv-array.h"
+#include "commit-reach.h"
 
 enum map_direction { FROM_SRC, FROM_DST };
 
@@ -1783,55 +1784,6 @@ int resolve_remote_symref(struct ref *ref, struct ref *list)
 	return 1;
 }
 
-static void unmark_and_free(struct commit_list *list, unsigned int mark)
-{
-	while (list) {
-		struct commit *commit = pop_commit(&list);
-		commit->object.flags &= ~mark;
-	}
-}
-
-int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
-{
-	struct object *o;
-	struct commit *old_commit, *new_commit;
-	struct commit_list *list, *used;
-	int found = 0;
-
-	/*
-	 * Both new_commit and old_commit must be commit-ish and new_commit is descendant of
-	 * old_commit.  Otherwise we require --force.
-	 */
-	o = deref_tag(the_repository, parse_object(the_repository, old_oid),
-		      NULL, 0);
-	if (!o || o->type != OBJ_COMMIT)
-		return 0;
-	old_commit = (struct commit *) o;
-
-	o = deref_tag(the_repository, parse_object(the_repository, new_oid),
-		      NULL, 0);
-	if (!o || o->type != OBJ_COMMIT)
-		return 0;
-	new_commit = (struct commit *) o;
-
-	if (parse_commit(new_commit) < 0)
-		return 0;
-
-	used = list = NULL;
-	commit_list_insert(new_commit, &list);
-	while (list) {
-		new_commit = pop_most_recent_commit(&list, TMP_MARK);
-		commit_list_insert(new_commit, &used);
-		if (new_commit == old_commit) {
-			found = 1;
-			break;
-		}
-	}
-	unmark_and_free(list, TMP_MARK);
-	unmark_and_free(used, TMP_MARK);
-	return found;
-}
-
 /*
  * Lookup the upstream branch for the given branch and if present, optionally
  * compute the commit ahead/behind values for the pair.
diff --git a/remote.h b/remote.h
index 45ecc6cef..56fb9cbb2 100644
--- a/remote.h
+++ b/remote.h
@@ -149,7 +149,6 @@ extern struct ref **get_remote_refs(int fd_out, struct packet_reader *reader,
 				    const struct string_list *server_options);
 
 int resolve_remote_symref(struct ref *ref, struct ref *list);
-int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
 
 /*
  * Remove and free all but the first of any entries in the input list
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 03/16] commit-reach: move commit_contains from ref-filter
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (3 preceding siblings ...)
  2018-06-25 17:35 ` [PATCH 02/16] commit-reach: move ref_newer from remote.c Derrick Stolee via GitGitGadget
@ 2018-06-25 18:01 ` Derrick Stolee via GitGitGadget
  2018-07-16 19:14   ` Stefan Beller
  2018-06-28 12:31 ` [PATCH 15/16] commit-reach: make can_all_from_reach... linear Derrick Stolee via GitGitGadget
                   ` (12 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-06-25 18:01 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 120 ++++++++++++++++++++++++++++++++++++++++
 commit-reach.h |  18 +++++-
 fast-import.c  |   1 +
 ref-filter.c   | 147 +++----------------------------------------------
 4 files changed, 146 insertions(+), 140 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index a6bc4781a..9e56f90ea 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "commit.h"
+#include "commit-graph.h"
 #include "decorate.h"
 #include "prio-queue.h"
 #include "tree.h"
@@ -411,3 +412,122 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
 	unmark_and_free(used, TMP_MARK);
 	return found;
 }
+
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct contains_stack {
+	int nr, alloc;
+	struct contains_stack_entry {
+		struct commit *commit;
+		struct commit_list *parents;
+	} *contains_stack;
+};
+
+static int in_commit_list(const struct commit_list *want, struct commit *c)
+{
+	for (; want; want = want->next)
+		if (!oidcmp(&want->item->object.oid, &c->object.oid))
+			return 1;
+	return 0;
+}
+
+/*
+ * Test whether the candidate is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static enum contains_result contains_test(struct commit *candidate,
+					  const struct commit_list *want,
+					  struct contains_cache *cache,
+					  uint32_t cutoff)
+{
+	enum contains_result *cached = contains_cache_at(cache, candidate);
+
+	/* If we already have the answer cached, return that. */
+	if (*cached)
+		return *cached;
+
+	/* or are we it? */
+	if (in_commit_list(want, candidate)) {
+		*cached = CONTAINS_YES;
+		return CONTAINS_YES;
+	}
+
+	/* Otherwise, we don't know; prepare to recurse */
+	parse_commit_or_die(candidate);
+
+	if (candidate->generation < cutoff)
+		return CONTAINS_NO;
+
+	return CONTAINS_UNKNOWN;
+}
+
+static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack)
+{
+	ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc);
+	contains_stack->contains_stack[contains_stack->nr].commit = candidate;
+	contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents;
+}
+
+static enum contains_result contains_tag_algo(struct commit *candidate,
+					      const struct commit_list *want,
+					      struct contains_cache *cache)
+{
+	struct contains_stack contains_stack = { 0, 0, NULL };
+	enum contains_result result;
+	uint32_t cutoff = GENERATION_NUMBER_INFINITY;
+	const struct commit_list *p;
+
+	for (p = want; p; p = p->next) {
+		struct commit *c = p->item;
+		load_commit_graph_info(the_repository, c);
+		if (c->generation < cutoff)
+			cutoff = c->generation;
+	}
+
+	result = contains_test(candidate, want, cache, cutoff);
+	if (result != CONTAINS_UNKNOWN)
+		return result;
+
+	push_to_contains_stack(candidate, &contains_stack);
+	while (contains_stack.nr) {
+		struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1];
+		struct commit *commit = entry->commit;
+		struct commit_list *parents = entry->parents;
+
+		if (!parents) {
+			*contains_cache_at(cache, commit) = CONTAINS_NO;
+			contains_stack.nr--;
+		}
+		/*
+		 * If we just popped the stack, parents->item has been marked,
+		 * therefore contains_test will return a meaningful yes/no.
+		 */
+		else switch (contains_test(parents->item, want, cache, cutoff)) {
+		case CONTAINS_YES:
+			*contains_cache_at(cache, commit) = CONTAINS_YES;
+			contains_stack.nr--;
+			break;
+		case CONTAINS_NO:
+			entry->parents = parents->next;
+			break;
+		case CONTAINS_UNKNOWN:
+			push_to_contains_stack(parents->item, &contains_stack);
+			break;
+		}
+	}
+	free(contains_stack.contains_stack);
+	return contains_test(candidate, want, cache, cutoff);
+}
+
+int commit_contains(struct ref_filter *filter, struct commit *commit,
+		    struct commit_list *list, struct contains_cache *cache)
+{
+	if (filter->with_commit_tag_algo)
+		return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
+	return is_descendant_of(commit, list);
+}
diff --git a/commit-reach.h b/commit-reach.h
index 35ec9f0dd..925cb05d7 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -2,6 +2,8 @@
 #define __COMMIT_REACH_H__
 
 #include "commit.h"
+#include "commit-slab.h"
+#include "ref-filter.h"
 
 struct commit_list *get_merge_bases_many(struct commit *one,
 					 int n,
@@ -19,7 +21,6 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit);
 int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference);
 int in_merge_bases(struct commit *commit, struct commit *reference);
 
-
 /*
  * Takes a list of commits and returns a new list where those
  * have been removed that can be reached from other commits in
@@ -40,4 +41,19 @@ void reduce_heads_replace(struct commit_list **heads);
 
 int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
 
+/*
+ * Unknown has to be "0" here, because that's the default value for
+ * contains_cache slab entries that have not yet been assigned.
+ */
+enum contains_result {
+	CONTAINS_UNKNOWN = 0,
+	CONTAINS_NO,
+	CONTAINS_YES
+};
+
+define_commit_slab(contains_cache, enum contains_result);
+
+int commit_contains(struct ref_filter *filter, struct commit *commit,
+		    struct commit_list *list, struct contains_cache *cache);
+
 #endif
diff --git a/fast-import.c b/fast-import.c
index 3ea578102..4a93df383 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -171,6 +171,7 @@ Format of STDIN stream:
 #include "packfile.h"
 #include "object-store.h"
 #include "mem-pool.h"
+#include "commit-reach.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
diff --git a/ref-filter.c b/ref-filter.c
index 9b2da8839..35b2d25ce 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -18,7 +18,7 @@
 #include "trailer.h"
 #include "wt-status.h"
 #include "commit-slab.h"
-#include "commit-graph.h"
+#include "commit-reach.h"
 
 static struct ref_msg {
 	const char *gone;
@@ -1623,144 +1623,6 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 	return 0;
 }
 
-/*
- * Unknown has to be "0" here, because that's the default value for
- * contains_cache slab entries that have not yet been assigned.
- */
-enum contains_result {
-	CONTAINS_UNKNOWN = 0,
-	CONTAINS_NO,
-	CONTAINS_YES
-};
-
-define_commit_slab(contains_cache, enum contains_result);
-
-struct ref_filter_cbdata {
-	struct ref_array *array;
-	struct ref_filter *filter;
-	struct contains_cache contains_cache;
-	struct contains_cache no_contains_cache;
-};
-
-/*
- * Mimicking the real stack, this stack lives on the heap, avoiding stack
- * overflows.
- *
- * At each recursion step, the stack items points to the commits whose
- * ancestors are to be inspected.
- */
-struct contains_stack {
-	int nr, alloc;
-	struct contains_stack_entry {
-		struct commit *commit;
-		struct commit_list *parents;
-	} *contains_stack;
-};
-
-static int in_commit_list(const struct commit_list *want, struct commit *c)
-{
-	for (; want; want = want->next)
-		if (!oidcmp(&want->item->object.oid, &c->object.oid))
-			return 1;
-	return 0;
-}
-
-/*
- * Test whether the candidate is contained in the list.
- * Do not recurse to find out, though, but return -1 if inconclusive.
- */
-static enum contains_result contains_test(struct commit *candidate,
-					  const struct commit_list *want,
-					  struct contains_cache *cache,
-					  uint32_t cutoff)
-{
-	enum contains_result *cached = contains_cache_at(cache, candidate);
-
-	/* If we already have the answer cached, return that. */
-	if (*cached)
-		return *cached;
-
-	/* or are we it? */
-	if (in_commit_list(want, candidate)) {
-		*cached = CONTAINS_YES;
-		return CONTAINS_YES;
-	}
-
-	/* Otherwise, we don't know; prepare to recurse */
-	parse_commit_or_die(candidate);
-
-	if (candidate->generation < cutoff)
-		return CONTAINS_NO;
-
-	return CONTAINS_UNKNOWN;
-}
-
-static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack)
-{
-	ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc);
-	contains_stack->contains_stack[contains_stack->nr].commit = candidate;
-	contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents;
-}
-
-static enum contains_result contains_tag_algo(struct commit *candidate,
-					      const struct commit_list *want,
-					      struct contains_cache *cache)
-{
-	struct contains_stack contains_stack = { 0, 0, NULL };
-	enum contains_result result;
-	uint32_t cutoff = GENERATION_NUMBER_INFINITY;
-	const struct commit_list *p;
-
-	for (p = want; p; p = p->next) {
-		struct commit *c = p->item;
-		load_commit_graph_info(the_repository, c);
-		if (c->generation < cutoff)
-			cutoff = c->generation;
-	}
-
-	result = contains_test(candidate, want, cache, cutoff);
-	if (result != CONTAINS_UNKNOWN)
-		return result;
-
-	push_to_contains_stack(candidate, &contains_stack);
-	while (contains_stack.nr) {
-		struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1];
-		struct commit *commit = entry->commit;
-		struct commit_list *parents = entry->parents;
-
-		if (!parents) {
-			*contains_cache_at(cache, commit) = CONTAINS_NO;
-			contains_stack.nr--;
-		}
-		/*
-		 * If we just popped the stack, parents->item has been marked,
-		 * therefore contains_test will return a meaningful yes/no.
-		 */
-		else switch (contains_test(parents->item, want, cache, cutoff)) {
-		case CONTAINS_YES:
-			*contains_cache_at(cache, commit) = CONTAINS_YES;
-			contains_stack.nr--;
-			break;
-		case CONTAINS_NO:
-			entry->parents = parents->next;
-			break;
-		case CONTAINS_UNKNOWN:
-			push_to_contains_stack(parents->item, &contains_stack);
-			break;
-		}
-	}
-	free(contains_stack.contains_stack);
-	return contains_test(candidate, want, cache, cutoff);
-}
-
-static int commit_contains(struct ref_filter *filter, struct commit *commit,
-			   struct commit_list *list, struct contains_cache *cache)
-{
-	if (filter->with_commit_tag_algo)
-		return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
-	return is_descendant_of(commit, list);
-}
-
 /*
  * Return 1 if the refname matches one of the patterns, otherwise 0.
  * A pattern can be a literal prefix (e.g. a refname "refs/heads/master"
@@ -1987,6 +1849,13 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname)
 	return ref_kind_from_refname(refname);
 }
 
+struct ref_filter_cbdata {
+	struct ref_array *array;
+	struct ref_filter *filter;
+	struct contains_cache contains_cache;
+	struct contains_cache no_contains_cache;
+};
+
 /*
  * A call-back given to for_each_ref().  Filter refs and keep them for
  * later object processing.
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (4 preceding siblings ...)
  2018-06-25 18:01 ` [PATCH 03/16] commit-reach: move commit_contains from ref-filter Derrick Stolee via GitGitGadget
@ 2018-06-28 12:31 ` Derrick Stolee via GitGitGadget
  2018-07-16 22:37   ` Stefan Beller
                     ` (2 more replies)
  2018-07-12 20:47 ` [PATCH 06/16] upload-pack: generalize commit date cutoff Derrick Stolee via GitGitGadget
                   ` (11 subsequent siblings)
  17 siblings, 3 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-06-28 12:31 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The can_all_from_reach_with_flags() algorithm is currently quadratic in
the worst case, because it calls the reachable() method for every 'from'
without tracking which commits have already been walked or which can
already reach a commit in 'to'.

Rewrite the algorithm to walk each commit a constant number of times.

We also add some optimizations that should work for the main consumer of
this method: fetch negotiation (haves/wants).

The first step includes using a depth-first-search (DFS) from each 'from'
commit, sorted by ascending generation number. We do not walk beyond the
minimum generation number or the minimum commit date. This DFS is likely
to be faster than the existing reachable() method because we expect
previous ref values to be along the first-parent history.

If we find a target commit, then we mark everything in the DFS stack as
a RESULT. This expands the set of targets for the other from commits. We
also mark the visited commits using 'assign_flag' to prevent re-walking
the same commits.

We still need to clear our flags at the end, which is why we will have a
total of three visits to each commit.

Performance was measured on the Linux repository using
'test-tool reach can_all_from_reach'. The input included rows seeded by
tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as
v3.[0-9]*. This mimics a (very large) fetch that says "I have all major
v3 releases and want all major v4 releases." The "large" case included
X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate
tags to the set, which does not greatly increase the number of objects
that are considered, but does increase the number of 'from' commits,
demonstrating the quadratic nature of the previous code.

Small Case
----------

Before: 1.52 s
 After: 0.26 s

Large Case
----------

Before: 3.50 s
 After: 0.27 s

Note how the time increases between the two cases in the two versions.
The new code increases relative to the number of commits that need to be
walked, but not directly relative to the number of 'from' commits.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 124 ++++++++++++++++++++++++++++++-------------------
 commit-reach.h |   6 ++-
 upload-pack.c  |   5 +-
 3 files changed, 85 insertions(+), 50 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index c58e50fbb..ac132c8e4 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -513,65 +513,88 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 	return is_descendant_of(commit, list);
 }
 
-int reachable(struct commit *from, int with_flag, int assign_flag,
-	      time_t min_commit_date)
+static int compare_commits_by_gen(const void *_a, const void *_b)
 {
-	struct prio_queue work = { compare_commits_by_commit_date };
+	const struct commit *a = (const struct commit *)_a;
+	const struct commit *b = (const struct commit *)_b;
 
-	prio_queue_put(&work, from);
-	while (work.nr) {
-		struct commit_list *list;
-		struct commit *commit = prio_queue_get(&work);
-
-		if (commit->object.flags & with_flag) {
-			from->object.flags |= assign_flag;
-			break;
-		}
-		if (!commit->object.parsed)
-			parse_object(the_repository, &commit->object.oid);
-		if (commit->object.flags & REACHABLE)
-			continue;
-		commit->object.flags |= REACHABLE;
-		if (commit->date < min_commit_date)
-			continue;
-		for (list = commit->parents; list; list = list->next) {
-			struct commit *parent = list->item;
-			if (!(parent->object.flags & REACHABLE))
-				prio_queue_put(&work, parent);
-		}
-	}
-	from->object.flags |= REACHABLE;
-	clear_commit_marks(from, REACHABLE);
-	clear_prio_queue(&work);
-	return (from->object.flags & assign_flag);
+	if (a->generation < b->generation)
+		return -1;
+	if (a->generation > b->generation)
+		return 1;
+	return 0;
 }
 
 int can_all_from_reach_with_flag(struct object_array *from,
 				 int with_flag, int assign_flag,
-				 time_t min_commit_date)
+				 time_t min_commit_date,
+				 uint32_t min_generation)
 {
+	struct commit **list = NULL;
 	int i;
+	int result = 1;
 
+	ALLOC_ARRAY(list, from->nr);
 	for (i = 0; i < from->nr; i++) {
-		struct object *from_one = from->objects[i].item;
+		list[i] = (struct commit *)from->objects[i].item;
 
-		if (from_one->flags & assign_flag)
-			continue;
-		from_one = deref_tag(the_repository, from_one, "a from object", 0);
-		if (!from_one || from_one->type != OBJ_COMMIT) {
-			/* no way to tell if this is reachable by
-			 * looking at the ancestry chain alone, so
-			 * leave a note to ourselves not to worry about
-			 * this object anymore.
-			 */
-			from->objects[i].item->flags |= assign_flag;
-			continue;
-		}
-		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
-			       min_commit_date))
+		parse_commit(list[i]);
+
+		if (list[i]->generation < min_generation)
 			return 0;
 	}
-	return 1;
+
+	QSORT(list, from->nr, compare_commits_by_gen);
+
+	for (i = 0; i < from->nr; i++) {
+		/* DFS from list[i] */
+		struct commit_list *stack = NULL;
+
+		list[i]->object.flags |= assign_flag;
+		commit_list_insert(list[i], &stack);
+
+		while (stack) {
+			struct commit_list *parent;
+
+			if (stack->item->object.flags & with_flag) {
+				pop_commit(&stack);
+				continue;
+			}
+
+			for (parent = stack->item->parents; parent; parent = parent->next) {
+				if (parent->item->object.flags & (with_flag | RESULT))
+					stack->item->object.flags |= RESULT;
+
+				if (!(parent->item->object.flags & assign_flag)) {
+					parent->item->object.flags |= assign_flag;
+
+					parse_commit(parent->item);
+
+					if (parent->item->date < min_commit_date ||
+					    parent->item->generation < min_generation)
+						continue;
+
+					commit_list_insert(parent->item, &stack);
+					break;
+				}
+			}
+
+			if (!parent)
+				pop_commit(&stack);
+		}
+
+		if (!(list[i]->object.flags & (with_flag | RESULT))) {
+			result = 0;
+			goto cleanup;
+		}
+	}
+
+cleanup:
+	for (i = 0; i < from->nr; i++) {
+		clear_commit_marks(list[i], RESULT);
+		clear_commit_marks(list[i], assign_flag);
+	}
+	return result;
 }
 
 int can_all_from_reach(struct commit_list *from, struct commit_list *to,
@@ -581,6 +604,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 	time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0;
 	struct commit_list *from_iter = from, *to_iter = to;
 	int result;
+	uint32_t min_generation = GENERATION_NUMBER_INFINITY;
 
 	while (from_iter) {
 		add_object_array(&from_iter->item->object, NULL, &from_objs);
@@ -588,6 +612,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 		if (!parse_commit(from_iter->item)) {
 			if (from_iter->item->date < min_commit_date)
 				min_commit_date = from_iter->item->date;
+
+			if (from_iter->item->generation < min_generation)
+				min_generation = from_iter->item->generation;
 		}
 
 		from_iter = from_iter->next;
@@ -597,6 +624,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 		if (!parse_commit(to_iter->item)) {
 			if (to_iter->item->date < min_commit_date)
 				min_commit_date = to_iter->item->date;
+
+			if (to_iter->item->generation < min_generation)
+				min_generation = to_iter->item->generation;
 		}
 
 		to_iter->item->object.flags |= PARENT2;
@@ -605,7 +635,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 	}
 
 	result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1,
-					      min_commit_date);
+					      min_commit_date, min_generation);
 
 	while (from) {
 		clear_commit_marks(from->item, PARENT1);
diff --git a/commit-reach.h b/commit-reach.h
index 58de0df56..482d9eb5d 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -63,11 +63,13 @@ int reachable(struct commit *from, int with_flag, int assign_flag,
  * Determine if every commit in 'from' can reach at least one commit
  * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
  * as a marker for commits that are already visited. Do not walk
- * commits with date below 'min_commit_date'.
+ * commits with date below 'min_commit_date' or generation below
+ * 'min_generation'.
  */
 int can_all_from_reach_with_flag(struct object_array *from,
 				 int with_flag, int assign_flag,
-				 time_t min_commit_date);
+				 time_t min_commit_date,
+				 uint32_t min_generation);
 int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 		       int commit_date_cutoff);
 
diff --git a/upload-pack.c b/upload-pack.c
index 11c426685..1e498f118 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -338,11 +338,14 @@ static int got_oid(const char *hex, struct object_id *oid)
 
 static int ok_to_give_up(void)
 {
+	uint32_t min_generation = GENERATION_NUMBER_ZERO;
+
 	if (!have_obj.nr)
 		return 0;
 
 	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE,
-					    COMMON_KNOWN, oldest_have);
+					    COMMON_KNOWN, oldest_have,
+					    min_generation);
 }
 
 static int get_common_commits(void)
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 06/16] upload-pack: generalize commit date cutoff
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (5 preceding siblings ...)
  2018-06-28 12:31 ` [PATCH 15/16] commit-reach: make can_all_from_reach... linear Derrick Stolee via GitGitGadget
@ 2018-07-12 20:47 ` Derrick Stolee via GitGitGadget
  2018-07-16 19:38   ` Stefan Beller
  2018-07-12 20:52 ` [PATCH 07/16] commit-reach: move can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
                   ` (10 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-12 20:47 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The ok_to_give_up() method uses the commit date as a cutoff to avoid
walking the entire reachable set of commits. Before moving the
reachable() method to commit-reach.c, pull out the dependence on the
global constant 'oldest_have' with a 'min_commit_date' parameter.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 upload-pack.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index e7ad7f24b..96298e50e 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -336,7 +336,8 @@ static int got_oid(const char *hex, struct object_id *oid)
 	return 0;
 }
 
-static int reachable(struct commit *from, int with_flag, int assign_flag)
+static int reachable(struct commit *from, int with_flag, int assign_flag,
+		     time_t min_commit_date)
 {
 	struct prio_queue work = { compare_commits_by_commit_date };
 
@@ -354,7 +355,7 @@ static int reachable(struct commit *from, int with_flag, int assign_flag)
 		if (commit->object.flags & REACHABLE)
 			continue;
 		commit->object.flags |= REACHABLE;
-		if (commit->date < oldest_have)
+		if (commit->date < min_commit_date)
 			continue;
 		for (list = commit->parents; list; list = list->next) {
 			struct commit *parent = list->item;
@@ -371,10 +372,12 @@ static int reachable(struct commit *from, int with_flag, int assign_flag)
 /*
  * Determine if every commit in 'from' can reach at least one commit
  * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
- * as a marker for commits that are already visited.
+ * as a marker for commits that are already visited. Do not walk
+ * commits with date below 'min_commit_date'.
  */
 static int can_all_from_reach_with_flag(struct object_array *from,
-					int with_flag, int assign_flag)
+					int with_flag, int assign_flag,
+					time_t min_commit_date)
 {
 	int i;
 
@@ -393,7 +396,8 @@ static int can_all_from_reach_with_flag(struct object_array *from,
 			from->objects[i].item->flags |= assign_flag;
 			continue;
 		}
-		if (!reachable((struct commit *)from_one, with_flag, assign_flag))
+		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
+			       min_commit_date))
 			return 0;
 	}
 	return 1;
@@ -404,7 +408,8 @@ static int ok_to_give_up(void)
 	if (!have_obj.nr)
 		return 0;
 
-	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, COMMON_KNOWN);
+	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE,
+					    COMMON_KNOWN, oldest_have);
 }
 
 static int get_common_commits(void)
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 07/16] commit-reach: move can_all_from_reach_with_flags
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (6 preceding siblings ...)
  2018-07-12 20:47 ` [PATCH 06/16] upload-pack: generalize commit date cutoff Derrick Stolee via GitGitGadget
@ 2018-07-12 20:52 ` Derrick Stolee via GitGitGadget
  2018-07-16 22:37   ` Jonathan Tan
  2018-07-13 14:06 ` [PATCH 08/16] test-reach: create new test tool for ref_newer Derrick Stolee via GitGitGadget
                   ` (9 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-12 20:52 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 62 +++++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h | 13 ++++++++++
 upload-pack.c  | 69 +-------------------------------------------------
 3 files changed, 76 insertions(+), 68 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index 9e56f90ea..e07ff993f 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -9,6 +9,7 @@
 #include "commit-reach.h"
 
 /* Remember to update object flag allocation in object.h */
+#define REACHABLE       (1u<<15)
 #define PARENT1		(1u<<16)
 #define PARENT2		(1u<<17)
 #define STALE		(1u<<18)
@@ -531,3 +532,64 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 		return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
 	return is_descendant_of(commit, list);
 }
+
+int reachable(struct commit *from, int with_flag, int assign_flag,
+	      time_t min_commit_date)
+{
+	struct prio_queue work = { compare_commits_by_commit_date };
+
+	prio_queue_put(&work, from);
+	while (work.nr) {
+		struct commit_list *list;
+		struct commit *commit = prio_queue_get(&work);
+
+		if (commit->object.flags & with_flag) {
+			from->object.flags |= assign_flag;
+			break;
+		}
+		if (!commit->object.parsed)
+			parse_object(the_repository, &commit->object.oid);
+		if (commit->object.flags & REACHABLE)
+			continue;
+		commit->object.flags |= REACHABLE;
+		if (commit->date < min_commit_date)
+			continue;
+		for (list = commit->parents; list; list = list->next) {
+			struct commit *parent = list->item;
+			if (!(parent->object.flags & REACHABLE))
+				prio_queue_put(&work, parent);
+		}
+	}
+	from->object.flags |= REACHABLE;
+	clear_commit_marks(from, REACHABLE);
+	clear_prio_queue(&work);
+	return (from->object.flags & assign_flag);
+}
+
+int can_all_from_reach_with_flag(struct object_array *from,
+				 int with_flag, int assign_flag,
+				 time_t min_commit_date)
+{
+	int i;
+
+	for (i = 0; i < from->nr; i++) {
+		struct object *from_one = from->objects[i].item;
+
+		if (from_one->flags & assign_flag)
+			continue;
+		from_one = deref_tag(the_repository, from_one, "a from object", 0);
+		if (!from_one || from_one->type != OBJ_COMMIT) {
+			/* no way to tell if this is reachable by
+			 * looking at the ancestry chain alone, so
+			 * leave a note to ourselves not to worry about
+			 * this object anymore.
+			 */
+			from->objects[i].item->flags |= assign_flag;
+			continue;
+		}
+		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
+			       min_commit_date))
+			return 0;
+	}
+	return 1;
+}
diff --git a/commit-reach.h b/commit-reach.h
index 925cb05d7..ce3b963e6 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -56,4 +56,17 @@ define_commit_slab(contains_cache, enum contains_result);
 int commit_contains(struct ref_filter *filter, struct commit *commit,
 		    struct commit_list *list, struct contains_cache *cache);
 
+int reachable(struct commit *from, int with_flag, int assign_flag,
+	      time_t min_commit_date);
+
+/*
+ * Determine if every commit in 'from' can reach at least one commit
+ * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
+ * as a marker for commits that are already visited. Do not walk
+ * commits with date below 'min_commit_date'.
+ */
+int can_all_from_reach_with_flag(struct object_array *from,
+				 int with_flag, int assign_flag,
+				 time_t min_commit_date);
+
 #endif
diff --git a/upload-pack.c b/upload-pack.c
index 96298e50e..11c426685 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -24,13 +24,13 @@
 #include "quote.h"
 #include "upload-pack.h"
 #include "serve.h"
+#include "commit-reach.h"
 
 /* Remember to update object flag allocation in object.h */
 #define THEY_HAVE	(1u << 11)
 #define OUR_REF		(1u << 12)
 #define WANTED		(1u << 13)
 #define COMMON_KNOWN	(1u << 14)
-#define REACHABLE	(1u << 15)
 
 #define SHALLOW		(1u << 16)
 #define NOT_SHALLOW	(1u << 17)
@@ -336,73 +336,6 @@ static int got_oid(const char *hex, struct object_id *oid)
 	return 0;
 }
 
-static int reachable(struct commit *from, int with_flag, int assign_flag,
-		     time_t min_commit_date)
-{
-	struct prio_queue work = { compare_commits_by_commit_date };
-
-	prio_queue_put(&work, from);
-	while (work.nr) {
-		struct commit_list *list;
-		struct commit *commit = prio_queue_get(&work);
-
-		if (commit->object.flags & with_flag) {
-			from->object.flags |= assign_flag;
-			break;
-		}
-		if (!commit->object.parsed)
-			parse_object(the_repository, &commit->object.oid);
-		if (commit->object.flags & REACHABLE)
-			continue;
-		commit->object.flags |= REACHABLE;
-		if (commit->date < min_commit_date)
-			continue;
-		for (list = commit->parents; list; list = list->next) {
-			struct commit *parent = list->item;
-			if (!(parent->object.flags & REACHABLE))
-				prio_queue_put(&work, parent);
-		}
-	}
-	from->object.flags |= REACHABLE;
-	clear_commit_marks(from, REACHABLE);
-	clear_prio_queue(&work);
-	return (from->object.flags & assign_flag);
-}
-
-/*
- * Determine if every commit in 'from' can reach at least one commit
- * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
- * as a marker for commits that are already visited. Do not walk
- * commits with date below 'min_commit_date'.
- */
-static int can_all_from_reach_with_flag(struct object_array *from,
-					int with_flag, int assign_flag,
-					time_t min_commit_date)
-{
-	int i;
-
-	for (i = 0; i < from->nr; i++) {
-		struct object *from_one = from->objects[i].item;
-
-		if (from_one->flags & assign_flag)
-			continue;
-		from_one = deref_tag(the_repository, from_one, "a from object", 0);
-		if (!from_one || from_one->type != OBJ_COMMIT) {
-			/* no way to tell if this is reachable by
-			 * looking at the ancestry chain alone, so
-			 * leave a note to ourselves not to worry about
-			 * this object anymore.
-			 */
-			from->objects[i].item->flags |= assign_flag;
-			continue;
-		}
-		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
-			       min_commit_date))
-			return 0;
-	}
-	return 1;
-}
-
 static int ok_to_give_up(void)
 {
 	if (!have_obj.nr)
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 08/16] test-reach: create new test tool for ref_newer
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (7 preceding siblings ...)
  2018-07-12 20:52 ` [PATCH 07/16] commit-reach: move can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
@ 2018-07-13 14:06 ` Derrick Stolee via GitGitGadget
  2018-07-16 23:00   ` Jonathan Tan
  2018-07-13 14:28 ` [PATCH 09/16] test-reach: test in_merge_bases Derrick Stolee via GitGitGadget
                   ` (8 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 14:06 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

As we prepare to change the behavior of the algorithms in
commit-reach.c, create a new test-tool subcommand 'reach' to test these
methods on interesting commit-graph shapes.

To use the new test-tool, use 'test-tool reach <method>' and provide
input to stdin that describes the inputs to the method. Currently, we
only implement the ref_newer method, which requires two commits. Use
lines "A:<committish>" and "B:<committish>" for the two inputs. We will
expand this input later to accommodate methods that take lists of
commits.

The test t6600-test-reach.sh creates a repo whose commits form a
two-dimensional grid. This grid makes it easy for us to determine
reachability because commit-A-B can reach commit-X-Y if and only if A is
at least X and B is at least Y. This helps create interesting test cases
for each result of the methods in commit-reach.c.

We test all methods in three different states of the commit-graph file:
Non-existent (no generation numbers), fully computed, and mixed (some
commits have generation numbers and others do not).

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Makefile              |  1 +
 t/helper/test-reach.c | 62 +++++++++++++++++++++++++++++++
 t/helper/test-tool.c  |  1 +
 t/helper/test-tool.h  |  1 +
 t/t6600-test-reach.sh | 86 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 151 insertions(+)
 create mode 100644 t/helper/test-reach.c
 create mode 100755 t/t6600-test-reach.sh

diff --git a/Makefile b/Makefile
index 59781f4bc..d69f9d415 100644
--- a/Makefile
+++ b/Makefile
@@ -716,6 +716,7 @@ TEST_BUILTINS_OBJS += test-mktemp.o
 TEST_BUILTINS_OBJS += test-online-cpus.o
 TEST_BUILTINS_OBJS += test-path-utils.o
 TEST_BUILTINS_OBJS += test-prio-queue.o
+TEST_BUILTINS_OBJS += test-reach.o
 TEST_BUILTINS_OBJS += test-read-cache.o
 TEST_BUILTINS_OBJS += test-ref-store.o
 TEST_BUILTINS_OBJS += test-regex.o
diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
new file mode 100644
index 000000000..8cc570f3b
--- /dev/null
+++ b/t/helper/test-reach.c
@@ -0,0 +1,62 @@
+#include "test-tool.h"
+#include "cache.h"
+#include "commit-reach.h"
+#include "config.h"
+#include "parse-options.h"
+#include "tag.h"
+
+int cmd__reach(int ac, const char **av)
+{
+	struct object_id oid_A, oid_B;
+	struct strbuf buf = STRBUF_INIT;
+	struct repository *r = the_repository;
+
+	setup_git_directory();
+
+	if (ac < 2)
+		exit(1);
+
+
+	while (strbuf_getline(&buf, stdin) != EOF) {
+		struct object_id oid;
+		struct object *o;
+		struct commit *c;
+		if (buf.len < 3)
+			continue;
+
+		if (get_oid_committish(buf.buf + 2, &oid))
+			die("failed to resolve %s", buf.buf + 2);
+
+		o = parse_object(r, &oid);
+		o = deref_tag_noverify(o);
+
+		if (!o)
+			die("failed to load commit for input %s resulting in oid %s\n",
+			    buf.buf, oid_to_hex(&oid));
+
+		c = object_as_type(r, o, OBJ_COMMIT, 0);
+
+		if (!c)
+			die("failed to load commit for input %s resulting in oid %s\n",
+			    buf.buf, oid_to_hex(&oid));
+
+		switch (buf.buf[0]) {
+			case 'A':
+				oidcpy(&oid_A, &oid);
+				break;
+
+			case 'B':
+				oidcpy(&oid_B, &oid);
+				break;
+
+			default:
+				die("unexpected start of line: %c", buf.buf[0]);
+		}
+	}
+	strbuf_release(&buf);
+
+	if (!strcmp(av[1], "ref_newer"))
+		printf("%s:%d\n", av[1], ref_newer(&oid_A, &oid_B));
+
+	exit(0);
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index dafc91c24..582d02adf 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -26,6 +26,7 @@ static struct test_cmd cmds[] = {
 	{ "online-cpus", cmd__online_cpus },
 	{ "path-utils", cmd__path_utils },
 	{ "prio-queue", cmd__prio_queue },
+	{ "reach", cmd__reach },
 	{ "read-cache", cmd__read_cache },
 	{ "ref-store", cmd__ref_store },
 	{ "regex", cmd__regex },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 80cbcf085..a7e53c420 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -20,6 +20,7 @@ int cmd__mktemp(int argc, const char **argv);
 int cmd__online_cpus(int argc, const char **argv);
 int cmd__path_utils(int argc, const char **argv);
 int cmd__prio_queue(int argc, const char **argv);
+int cmd__reach(int argc, const char **argv);
 int cmd__read_cache(int argc, const char **argv);
 int cmd__ref_store(int argc, const char **argv);
 int cmd__regex(int argc, const char **argv);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
new file mode 100755
index 000000000..4ffe0174d
--- /dev/null
+++ b/t/t6600-test-reach.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+test_description='basic commit reachability tests'
+
+. ./test-lib.sh
+
+# Construct a grid-like commit graph with points (x,y)
+# with 1 <= x <= 10, 1 <= y <= 10, where (x,y) has
+# parents (x-1, y) and (x, y-1), keeping in mind that
+# we drop a parent if a coordinate is nonpositive.
+#
+#             (10,10)
+#            /       \
+#         (10,9)    (9,10)
+#        /     \   /      \
+#    (10,8)    (9,9)      (8,10)
+#   /     \    /   \      /    \
+#         ( continued...)
+#   \     /    \   /      \    /
+#    (3,1)     (2,2)      (1,3)
+#        \     /    \     /
+#         (2,1)      (1,2)
+#              \    /
+#              (1,1)
+#
+# We use branch 'commit-x-y' to refer to (x,y).
+# This grid allows interesting reachability and
+# non-reachability queries: (x,y) can reach (x',y')
+# if and only if x' <= x and y' <= y.
+test_expect_success 'setup' '
+	for i in $(test_seq 1 10)
+	do
+		test_commit "1-$i" &&
+		git branch -f commit-1-$i
+	done &&
+	for j in $(test_seq 1 9)
+	do
+		git reset --hard commit-$j-1 &&
+		x=$(($j + 1)) &&
+		test_commit "$x-1" &&
+		git branch -f commit-$x-1 &&
+
+		for i in $(test_seq 2 10)
+		do
+			git merge commit-$j-$i -m "$x-$i" &&
+			git branch -f commit-$x-$i
+		done
+	done &&
+	git commit-graph write --reachable &&
+	mv .git/objects/info/commit-graph commit-graph-full &&
+	git show-ref -s commit-7-7 | git commit-graph write --stdin-commits &&
+	mv .git/objects/info/commit-graph commit-graph-half &&
+	git config core.commitGraph true
+'
+
+test_three_modes () {
+	test_when_finished rm -rf .git/objects/info/commit-graph &&
+	test-tool reach $1 <input >actual &&
+	test_cmp expect actual &&
+	cp commit-graph-full .git/objects/info/commit-graph &&
+	test-tool reach $1 <input >actual &&
+	test_cmp expect actual &&
+	cp commit-graph-half .git/objects/info/commit-graph &&
+	test-tool reach $1 <input >actual &&
+	test_cmp expect actual
+}
+
+test_expect_success 'ref_newer:miss' '
+	cat >input <<-\EOF &&
+		A:commit-5-7
+		B:commit-4-9
+	EOF
+	printf "ref_newer:0\n" >expect &&
+	test_three_modes ref_newer
+'
+
+test_expect_success 'ref_newer:hit' '
+	cat >input <<-\EOF &&
+		A:commit-5-7
+		B:commit-2-3
+	EOF
+	printf "ref_newer:1\n" >expect &&
+	test_three_modes ref_newer
+'
+
+test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 09/16] test-reach: test in_merge_bases
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (8 preceding siblings ...)
  2018-07-13 14:06 ` [PATCH 08/16] test-reach: create new test tool for ref_newer Derrick Stolee via GitGitGadget
@ 2018-07-13 14:28 ` Derrick Stolee via GitGitGadget
  2018-07-13 14:38 ` [PATCH 10/16] test-reach: test is_descendant_of Derrick Stolee via GitGitGadget
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 14:28 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c |  6 ++++++
 t/t6600-test-reach.sh | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 8cc570f3b..29104d41a 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -8,6 +8,7 @@
 int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
+	struct commit *A, *B;
 	struct strbuf buf = STRBUF_INIT;
 	struct repository *r = the_repository;
 
@@ -16,6 +17,7 @@ int cmd__reach(int ac, const char **av)
 	if (ac < 2)
 		exit(1);
 
+	A = B = NULL;
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		struct object_id oid;
@@ -43,10 +45,12 @@ int cmd__reach(int ac, const char **av)
 		switch (buf.buf[0]) {
 			case 'A':
 				oidcpy(&oid_A, &oid);
+				A = c;
 				break;
 
 			case 'B':
 				oidcpy(&oid_B, &oid);
+				B = c;
 				break;
 
 			default:
@@ -57,6 +61,8 @@ int cmd__reach(int ac, const char **av)
 
 	if (!strcmp(av[1], "ref_newer"))
 		printf("%s:%d\n", av[1], ref_newer(&oid_A, &oid_B));
+	else if (!strcmp(av[1], "in_merge_bases"))
+		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
 
 	exit(0);
 }
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 4ffe0174d..d065f2602 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -83,4 +83,22 @@ test_expect_success 'ref_newer:hit' '
 	test_three_modes ref_newer
 '
 
+test_expect_success 'in_merge_bases:hit' '
+	cat >input <<- EOF &&
+		A:commit-5-7
+		B:commit-8-8
+	EOF
+	printf "in_merge_bases(A,B):1\n" >expect &&
+	test_three_modes in_merge_bases
+'
+
+test_expect_success 'in_merge_bases:miss' '
+	cat >input <<- EOF &&
+		A:commit-6-8
+		B:commit-5-9
+	EOF
+	printf "in_merge_bases(A,B):0\n" >expect &&
+	test_three_modes in_merge_bases
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 10/16] test-reach: test is_descendant_of
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (9 preceding siblings ...)
  2018-07-13 14:28 ` [PATCH 09/16] test-reach: test in_merge_bases Derrick Stolee via GitGitGadget
@ 2018-07-13 14:38 ` Derrick Stolee via GitGitGadget
  2018-07-13 14:51 ` [PATCH 11/16] test-reach: test get_merge_bases_many Derrick Stolee via GitGitGadget
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 14:38 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The is_descendant_of method takes a single commit as its first parameter
and a list of commits as its second parameter. Extend the input of the
'test-tool reach' command to take multiple lines of the form
"X:<committish>" to construct a list of commits. Pass these to
is_descendant_of and create tests that check each result.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c |  8 ++++++++
 t/t6600-test-reach.sh | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 29104d41a..149e8f32c 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -9,6 +9,7 @@ int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
 	struct commit *A, *B;
+	struct commit_list *X;
 	struct strbuf buf = STRBUF_INIT;
 	struct repository *r = the_repository;
 
@@ -18,6 +19,7 @@ int cmd__reach(int ac, const char **av)
 		exit(1);
 
 	A = B = NULL;
+	X = NULL;
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		struct object_id oid;
@@ -53,6 +55,10 @@ int cmd__reach(int ac, const char **av)
 				B = c;
 				break;
 
+			case 'X':
+				commit_list_insert(c, &X);
+				break;
+
 			default:
 				die("unexpected start of line: %c", buf.buf[0]);
 		}
@@ -63,6 +69,8 @@ int cmd__reach(int ac, const char **av)
 		printf("%s:%d\n", av[1], ref_newer(&oid_A, &oid_B));
 	else if (!strcmp(av[1], "in_merge_bases"))
 		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
+	else if (!strcmp(av[1], "is_descendant_of"))
+		printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X));
 
 	exit(0);
 }
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index d065f2602..99444e0f7 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -101,4 +101,26 @@ test_expect_success 'in_merge_bases:miss' '
 	test_three_modes in_merge_bases
 '
 
+test_expect_success 'is_descendant_of:hit' '
+	cat >input <<-\EOF &&
+		A:commit-5-7
+		X:commit-4-8
+		X:commit-6-6
+		X:commit-1-1
+	EOF
+	printf "is_descendant_of(A,X):1\n" >expect &&
+	test_three_modes is_descendant_of
+'
+
+test_expect_success 'is_descendant_of:miss' '
+	cat >input <<-\EOF &&
+		A:commit-6-8
+		X:commit-5-9
+		X:commit-4-10
+		X:commit-7-6
+	EOF
+	printf "is_descendant_of(A,X):0\n" >expect &&
+	test_three_modes is_descendant_of
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 11/16] test-reach: test get_merge_bases_many
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (10 preceding siblings ...)
  2018-07-13 14:38 ` [PATCH 10/16] test-reach: test is_descendant_of Derrick Stolee via GitGitGadget
@ 2018-07-13 14:51 ` Derrick Stolee via GitGitGadget
  2018-07-16 21:24   ` Stefan Beller
  2018-07-16 23:08   ` Jonathan Tan
  2018-07-13 16:51 ` [PATCH 12/16] test-reach: test reduce_heads Derrick Stolee via GitGitGadget
                   ` (5 subsequent siblings)
  17 siblings, 2 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 14:51 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The get_merge_bases_many method returns a list of merge bases for a
single commit (A) against a list of commits (X). Some care is needed in
constructing the expected behavior because the result is not the
expected merge-base for an octopus merge with those parents but instead
the set of maximal commits that are reachable from A and at least one of
the commits in X.

Add get_merge_bases_many to 'test-tool reach' and create a test that
demonstrates that this output returns multiple results. Specifically, we
select a list of three commits such that we output two commits that are
reachable from one of the first two, respectively, and none are
reachable from the third.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c | 15 +++++++++++++++
 t/t6600-test-reach.sh | 15 +++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 149e8f32c..97c726040 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -10,6 +10,8 @@ int cmd__reach(int ac, const char **av)
 	struct object_id oid_A, oid_B;
 	struct commit *A, *B;
 	struct commit_list *X;
+	struct commit **X_array;
+	int X_nr, X_alloc;
 	struct strbuf buf = STRBUF_INIT;
 	struct repository *r = the_repository;
 
@@ -20,6 +22,9 @@ int cmd__reach(int ac, const char **av)
 
 	A = B = NULL;
 	X = NULL;
+	X_nr = 0;
+	X_alloc = 16;
+	ALLOC_ARRAY(X_array, X_alloc);
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		struct object_id oid;
@@ -57,6 +62,8 @@ int cmd__reach(int ac, const char **av)
 
 			case 'X':
 				commit_list_insert(c, &X);
+				ALLOC_GROW(X_array, X_nr + 1, X_alloc);
+				X_array[X_nr++] = c;
 				break;
 
 			default:
@@ -71,6 +78,14 @@ int cmd__reach(int ac, const char **av)
 		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
 	else if (!strcmp(av[1], "is_descendant_of"))
 		printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X));
+	else if (!strcmp(av[1], "get_merge_bases_many")) {
+		struct commit_list *list = get_merge_bases_many(A, X_nr, X_array);
+		printf("%s(A,X):\n", av[1]);
+		while (list) {
+			printf("%s\n", oid_to_hex(&list->item->object.oid));
+			list = list->next;
+		}
+	}
 
 	exit(0);
 }
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 99444e0f7..19e39ad7c 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -123,4 +123,19 @@ test_expect_success 'is_descendant_of:miss' '
 	test_three_modes is_descendant_of
 '
 
+test_expect_success 'get_merge_bases_many' '
+	cat >input <<-\EOF &&
+		A:commit-5-7
+		X:commit-4-8
+		X:commit-6-6
+		X:commit-8-3
+	EOF
+	{
+		printf "get_merge_bases_many(A,X):\n" &&
+		git rev-parse commit-5-6 &&
+		git rev-parse commit-4-7
+	} >expect &&
+	test_three_modes get_merge_bases_many
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 12/16] test-reach: test reduce_heads
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (11 preceding siblings ...)
  2018-07-13 14:51 ` [PATCH 11/16] test-reach: test get_merge_bases_many Derrick Stolee via GitGitGadget
@ 2018-07-13 16:51 ` Derrick Stolee via GitGitGadget
  2018-07-16 21:30   ` Stefan Beller
  2018-07-13 17:22 ` [PATCH 13/16] test-reach: test can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
                   ` (4 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 16:51 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c |  7 +++++++
 t/t6600-test-reach.sh | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 97c726040..73cb55208 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -85,6 +85,13 @@ int cmd__reach(int ac, const char **av)
 			printf("%s\n", oid_to_hex(&list->item->object.oid));
 			list = list->next;
 		}
+	} else if (!strcmp(av[1], "reduce_heads")) {
+		struct commit_list *list = reduce_heads(X);
+		printf("%s(X):\n", av[1]);
+		while (list) {
+			printf("%s\n", oid_to_hex(&list->item->object.oid));
+			list = list->next;
+		}
 	}
 
 	exit(0);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 19e39ad7c..e365a4ea6 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -138,4 +138,26 @@ test_expect_success 'get_merge_bases_many' '
 	test_three_modes get_merge_bases_many
 '
 
+test_expect_success 'reduce_heads' '
+	cat >input <<-\EOF &&
+		X:commit-1-10
+		X:commit-2-8
+		X:commit-3-6
+		X:commit-4-4
+		X:commit-1-7
+		X:commit-2-5
+		X:commit-3-3
+		X:commit-5-1
+	EOF
+	{
+		printf "reduce_heads(X):\n" &&
+		git rev-parse commit-5-1 &&
+		git rev-parse commit-4-4 &&
+		git rev-parse commit-3-6 &&
+		git rev-parse commit-2-8 &&
+		git rev-parse commit-1-10
+	} >expect &&
+	test_three_modes reduce_heads
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 13/16] test-reach: test can_all_from_reach_with_flags
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (12 preceding siblings ...)
  2018-07-13 16:51 ` [PATCH 12/16] test-reach: test reduce_heads Derrick Stolee via GitGitGadget
@ 2018-07-13 17:22 ` Derrick Stolee via GitGitGadget
  2018-07-16 21:54   ` Stefan Beller
  2018-07-17  0:10   ` Jonathan Tan
  2018-07-13 18:37 ` [PATCH 14/16] commit-reach: replace ref_newer logic Derrick Stolee via GitGitGadget
                   ` (3 subsequent siblings)
  17 siblings, 2 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 17:22 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The can_all_from_reach_with_flag method is used by ok_to_give_up in
upload-pack.c to see if we have done enough negotiation during a fetch.
This method is intentionally created to preserve state between calls to
assist with stateful negotiation, such as over SSH.

To make this method testable, add a new can_all_from_reach method that
does the initial setup and final tear-down. Call the method from
'test-tool reach'.

Since this is a many-to-many reachability query, add a new type of input
to the 'test-tool reach' input format. Lines "Y:<committish>" create a
list of commits to be the reachability targets from the commits in the
'X' list. In the context of fetch negotiation, the 'X' commits are the
'want' commits and the 'Y' commits are the 'have' commits.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c        | 47 +++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h        |  2 ++
 t/helper/test-reach.c | 10 +++++++--
 t/t6600-test-reach.sh | 45 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index e07ff993f..c5e89a2de 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -593,3 +593,50 @@ int can_all_from_reach_with_flag(struct object_array *from,
 	}
 	return 1;
 }
+
+int can_all_from_reach(struct commit_list *from, struct commit_list *to,
+		       int cutoff_by_min_date)
+{
+	struct object_array from_objs = OBJECT_ARRAY_INIT;
+	time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0;
+	struct commit_list *from_iter = from, *to_iter = to;
+	int result;
+
+	while (from_iter) {
+		add_object_array(&from_iter->item->object, NULL, &from_objs);
+
+		if (!parse_commit(from_iter->item)) {
+			if (from_iter->item->date < min_commit_date)
+				min_commit_date = from_iter->item->date;
+		}
+
+		from_iter = from_iter->next;
+	}
+
+	while (to_iter) {
+		if (!parse_commit(to_iter->item)) {
+			if (to_iter->item->date < min_commit_date)
+				min_commit_date = to_iter->item->date;
+		}
+
+		to_iter->item->object.flags |= PARENT2;
+
+		to_iter = to_iter->next;
+	}
+
+	result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1,
+					      min_commit_date);
+
+	while (from) {
+		clear_commit_marks(from->item, PARENT1);
+		from = from->next;
+	}
+
+	while (to) {
+		clear_commit_marks(to->item, PARENT2);
+		to = to->next;
+	}
+
+	object_array_clear(&from_objs);
+	return result;
+}
diff --git a/commit-reach.h b/commit-reach.h
index ce3b963e6..58de0df56 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -68,5 +68,7 @@ int reachable(struct commit *from, int with_flag, int assign_flag,
 int can_all_from_reach_with_flag(struct object_array *from,
 				 int with_flag, int assign_flag,
 				 time_t min_commit_date);
+int can_all_from_reach(struct commit_list *from, struct commit_list *to,
+		       int commit_date_cutoff);
 
 #endif
diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 73cb55208..dc9710099 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -9,7 +9,7 @@ int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
 	struct commit *A, *B;
-	struct commit_list *X;
+	struct commit_list *X, *Y;
 	struct commit **X_array;
 	int X_nr, X_alloc;
 	struct strbuf buf = STRBUF_INIT;
@@ -21,7 +21,7 @@ int cmd__reach(int ac, const char **av)
 		exit(1);
 
 	A = B = NULL;
-	X = NULL;
+	X = Y = NULL;
 	X_nr = 0;
 	X_alloc = 16;
 	ALLOC_ARRAY(X_array, X_alloc);
@@ -66,6 +66,10 @@ int cmd__reach(int ac, const char **av)
 				X_array[X_nr++] = c;
 				break;
 
+			case 'Y':
+				commit_list_insert(c, &Y);
+				break;
+
 			default:
 				die("unexpected start of line: %c", buf.buf[0]);
 		}
@@ -92,6 +96,8 @@ int cmd__reach(int ac, const char **av)
 			printf("%s\n", oid_to_hex(&list->item->object.oid));
 			list = list->next;
 		}
+	} else if (!strcmp(av[1], "can_all_from_reach")) {
+		printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1));
 	}
 
 	exit(0);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index e365a4ea6..cb07c6431 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -160,4 +160,49 @@ test_expect_success 'reduce_heads' '
 	test_three_modes reduce_heads
 '
 
+test_expect_success 'can_all_from_reach:hit' '
+	cat >input <<-\EOF &&
+		X:commit-2-10
+		X:commit-3-9
+		X:commit-4-8
+		X:commit-5-7
+		X:commit-6-6
+		X:commit-7-5
+		X:commit-8-4
+		X:commit-9-3
+		Y:commit-1-9
+		Y:commit-2-8
+		Y:commit-3-7
+		Y:commit-4-6
+		Y:commit-5-5
+		Y:commit-6-4
+		Y:commit-7-3
+		Y:commit-8-1
+	EOF
+	printf "can_all_from_reach(X,Y):1\n" >expect &&
+	test_three_modes can_all_from_reach
+'
+
+test_expect_success 'can_all_from_reach:miss' '
+	cat >input <<-\EOF &&
+		X:commit-2-10
+		X:commit-3-9
+		X:commit-4-8
+		X:commit-5-7
+		X:commit-6-6
+		X:commit-7-5
+		X:commit-8-4
+		X:commit-9-3
+		Y:commit-1-9
+		Y:commit-2-8
+		Y:commit-3-7
+		Y:commit-4-6
+		Y:commit-5-5
+		Y:commit-6-4
+		Y:commit-8-5
+	EOF
+	printf "can_all_from_reach(X,Y):0\n" >expect &&
+	test_three_modes can_all_from_reach
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 14/16] commit-reach: replace ref_newer logic
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (13 preceding siblings ...)
  2018-07-13 17:22 ` [PATCH 13/16] test-reach: test can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
@ 2018-07-13 18:37 ` Derrick Stolee via GitGitGadget
  2018-07-16 22:16   ` Stefan Beller
  2018-07-13 19:25 ` [PATCH 16/16] commit-reach: use can_all_from_reach Derrick Stolee via GitGitGadget
                   ` (2 subsequent siblings)
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 18:37 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The ref_newer method is used by 'git push' to check if a force-push is
required. This method does not use any kind of cutoff when walking, so
in the case of a force-push will walk all reachable commits.

The is_descendant_of method already uses paint_down_to_common along with
cutoffs. By translating the ref_newer arguments into the commit and
commit_list required by is_descendant_of, we can have one fewer commit
walk and also improve our performance!

For a copy of the Linux repository, 'test-tool reach ref_newer' presents
the following improvements with the specified input. In the case that
ref_newer returns 1, there is no improvement. The improvement is in the
second case where ref_newer returns 0.

Input
-----
A:v4.9
B:v3.19

Before: 0.09 s
 After: 0.09 s

To test the negative case, add a new commit with parent v3.19,
regenerate the commit-graph, and then run with B pointing at that
commit.

Before: 0.43 s
 After: 0.09 s

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index c5e89a2de..c58e50fbb 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -365,20 +365,11 @@ void reduce_heads_replace(struct commit_list **heads)
 	*heads = result;
 }
 
-static void unmark_and_free(struct commit_list *list, unsigned int mark)
-{
-	while (list) {
-		struct commit *commit = pop_commit(&list);
-		commit->object.flags &= ~mark;
-	}
-}
-
 int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
 {
 	struct object *o;
 	struct commit *old_commit, *new_commit;
-	struct commit_list *list, *used;
-	int found = 0;
+	struct commit_list *old_commit_list = NULL;
 
 	/*
 	 * Both new_commit and old_commit must be commit-ish and new_commit is descendant of
@@ -399,19 +390,8 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
 	if (parse_commit(new_commit) < 0)
 		return 0;
 
-	used = list = NULL;
-	commit_list_insert(new_commit, &list);
-	while (list) {
-		new_commit = pop_most_recent_commit(&list, TMP_MARK);
-		commit_list_insert(new_commit, &used);
-		if (new_commit == old_commit) {
-			found = 1;
-			break;
-		}
-	}
-	unmark_and_free(list, TMP_MARK);
-	unmark_and_free(used, TMP_MARK);
-	return found;
+	commit_list_insert(old_commit, &old_commit_list);
+	return is_descendant_of(new_commit, old_commit_list);
 }
 
 /*
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 16/16] commit-reach: use can_all_from_reach
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (14 preceding siblings ...)
  2018-07-13 18:37 ` [PATCH 14/16] commit-reach: replace ref_newer logic Derrick Stolee via GitGitGadget
@ 2018-07-13 19:25 ` Derrick Stolee via GitGitGadget
  2018-07-16 22:47   ` Stefan Beller
  2018-07-16 13:54 ` [PATCH 00/16] Consolidate reachability logic Ramsay Jones
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
  17 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-13 19:25 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

The is_descendant_of method previously used in_merge_bases() to check if
the commit can reach any of the commits in the provided list. This had
two performance problems:

1. The performance is quadratic in the worst case.

2. A single in_merge_bases() call requires walking beyond the target
   commit in order to find the full set of boundary commits that may be
   merge-bases.

The can_all_from_reach method avoids this quadratic behavior and can
limit the search beyond the target commits using generation numbers. It
requires a small prototype adjustment to stop using commit-date as a
cutoff, as that optimization is no longer appropriate here.

Since in_merge_bases() uses paint_down_to_common(), is_descendant_of()
naturally found cutoffs to avoid walking the entire commit graph. Since
we want to always return the correct result, we cannot use the
min_commit_date cutoff in can_all_from_reach. We then rely on generation
numbers to provide the cutoff.

Since not all repos will have a commit-graph file, nor will we always
have generation numbers computed for a commit-graph file, create a new
method, generation_numbers_enabled(), that checks for a commit-graph
file and sees if the first commit in the file has a non-zero generation
number. In the case that we do not have generation numbers, use the old
logic for is_descendant_of().

Performance was measured on a copy of the Linux repository using the
'test-tool reach is_descendant_of' command using this input:

A:v4.9
X:v4.10
X:v4.11
X:v4.12
X:v4.13
X:v4.14
X:v4.15
X:v4.16
X:v4.17
X:v3.0

Note that this input is tailored to demonstrate the quadratic nature of
the previous method, as it will compute merge-bases for v4.9 versus all
of the later versions before checking against v3.0.

Before: 0.26 s
 After: 0.21 s

Since we previously used the is_descendant_of method in the ref_newer
method, we also measured performance there using
'test-tool reach ref_newer' with this input:

A:v4.9
B:v3.19

Before: 0.10 s
 After: 0.08 s

By adding a new commit with parent v3.19, we test the non-reachable case
of ref_newer:

Before: 0.09 s
 After: 0.08 s

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-graph.c | 18 ++++++++++++++++++
 commit-graph.h |  6 ++++++
 commit-reach.c | 24 +++++++++++++++++-------
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index b0a55ad12..e9786fa86 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -233,6 +233,24 @@ static int prepare_commit_graph(struct repository *r)
 	return !!r->objects->commit_graph;
 }
 
+int generation_numbers_enabled(struct repository *r)
+{
+	uint32_t first_generation;
+	struct commit_graph *g;
+	if (!prepare_commit_graph(r))
+	       return 0;
+
+	g = r->objects->commit_graph;
+
+	if (!g->num_commits)
+		return 0;
+
+	first_generation = get_be32(g->chunk_commit_data +
+				    g->hash_len + 8) >> 2;
+
+	return !!first_generation;
+}
+
 static void close_commit_graph(void)
 {
 	free_commit_graph(the_repository->objects->commit_graph);
diff --git a/commit-graph.h b/commit-graph.h
index 76e098934..0de8f8831 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -51,6 +51,12 @@ struct commit_graph {
 
 struct commit_graph *load_commit_graph_one(const char *graph_file);
 
+/*
+ * Return 1 if and only if the repository has a commit-graph
+ * file and generation numbers are computed in that file.
+ */
+int generation_numbers_enabled(struct repository *r);
+
 void write_commit_graph_reachable(const char *obj_dir, int append);
 void write_commit_graph(const char *obj_dir,
 			struct string_list *pack_indexes,
diff --git a/commit-reach.c b/commit-reach.c
index ac132c8e4..9eb622540 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -276,15 +276,25 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
 {
 	if (!with_commit)
 		return 1;
-	while (with_commit) {
-		struct commit *other;
 
-		other = with_commit->item;
-		with_commit = with_commit->next;
-		if (in_merge_bases(other, commit))
-			return 1;
+	if (generation_numbers_enabled(the_repository)) {
+		struct commit_list *from_list = NULL;
+		int result;
+		commit_list_insert(commit, &from_list);
+		result = can_all_from_reach(from_list, with_commit, 0);
+		free_commit_list(from_list);
+		return result;
+	} else {
+		while (with_commit) {
+			struct commit *other;
+
+			other = with_commit->item;
+			with_commit = with_commit->next;
+			if (in_merge_bases(other, commit))
+				return 1;
+		}
+		return 0;
 	}
-	return 0;
 }
 
 /*
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH 00/16] Consolidate reachability logic
@ 2018-07-16 13:00 Derrick Stolee via GitGitGadget
  2018-06-19 20:25 ` [PATCH 04/16] upload-pack: make reachable() more generic Derrick Stolee via GitGitGadget
                   ` (17 more replies)
  0 siblings, 18 replies; 118+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2018-07-16 13:00 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

There are many places in Git that use a commit walk to determine
reachability between commits and/or refs. A lot of this logic is
duplicated.

I wanted to achieve the following:

1. Consolidate several different commit walks into one file
2. Reduce duplicate reachability logic
3. Increase testability (correctness and performance)
4. Improve performance of reachability queries

My approach is mostly in three parts:

  I. Move code to a new commit-reach.c file.
 II. Add a 'test-tool reach' command to test these methods directly.
III. Modify the logic by improving performance and calling methods with
     similar logic but different prototypes.

The 'test-tool reach' command is helpful to make sure I don't break
anything as I change the logic, but also so I can test methods that are
normally only exposed by other more complicated commands. For instance,
ref_newer() is part of 'git push -f' and ok_to_give_up() is buried deep
within fetch negotiation. Both of these methods have some problematic
performance issues that are corrected by this series. As I discovered
them, it was clear that it would be better to consolidate walk logic
instead of discovering a new walk in another file hidden somewhere.

For the ok_to_give_up() method, I refactored the method so I could pull
the logic out of the depths of fetch negotiation. In the commit
"commit-reach: make can_all_from_reach... linear" I discuss how the
existing algorithm is quadratic and how we can make it linear. Also, we
can use heuristic knowledge about the shape of the commit graph and the
usual haves/wants to get some extra performance bonus. (The heuristic is
to do a DFS with first-parents first, and stop on first found result. We
expect haves/wants to include ref tips, which typically have their
previous values in their first-parent history.)

One major difference in this series versus the RFC is that I added a new
method 'generation_numbers_enabled()' to detect if we have a commit-graph
file with non-zero generation numbers. Using can_all_from_reach in
is_descendant_of is only faster if we have generation numbers as a cutoff.

Thanks,
-Stolee

This series is based on jt/commit-graph-per-object-store

CC: sbeller@google.com

Derrick Stolee (16):
  commit-reach: move walk methods from commit.c
  commit-reach: move ref_newer from remote.c
  commit-reach: move commit_contains from ref-filter
  upload-pack: make reachable() more generic
  upload-pack: refactor ok_to_give_up()
  upload-pack: generalize commit date cutoff
  commit-reach: move can_all_from_reach_with_flags
  test-reach: create new test tool for ref_newer
  test-reach: test in_merge_bases
  test-reach: test is_descendant_of
  test-reach: test get_merge_bases_many
  test-reach: test reduce_heads
  test-reach: test can_all_from_reach_with_flags
  commit-reach: replace ref_newer logic
  commit-reach: make can_all_from_reach... linear
  commit-reach: use can_all_from_reach

 Makefile              |   2 +
 builtin/remote.c      |   1 +
 commit-graph.c        |  18 ++
 commit-graph.h        |   6 +
 commit-reach.c        | 662 ++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h        |  76 +++++
 commit.c              | 358 -----------------------
 fast-import.c         |   1 +
 http-push.c           |   1 +
 ref-filter.c          | 147 +---------
 remote.c              |  50 +---
 remote.h              |   1 -
 t/helper/test-reach.c | 104 +++++++
 t/helper/test-tool.c  |   1 +
 t/helper/test-tool.h  |   1 +
 t/t6600-test-reach.sh | 208 +++++++++++++
 upload-pack.c         |  58 +---
 17 files changed, 1095 insertions(+), 600 deletions(-)
 create mode 100644 commit-reach.c
 create mode 100644 commit-reach.h
 create mode 100644 t/helper/test-reach.c
 create mode 100755 t/t6600-test-reach.sh


base-commit: 596e28576ef3ca69432dbe5953b7bdcd18a32876
Published-As: https://github.com/gitgitgadget/git/releases/tags/pr-10%2Fderrickstolee%2Freach%2Frefactor-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-10/derrickstolee/reach/refactor-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/10
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (15 preceding siblings ...)
  2018-07-13 19:25 ` [PATCH 16/16] commit-reach: use can_all_from_reach Derrick Stolee via GitGitGadget
@ 2018-07-16 13:54 ` Ramsay Jones
  2018-07-16 16:18   ` Jeff King
  2018-07-16 17:26   ` Stefan Beller
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
  17 siblings, 2 replies; 118+ messages in thread
From: Ramsay Jones @ 2018-07-16 13:54 UTC (permalink / raw)
  To: Derrick Stolee via GitGitGadget, git; +Cc: Junio C Hamano



On 16/07/18 14:00, Derrick Stolee via GitGitGadget wrote:
> There are many places in Git that use a commit walk to determine
> reachability between commits and/or refs. A lot of this logic is
> duplicated.
[snip] ...

This is not your problem, but I find these GitGitGadget
submissions somewhat annoying. This series has been spewed
all over my in-box in, what I assume, is commit date order.

So, patches #4,5 dated 19/06, then #1,2,3 dated 25/06,
then #15 dated 28/06, then #6,7 dated 12/07, then #8-16
dated 13/07, then 00/16 dated today.

No I don't use a threaded display (I hate it), but even with
that turned on, the patches still appear in the above order
under the cover letter (but at least all together).

Annoyed.

ATB,
Ramsay Jones


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 13:54 ` [PATCH 00/16] Consolidate reachability logic Ramsay Jones
@ 2018-07-16 16:18   ` Jeff King
  2018-07-16 18:40     ` Eric Sunshine
  2018-07-18 12:23     ` Johannes Schindelin
  2018-07-16 17:26   ` Stefan Beller
  1 sibling, 2 replies; 118+ messages in thread
From: Jeff King @ 2018-07-16 16:18 UTC (permalink / raw)
  To: Ramsay Jones
  Cc: Johannes Schindelin, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano

On Mon, Jul 16, 2018 at 02:54:38PM +0100, Ramsay Jones wrote:

> On 16/07/18 14:00, Derrick Stolee via GitGitGadget wrote:
> > There are many places in Git that use a commit walk to determine
> > reachability between commits and/or refs. A lot of this logic is
> > duplicated.
> [snip] ...
> 
> This is not your problem, but I find these GitGitGadget
> submissions somewhat annoying. This series has been spewed
> all over my in-box in, what I assume, is commit date order.
> 
> So, patches #4,5 dated 19/06, then #1,2,3 dated 25/06,
> then #15 dated 28/06, then #6,7 dated 12/07, then #8-16
> dated 13/07, then 00/16 dated today.
> 
> No I don't use a threaded display (I hate it), but even with
> that turned on, the patches still appear in the above order
> under the cover letter (but at least all together).

Yeah, they're out of order in mutt's threaded display. And the
back-dating means there's a much higher chance of them getting blocked
as spam (e.g., some of the dates are from weeks ago).

git-send-email uses the current time minus an offset, and then
monotonically increases for each patch:

  $time = time - scalar $#files;
  ...
  my $date = format_2822_time($time++);

which seems to work pretty well in practice. It does mean the original
dates are lost. The committer date is not interesting at all (there will
be a new committer via "git am" anyway). The original author date is
potentially of interest, but could be included as an in-body header.
AFAIK send-email doesn't have such an option, though, and people are
fine with date-of-sending becoming the new author date.

+cc Johannes as the GitGitGadget author

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 13:54 ` [PATCH 00/16] Consolidate reachability logic Ramsay Jones
  2018-07-16 16:18   ` Jeff King
@ 2018-07-16 17:26   ` Stefan Beller
  2018-07-16 18:44     ` Eric Sunshine
  1 sibling, 1 reply; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 17:26 UTC (permalink / raw)
  To: Ramsay Jones, Johannes Schindelin; +Cc: gitgitgadget, git, Junio C Hamano

> This is not your problem, but I find these GitGitGadget
> submissions somewhat annoying.

Another pain point of the Gadget is that CC's in the cover letter
do not work as I would imagine. The line

CC: sbeller@google.com

did not put that email into the cc field.

How did you get Junio's email into the TO, though?

Anyway I'll have a look at this series.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 16:18   ` Jeff King
@ 2018-07-16 18:40     ` Eric Sunshine
  2018-07-16 18:56       ` Jeff King
  2018-07-18 12:23     ` Johannes Schindelin
  1 sibling, 1 reply; 118+ messages in thread
From: Eric Sunshine @ 2018-07-16 18:40 UTC (permalink / raw)
  To: Jeff King
  Cc: Ramsay Jones, Johannes Schindelin, gitgitgadget, Git List,
	Junio C Hamano

On Mon, Jul 16, 2018 at 12:18 PM Jeff King <peff@peff.net> wrote:
> On Mon, Jul 16, 2018 at 02:54:38PM +0100, Ramsay Jones wrote:
> > This is not your problem, but I find these GitGitGadget
> > submissions somewhat annoying. This series has been spewed
> > all over my in-box in, what I assume, is commit date order.
> >
> > So, patches #4,5 dated 19/06, then #1,2,3 dated 25/06,
> > then #15 dated 28/06, then #6,7 dated 12/07, then #8-16
> > dated 13/07, then 00/16 dated today.
>
> Yeah, they're out of order in mutt's threaded display. And the
> back-dating means there's a much higher chance of them getting blocked
> as spam (e.g., some of the dates are from weeks ago).
>
> git-send-email uses the current time minus an offset, and then
> monotonically increases for each patch:

Junio pointed this out to gitgitgadget developers in [1], which led to
an issue being opened[2]. That issue was merged today.

[1]: https://public-inbox.org/git/xmqq7em7gg3j.fsf@gitster-ct.c.googlers.com/
[2]: https://github.com/gitgitgadget/gitgitgadget/pull/15

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 17:26   ` Stefan Beller
@ 2018-07-16 18:44     ` Eric Sunshine
  2018-07-16 18:47       ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Eric Sunshine @ 2018-07-16 18:44 UTC (permalink / raw)
  To: Stefan Beller
  Cc: Ramsay Jones, Johannes Schindelin, gitgitgadget, Git List,
	Junio C Hamano

On Mon, Jul 16, 2018 at 1:27 PM Stefan Beller <sbeller@google.com> wrote:
> Another pain point of the Gadget is that CC's in the cover letter
> do not work as I would imagine. The line
>
> CC: sbeller@google.com
>
> did not put that email into the cc field.

gitgitgadget recognizes case-sensitive "Cc:" only[1].

[1]: https://github.com/gitgitgadget/gitgitgadget/blob/c4805370f59532aa438283431b8ea7d4484c530f/lib/patch-series.ts#L188

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 18:44     ` Eric Sunshine
@ 2018-07-16 18:47       ` Derrick Stolee
  2018-07-18 12:28         ` Johannes Schindelin
  0 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-07-16 18:47 UTC (permalink / raw)
  To: Eric Sunshine, Stefan Beller
  Cc: Ramsay Jones, Johannes Schindelin, gitgitgadget, Git List,
	Junio C Hamano

On 7/16/2018 2:44 PM, Eric Sunshine wrote:
> On Mon, Jul 16, 2018 at 1:27 PM Stefan Beller <sbeller@google.com> wrote:
>> Another pain point of the Gadget is that CC's in the cover letter
>> do not work as I would imagine. The line
>>
>> CC: sbeller@google.com
>>
>> did not put that email into the cc field.
> gitgitgadget recognizes case-sensitive "Cc:" only[1].
>
> [1]: https://github.com/gitgitgadget/gitgitgadget/blob/c4805370f59532aa438283431b8ea7d4484c530f/lib/patch-series.ts#L188

Thanks for everyone's patience while we improve gitgitgadget (and - in 
this case - I learn how to use it).

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 18:40     ` Eric Sunshine
@ 2018-07-16 18:56       ` Jeff King
  2018-07-16 18:59         ` Eric Sunshine
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-07-16 18:56 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: Ramsay Jones, Johannes Schindelin, gitgitgadget, Git List,
	Junio C Hamano

On Mon, Jul 16, 2018 at 02:40:21PM -0400, Eric Sunshine wrote:

> On Mon, Jul 16, 2018 at 12:18 PM Jeff King <peff@peff.net> wrote:
> > On Mon, Jul 16, 2018 at 02:54:38PM +0100, Ramsay Jones wrote:
> > > This is not your problem, but I find these GitGitGadget
> > > submissions somewhat annoying. This series has been spewed
> > > all over my in-box in, what I assume, is commit date order.
> > >
> > > So, patches #4,5 dated 19/06, then #1,2,3 dated 25/06,
> > > then #15 dated 28/06, then #6,7 dated 12/07, then #8-16
> > > dated 13/07, then 00/16 dated today.
> >
> > Yeah, they're out of order in mutt's threaded display. And the
> > back-dating means there's a much higher chance of them getting blocked
> > as spam (e.g., some of the dates are from weeks ago).
> >
> > git-send-email uses the current time minus an offset, and then
> > monotonically increases for each patch:
> 
> Junio pointed this out to gitgitgadget developers in [1], which led to
> an issue being opened[2]. That issue was merged today.
> 
> [1]: https://public-inbox.org/git/xmqq7em7gg3j.fsf@gitster-ct.c.googlers.com/
> [2]: https://github.com/gitgitgadget/gitgitgadget/pull/15

I was going to say "oh good, fixed", but it looks like it just merged
adding that line to the TODO list. :)

Still, it looks like wheels are in motion, which is nice.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 01/16] commit-reach: move walk methods from commit.c
  2018-06-25 17:16 ` [PATCH 01/16] commit-reach: move walk methods from commit.c Derrick Stolee via GitGitGadget
@ 2018-07-16 18:57   ` Stefan Beller
  2018-07-16 21:31   ` Jonathan Tan
  1 sibling, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 18:57 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>

This looks good, apart from nits below.

Thanks,
Stefan

> diff --git a/commit-reach.c b/commit-reach.c
> new file mode 100644
> index 000000000..f2e2f7461
> --- /dev/null
> +++ b/commit-reach.c
> @@ -0,0 +1,359 @@
> +#include "cache.h"
> +#include "prio-queue.h"
> +#include "commit-reach.h"

and commit.h (see discussion below) ?

> diff --git a/commit-reach.h b/commit-reach.h
> new file mode 100644
> index 000000000..244f48c5f
> --- /dev/null
> +++ b/commit-reach.h
> @@ -0,0 +1,41 @@
> +#ifndef __COMMIT_REACH_H__
> +#define __COMMIT_REACH_H__
> +
> +#include "commit.h"

Do we really need to include the header file in another header file?
I'd think forward declarations would work fine here?
(The benefit of forward declaring the structs commits, commit_list
is purely for the poor saps of developers that we are, as then touching
commit.h would not trigger a compilation of files that only include this
header but not commit.h. That are not many in this particular case,
but I consider it good practice that we should follow)

> +
> +struct commit_list *get_merge_bases_many(struct commit *one,
> +                                        int n,
> +                                        struct commit **twos);
> +struct commit_list *get_merge_bases_many_dirty(struct commit *one,
> +                                              int n,
> +                                              struct commit **twos);
> +struct commit_list *get_merge_bases(struct commit *one, struct commit *two);
> +struct commit_list *get_octopus_merge_bases(struct commit_list *in);
> +
> +/* To be used only when object flags after this call no longer matter */
> +struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos);
> +
> +int is_descendant_of(struct commit *commit, struct commit_list *with_commit);
> +int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference);
> +int in_merge_bases(struct commit *commit, struct commit *reference);
> +
> +
> +/*
> + * Takes a list of commits and returns a new list where those
> + * have been removed that can be reached from other commits in
> + * the list. It is useful for, e.g., reducing the commits
> + * randomly thrown at the git-merge command and removing
> + * redundant commits that the user shouldn't have given to it.
> + *
> + * This function destroys the STALE bit of the commit objects'
> + * flags.
> + */
> +struct commit_list *reduce_heads(struct commit_list *heads);
> +
> +/*
> + * Like `reduce_heads()`, except it replaces the list. Use this
> + * instead of `foo = reduce_heads(foo);` to avoid memory leaks.
> + */
> +void reduce_heads_replace(struct commit_list **heads);

Thanks for the docs! Bonus points for also documenting the
other functions (is_descendant_of etc. For example is
is_descendant_of destroying some bit state?)

> +#endif
> diff --git a/commit.c b/commit.c
> index 39b80bd21..32d1234bd 100644
> --- a/commit.c
> +++ b/commit.c
> @@ -843,364 +843,6 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so
>                 clear_author_date_slab(&author_date);
>  }
>
> -/* merge-base stuff */

This is the only line that did not make it to the other file. :-)
I don't think it is needed in commit-reach.c

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 18:56       ` Jeff King
@ 2018-07-16 18:59         ` Eric Sunshine
  2018-07-18 12:32           ` Johannes Schindelin
  0 siblings, 1 reply; 118+ messages in thread
From: Eric Sunshine @ 2018-07-16 18:59 UTC (permalink / raw)
  To: Jeff King
  Cc: Ramsay Jones, Johannes Schindelin, gitgitgadget, Git List,
	Junio C Hamano

On Mon, Jul 16, 2018 at 2:56 PM Jeff King <peff@peff.net> wrote:
> On Mon, Jul 16, 2018 at 02:40:21PM -0400, Eric Sunshine wrote:
> > On Mon, Jul 16, 2018 at 12:18 PM Jeff King <peff@peff.net> wrote:
> > > git-send-email uses the current time minus an offset, and then
> > > monotonically increases for each patch:
> >
> > Junio pointed this out to gitgitgadget developers in [1], which led to
> > an issue being opened[2]. That issue was merged today.
> >
> > [1]: https://public-inbox.org/git/xmqq7em7gg3j.fsf@gitster-ct.c.googlers.com/
> > [2]: https://github.com/gitgitgadget/gitgitgadget/pull/15
>
> I was going to say "oh good, fixed", but it looks like it just merged
> adding that line to the TODO list. :)

Erm, right. I actually knew a couple days ago that that issue was just
a change to the TODO list but forgot that important tidbit when I
wrote the above "was merged today". Anyhow, at least it's on the
radar.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 02/16] commit-reach: move ref_newer from remote.c
  2018-06-25 17:35 ` [PATCH 02/16] commit-reach: move ref_newer from remote.c Derrick Stolee via GitGitGadget
@ 2018-07-16 19:10   ` Stefan Beller
  0 siblings, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 19:10 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>

Another verbatim move!
(I'll just re-iterate that the --color-moved option is very helpful in
these reviews)

Thanks,
Stefan

> +++ b/commit-reach.h
> @@ -38,4 +38,6 @@ struct commit_list *reduce_heads(struct commit_list *heads);
>   */
>  void reduce_heads_replace(struct commit_list **heads);
>
> +int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
> +

Bonus points for docs on ref_newer!

> +++ b/http-push.c
> @@ -14,6 +14,7 @@
>  #include "argv-array.h"
>  #include "packfile.h"
>  #include "object-store.h"
> +#include "commit-reach.h"
>
>

Double new line here?
I missed that in p1, it would be nice if you could fix that up.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 03/16] commit-reach: move commit_contains from ref-filter
  2018-06-25 18:01 ` [PATCH 03/16] commit-reach: move commit_contains from ref-filter Derrick Stolee via GitGitGadget
@ 2018-07-16 19:14   ` Stefan Beller
  0 siblings, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 19:14 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:

> +
> +int commit_contains(struct ref_filter *filter, struct commit *commit,
> +                   struct commit_list *list, struct contains_cache *cache)

[...]

> -
> -static int commit_contains(struct ref_filter *filter, struct commit *commit,
> -                          struct commit_list *list, struct contains_cache *cache)

All moved code, but this one, which was exposed to the public.
Might be worth calling out in the commit message?
While exposing it, it is a good idea to question its name and if
it is good enough for public use (I think it is -- despite not understanding
what the function does by its arguments; so bonus points for docs!)

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 06/16] upload-pack: generalize commit date cutoff
  2018-07-12 20:47 ` [PATCH 06/16] upload-pack: generalize commit date cutoff Derrick Stolee via GitGitGadget
@ 2018-07-16 19:38   ` Stefan Beller
  2018-07-18 16:04     ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 19:38 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The ok_to_give_up() method uses the commit date as a cutoff to avoid
> walking the entire reachable set of commits. Before moving the
> reachable() method to commit-reach.c, pull out the dependence on the
> global constant 'oldest_have' with a 'min_commit_date' parameter.


  'oldest_have' seems to be used in only one method after that
  (function got_oid); but as that function is called many times
  we either have to make it a function-global or pass around as a parameter,
  we'll defer that to later.

Code (of all previous patches and this one) look good!
Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 11/16] test-reach: test get_merge_bases_many
  2018-07-13 14:51 ` [PATCH 11/16] test-reach: test get_merge_bases_many Derrick Stolee via GitGitGadget
@ 2018-07-16 21:24   ` Stefan Beller
  2018-07-16 23:08   ` Jonathan Tan
  1 sibling, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 21:24 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

> +test_expect_success 'get_merge_bases_many' '
> +       cat >input <<-\EOF &&
> +               A:commit-5-7
> +               X:commit-4-8
> +               X:commit-6-6
> +               X:commit-8-3
> +       EOF
> +       {
> +               printf "get_merge_bases_many(A,X):\n" &&
> +               git rev-parse commit-5-6 &&
> +               git rev-parse commit-4-7

Please call rev-parse only once, giving both tips as argument, i.e.

               printf "get_merge_bases_many(A,X):\n" &&
               git rev-parse commit-5-6 \
                             commit-4-7

ought to produce the same output

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 12/16] test-reach: test reduce_heads
  2018-07-13 16:51 ` [PATCH 12/16] test-reach: test reduce_heads Derrick Stolee via GitGitGadget
@ 2018-07-16 21:30   ` Stefan Beller
  2018-07-16 21:59     ` Eric Sunshine
  0 siblings, 1 reply; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 21:30 UTC (permalink / raw)
  To: gitgitgadget, Eric Sunshine; +Cc: git, Junio C Hamano, Derrick Stolee

> +test_expect_success 'reduce_heads' '
> +       cat >input <<-\EOF &&
> +               X:commit-1-10
> +               X:commit-2-8
> +               X:commit-3-6
> +               X:commit-4-4
> +               X:commit-1-7
> +               X:commit-2-5
> +               X:commit-3-3
> +               X:commit-5-1
> +       EOF
> +       {
> +               printf "reduce_heads(X):\n" &&
> +               git rev-parse commit-5-1 &&
> +               git rev-parse commit-4-4 &&
> +               git rev-parse commit-3-6 &&
> +               git rev-parse commit-2-8 &&
> +               git rev-parse commit-1-10

Please use rev-parse only once.

I am not sure about the usage of { braces } in the test suite,
+cc Eric who sent a test suite linting series recently.
Do we need to em-'brace' the statements that describe the
expected behavior? (Or is it supposed to be easier to read
for the reviewers? I found these very readable so far... but
this question just came up)

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 01/16] commit-reach: move walk methods from commit.c
  2018-06-25 17:16 ` [PATCH 01/16] commit-reach: move walk methods from commit.c Derrick Stolee via GitGitGadget
  2018-07-16 18:57   ` Stefan Beller
@ 2018-07-16 21:31   ` Jonathan Tan
  1 sibling, 0 replies; 118+ messages in thread
From: Jonathan Tan @ 2018-07-16 21:31 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, gitster, dstolee, Jonathan Tan

> +/* Remember to update object flag allocation in object.h */
> +#define PARENT1		(1u<<16)
> +#define PARENT2		(1u<<17)
> +#define STALE		(1u<<18)
> +#define RESULT		(1u<<19)

Update object.h to point to commit-reach.c instead of commit.c also.

> diff --git a/commit-reach.h b/commit-reach.h
> new file mode 100644
> index 000000000..244f48c5f
> --- /dev/null
> +++ b/commit-reach.h
> @@ -0,0 +1,41 @@
> +#ifndef __COMMIT_REACH_H__
> +#define __COMMIT_REACH_H__
> +
> +#include "commit.h"
> +
> +struct commit_list *get_merge_bases_many(struct commit *one,
> +					 int n,
> +					 struct commit **twos);

<snip>

Should the declarations in commit.h be deleted also?

Thanks for copying it over verbatim - it makes it much easier to see
what's going on with --color-moved.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 13/16] test-reach: test can_all_from_reach_with_flags
  2018-07-13 17:22 ` [PATCH 13/16] test-reach: test can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
@ 2018-07-16 21:54   ` Stefan Beller
  2018-07-18 16:54     ` Derrick Stolee
  2018-07-17  0:10   ` Jonathan Tan
  1 sibling, 1 reply; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 21:54 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The can_all_from_reach_with_flags method is used by ok_to_give_up in
> upload-pack.c to see if we have done enough negotiation during a fetch.
> This method is intentionally created to preserve state between calls to
> assist with stateful negotiation, such as over SSH.
>
> To make this method testable, add a new can_all_from_reach method that
> does the initial setup and final tear-down. Call the method from
> 'test-tool reach'.
>
> Since this is a many-to-many reachability query, add a new type of input
> to the 'test-tool reach' input format. Lines "Y:<committish>" create a
> list of commits to be the reachability targets from the commits in the
> 'X' list. In the context of fetch negotiation, the 'X' commits are the
> 'want' commits and the 'Y' commits are the 'have' commits.

Makes sense. I briefly wondered if we want to s/Y/Z/ as I find X and Z
more distinguishable than X/Y for reading/skimming.

Thanks,
Stefan

> +++ b/commit-reach.c
> @@ -593,3 +593,50 @@ int can_all_from_reach_with_flag(struct object_array *from,
>         }
>         return 1;
>  }
> +
> +int can_all_from_reach(struct commit_list *from, struct commit_list *to,
> +                      int cutoff_by_min_date)

Are we putting this method (that is only used by tests so far) here to
not clutter the test tool code too much, or do we see more benefits
from having the code here? If so, docs would be nice.

> +++ b/t/t6600-test-reach.sh

> +test_expect_success 'can_all_from_reach:hit' '
  [...]
> +               Y:commit-7-3
> +               Y:commit-8-1

> +test_expect_success 'can_all_from_reach:miss' '
[...]
> +               Y:commit-8-5

It would be nice if the difference in the list could be easier
to spot as a reader. (There is a lot of repetition).

Maybe we can teach "test-tool reach" to ignore input lines
starting with '#' such that we can annotate the last line in
the miss case?

Why do we omit 7-3 in the miss case? (might be nice
for symmetry to keep around)

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 12/16] test-reach: test reduce_heads
  2018-07-16 21:30   ` Stefan Beller
@ 2018-07-16 21:59     ` Eric Sunshine
  0 siblings, 0 replies; 118+ messages in thread
From: Eric Sunshine @ 2018-07-16 21:59 UTC (permalink / raw)
  To: Stefan Beller; +Cc: gitgitgadget, Git List, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 5:30 PM Stefan Beller <sbeller@google.com> wrote:
> > +test_expect_success 'reduce_heads' '
> > +       cat >input <<-\EOF &&
> > +               X:commit-1-10
> > +               X:commit-2-8
> > +               X:commit-3-6
> > +               X:commit-4-4
> > +               X:commit-1-7
> > +               X:commit-2-5
> > +               X:commit-3-3
> > +               X:commit-5-1
> > +       EOF
> > +       {
> > +               printf "reduce_heads(X):\n" &&
> > +               git rev-parse commit-5-1 &&
> > +               git rev-parse commit-4-4 &&
> > +               git rev-parse commit-3-6 &&
> > +               git rev-parse commit-2-8 &&
> > +               git rev-parse commit-1-10
> > +      } >expect &&
>
> Please use rev-parse only once.
>
> I am not sure about the usage of { braces } in the test suite,
> +cc Eric who sent a test suite linting series recently.
> Do we need to em-'brace' the statements that describe the
> expected behavior? (Or is it supposed to be easier to read
> for the reviewers? I found these very readable so far... but
> this question just came up)

Grouping the commands for redirection via a "{...}>expect" block is
less noisy than redirecting each command separately, thus more
reviewer-friendly. And, {...} blocks are used regularly in the test
suite, so no issue there.

I do agree that a single git-rev-parse with all 5 arguments makes more
sense (and would be appreciated by Windows folk). Also, the 'printf'
could be replaced by a simple 'echo' if we want to get nit-picky.

Finally, a style nit: We don't normally indent the content of a
here-doc like that. Instead, the content is normally aligned with the
closing EOF, not indented beyond it.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 14/16] commit-reach: replace ref_newer logic
  2018-07-13 18:37 ` [PATCH 14/16] commit-reach: replace ref_newer logic Derrick Stolee via GitGitGadget
@ 2018-07-16 22:16   ` Stefan Beller
  0 siblings, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 22:16 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The ref_newer method is used by 'git push' to check if a force-push is
> required. This method does not use any kind of cutoff when walking, so
> in the case of a force-push will walk all reachable commits.
>
> The is_descendant_of method already uses paint_down_to_common along with
> cutoffs. By translating the ref_newer arguments into the commit and
> commit_list required by is_descendant_of, we can have one fewer commit
> walk and also improve our performance!
>
> For a copy of the Linux repository, 'test-tool reach ref_newer' presents
> the following improvements with the specified input. In the case that
> ref_newer returns 1, there is no improvement. The improvement is in the
> second case where ref_newer returns 0.
>
> Input
> -----

I fetched the series as advertised in the cover letter; however Junio
applies the patches manually, for which there is a problem here in the
patch format. Three dashes indicate the end of a commit message and
below that you usually have some ephemeral information such as the
stats, followed by the diffs starting with "diff --git", at least that was the
case.

I just tested and it applies this patch cleanly keeping the information
below the three dashes intact. Cool!

> A:v4.9
> B:v3.19
>
> Before: 0.09 s
>  After: 0.09 s
>
> To test the negative case, add a new commit with parent v3.19,
> regenerate the commit-graph, and then run with B pointing at that
> commit.
>
> Before: 0.43 s
>  After: 0.09 s

Nice! The code looks good, too.
Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 07/16] commit-reach: move can_all_from_reach_with_flags
  2018-07-12 20:52 ` [PATCH 07/16] commit-reach: move can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
@ 2018-07-16 22:37   ` Jonathan Tan
  0 siblings, 0 replies; 118+ messages in thread
From: Jonathan Tan @ 2018-07-16 22:37 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, gitster, dstolee, Jonathan Tan

>  /* Remember to update object flag allocation in object.h */
> +#define REACHABLE       (1u<<15)
>  #define PARENT1		(1u<<16)
>  #define PARENT2		(1u<<17)
>  #define STALE		(1u<<18)

Update the object flag allocation in object.h.

> +int reachable(struct commit *from, int with_flag, int assign_flag,
> +	      time_t min_commit_date)

In this and previous patches: I think it's better to use "unsigned int"
as the data type for a flag, just like in clear_commit_marks().

Other than that, this and all previous patches look good.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-06-28 12:31 ` [PATCH 15/16] commit-reach: make can_all_from_reach... linear Derrick Stolee via GitGitGadget
@ 2018-07-16 22:37   ` Stefan Beller
  2018-07-17  1:16   ` Jonathan Tan
  2018-10-01 19:16   ` René Scharfe
  2 siblings, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 22:37 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> Note how the time increases between the two cases in the two versions.
> The new code increases relative to the number of commits that need to be
> walked, but not directly relative to the number of 'from' commits.

Cool!

>  int can_all_from_reach_with_flag(struct object_array *from,
>                                  int with_flag, int assign_flag,
> -                                time_t min_commit_date)
> +                                time_t min_commit_date,
> +                                uint32_t min_generation)
>  {

>         for (i = 0; i < from->nr; i++) {
[...]
> +               parse_commit(list[i]);

parse_commit_or_die or handle the return code?
(or a comment why we specifically are allowed to ignore
the return code here)

[...]
> +                       for (parent = stack->item->parents; parent; parent = parent->next) {
[...]
> +                                       parse_commit(parent->item);

same here.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 16/16] commit-reach: use can_all_from_reach
  2018-07-13 19:25 ` [PATCH 16/16] commit-reach: use can_all_from_reach Derrick Stolee via GitGitGadget
@ 2018-07-16 22:47   ` Stefan Beller
  0 siblings, 0 replies; 118+ messages in thread
From: Stefan Beller @ 2018-07-16 22:47 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> The is_descendant_of method previously used in_merge_bases() to check if
> the commit can reach any of the commits in the provided list. This had
> two performance problems:
>
> 1. The performance is quadratic in worst-case.
>
> 2. A single in_merge_bases() call requires walking beyond the target
>    commit in order to find the full set of boundary commits that may be
>    merge-bases.
>
> The can_all_from_reach method avoids this quadratic behavior and can
> limit the search beyond the target commits using generation numbers. It
> requires a small prototype adjustment to stop using commit-date as a
> cutoff, as that optimization is no longer appropriate here.
>
> Since in_merge_bases() uses paint_down_to_common(), is_descendant_of()
> naturally found cutoffs to avoid walking the entire commit graph. Since
> we want to always return the correct result, we cannot use the
> min_commit_date cutoff in can_all_from_reach. We then rely on generation
> numbers to provide the cutoff.
>
> Since not all repos will have a commit-graph file, nor will we always
> have generation numbers computed for a commit-graph file, create a new
> method, generation_numbers_enabled(), that checks for a commit-graph
> file and sees if the first commit in the file has a non-zero generation
> number. In the case that we do not have generation numbers, use the old
> logic for is_descendant_of().
>
> Performance was measured on a copy of the Linux repository using the
> 'test-tool reach is_descendant_of' command using this input:
>
> A:v4.9
> X:v4.10
> X:v4.11
> X:v4.12
> X:v4.13
> X:v4.14
> X:v4.15
> X:v4.16
> X:v4.17
> X:v3.0
>
> Note that this input is tailored to demonstrate the quadratic nature of
> the previous method, as it will compute merge-bases for v4.9 versus all
> of the later versions before checking against v4.1.
>
> Before: 0.26 s
>  After: 0.21 s
>
> Since we previously used the is_descendant_of method in the ref_newer
> method, we also measured performance there using
> 'test-tool reach ref_newer' with this input:
>
> A:v4.9
> B:v3.19
>
> Before: 0.10 s
>  After: 0.08 s
>
> By adding a new commit with parent v3.19, we test the non-reachable case
> of ref_newer:
>
> Before: 0.09 s
>  After: 0.08 s
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---

Thanks for the commit message. The code itself looks good!

I think this series is nearly done, I have only commented on
style issues so far, which are easier to address than fundamental
design issues or naming things.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 08/16] test-reach: create new test tool for ref_newer
  2018-07-13 14:06 ` [PATCH 08/16] test-reach: create new test tool for ref_newer Derrick Stolee via GitGitGadget
@ 2018-07-16 23:00   ` Jonathan Tan
  2018-07-18 16:14     ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Jonathan Tan @ 2018-07-16 23:00 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, gitster, dstolee, Jonathan Tan

> To use the new test-tool, use 'test-tool reach <method>' and provide
> input to stdin that describes the inputs to the method. Currently, we
> only implement the ref_newer method, which requires two commits. Use
> lines "A:<committish>" and "B:<committish>" for the two inputs. We will
> expand this input later to accommodate methods that take lists of
> commits.

It would be nice if "A" and "B" were "ancestor" and "descendant" (or
something like that) instead, so that I don't have to check which
direction the reach is calculated in.

> +int cmd__reach(int ac, const char **av)
> +{
> +	struct object_id oid_A, oid_B;
> +	struct strbuf buf = STRBUF_INIT;
> +	struct repository *r = the_repository;
> +
> +	setup_git_directory();
> +
> +	if (ac < 2)
> +		exit(1);
> +
> +
> +	while (strbuf_getline(&buf, stdin) != EOF) {
> +		struct object_id oid;
> +		struct object *o;
> +		struct commit *c;
> +		if (buf.len < 3)
> +			continue;
> +
> +		if (get_oid_committish(buf.buf + 2, &oid))
> +			die("failed to resolve %s", buf.buf + 2);

You can also use skip_prefix() instead of using arithmetic to determine
the start of the OID.

> +# Construct a grid-like commit graph with points (x,y)
> +# with 1 <= x <= 10, 1 <= y <= 10, where (x,y) has
> +# parents (x-1, y) and (x, y-1), keeping in mind that
> +# we drop a parent if a coordinate is nonpositive.
> +#
> +#             (10,10)
> +#            /       \
> +#         (10,9)    (9,10)
> +#        /     \   /      \
> +#    (10,8)    (9,9)      (8,10)
> +#   /     \    /   \      /    \
> +#         ( continued...)
> +#   \     /    \   /      \    /
> +#    (3,1)     (2,2)      (1,3)
> +#        \     /    \     /
> +#         (2,1)      (1,2)
> +#              \    /
> +#              (1,1)

This is quite a good design, thanks.

> +# We use branch 'comit-x-y' to refer to (x,y).

s/comit/commit/

> +	git show-ref -s commit-7-7 | git commit-graph write --stdin-commits &&
> +	mv .git/objects/info/commit-graph commit-graph-half &&

My understanding is that this writes for 7-7 and all its ancestors,
but...

> +test_expect_success 'ref_newer:hit' '
> +	cat >input <<-\EOF &&
> +		A:commit-5-7
> +		B:commit-2-3
> +	EOF
> +	printf "ref_newer:1\n" >expect &&
> +	test_three_modes ref_newer
> +'
> +
> +test_done

...both 5-7 and 2-3 are ancestors of 7-7, right? Which means that you
don't test the "half" commit graph here. (It's probably sufficient to
just adjust the numbers.)

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 11/16] test-reach: test get_merge_bases_many
  2018-07-13 14:51 ` [PATCH 11/16] test-reach: test get_merge_bases_many Derrick Stolee via GitGitGadget
  2018-07-16 21:24   ` Stefan Beller
@ 2018-07-16 23:08   ` Jonathan Tan
  1 sibling, 0 replies; 118+ messages in thread
From: Jonathan Tan @ 2018-07-16 23:08 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, gitster, dstolee, Jonathan Tan

> @@ -71,6 +78,14 @@ int cmd__reach(int ac, const char **av)
>  		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
>  	else if (!strcmp(av[1], "is_descendant_of"))
>  		printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X));
> +	else if (!strcmp(av[1], "get_merge_bases_many")) {
> +		struct commit_list *list = get_merge_bases_many(A, X_nr, X_array);
> +		printf("%s(A,X):\n", av[1]);
> +		while (list) {
> +			printf("%s\n", oid_to_hex(&list->item->object.oid));
> +			list = list->next;
> +		}

I don't think get_merge_bases_many defines a sort order on its output?
It might be better to sort the resulting commit list here, so that the
output is more well-defined. (And omit the informational printf so that
it's slightly easier to generate the "expect" file.)

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 13/16] test-reach: test can_all_from_reach_with_flags
  2018-07-13 17:22 ` [PATCH 13/16] test-reach: test can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
  2018-07-16 21:54   ` Stefan Beller
@ 2018-07-17  0:10   ` Jonathan Tan
  1 sibling, 0 replies; 118+ messages in thread
From: Jonathan Tan @ 2018-07-17  0:10 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, gitster, dstolee, Jonathan Tan

The subject should be can_all_from_reach_with_flag (without the "s" at
the end). Likewise in the commit message.

> To make this method testable, add a new can_all_from_reach method that
> does the initial setup and final tear-down. Call the method from
> 'test-tool reach'.

This description leads me to believe that can_all_from_reach() is (1)
trivial, and (2) will not be used in production code. But (1) the
function itself is non-trivial and the function signature contains a
"cutoff_by_min_date" parameter not found in
can_all_from_reach_with_flag():

> +int can_all_from_reach(struct commit_list *from, struct commit_list *to,
> +		       int cutoff_by_min_date)

and (2) this function will be used in production code subsequently in
the "commit-reach: use can_all_from_reach" commit. It would be clearer,
maybe, if there were some rearrangement - maybe a commit introducing
this function, especially documenting what cutoff_by_min_date does, and
then a test (which just tests can_all_from_reach), and then the
"commit-reach: use can_all_from_reach" commit.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-06-28 12:31 ` [PATCH 15/16] commit-reach: make can_all_from_reach... linear Derrick Stolee via GitGitGadget
  2018-07-16 22:37   ` Stefan Beller
@ 2018-07-17  1:16   ` Jonathan Tan
  2018-10-01 19:16   ` René Scharfe
  2 siblings, 0 replies; 118+ messages in thread
From: Jonathan Tan @ 2018-07-17  1:16 UTC (permalink / raw)
  To: gitgitgadget; +Cc: git, gitster, dstolee, Jonathan Tan

> The first step includes using a depth-first-search (DFS) from each from
> commit, sorted by ascending generation number. We do not walk beyond the
> minimum generation number or the minimum commit date. This DFS is likely
> to be faster than the existing reachable() method because we expect
> previous ref values to be along the first-parent history.
> 
> If we find a target commit, then we mark everything in the DFS stack as
> a RESULT. This expands the set of targets for the other from commits. We
> also mark the visited commits using 'assign_flag' to prevent re-walking
> the same code.

Thanks for this - it was very helpful in understanding the code.

The function itself uses a DFS stack that contains only the trail
leading up to the currently processed node, and not the one that I'm
more familiar with, which also contains the siblings of processed nodes.
I'll annotate the function with my thought process in the hope that it
will aid future reviewers. (The diff as seen in the e-mail is confusing
so I'm reproducing the function itself, not any + or -.)

> int can_all_from_reach_with_flag(struct object_array *from,
> 				 int with_flag, int assign_flag,
> 				 time_t min_commit_date,
> 				 uint32_t min_generation)
> {
> 	struct commit **list = NULL;
> 	int i;
> 	int result = 1;
> 
> 	ALLOC_ARRAY(list, from->nr);
> 	for (i = 0; i < from->nr; i++) {
> 		list[i] = (struct commit *)from->objects[i].item;
> 
> 		parse_commit(list[i]);
> 
> 		if (list[i]->generation < min_generation)
> 			return 0;
> 	}
> 
> 	QSORT(list, from->nr, compare_commits_by_gen);
> 
> 	for (i = 0; i < from->nr; i++) {
> 		/* DFS from list[i] */
> 		struct commit_list *stack = NULL;
> 
> 		list[i]->object.flags |= assign_flag;
> 		commit_list_insert(list[i], &stack);
> 
> 		while (stack) {
> 			struct commit_list *parent;
> 
> 			if (stack->item->object.flags & with_flag) {
> 				pop_commit(&stack);
> 				continue;
> 			}

I wish that the code would refrain from pushing such an object instead
of popping it at the first opportunity, but I guess that doing so would
require the equivalent of a labeled break/continue. I have no qualms
with using "goto" in this case, but I know that some people don't like
it :-P

> 			for (parent = stack->item->parents; parent; parent = parent->next) {
> 				if (parent->item->object.flags & (with_flag | RESULT))
> 					stack->item->object.flags |= RESULT;

Straightforward, and also produces the bubbling up that we want. An
object is never popped unless it has the "with_flag" flag (see above) or
all its parents have been processed. The object can encounter the "if"
statement multiple times; the last one is when all its parents have been
processed (and thus have the RESULT flag set if necessary).

> 				if (!(parent->item->object.flags & assign_flag)) {
> 					parent->item->object.flags |= assign_flag;
> 
> 					parse_commit(parent->item);
> 
> 					if (parent->item->date < min_commit_date ||
> 					    parent->item->generation < min_generation)
> 						continue;
> 
> 					commit_list_insert(parent->item, &stack);
> 					break;
> 				}

If not yet processed, push it onto the stack and break. The child commit
is still left on the stack. The next time the child commit is processed
(in an iteration of the "while" loop), the "for" loop will iterate until
the next unprocessed parent.

In the DFS that I'm used to, all parents would be pushed here, but
perhaps the fact that the iteration is postorder confuses things.
Anyway, if someone comes up with a better algorithm, replacing it
shouldn't be too difficult - the algorithm is contained within this
function, and there are tests to check the correctness of the algorithm
update.

> 			}
> 
> 			if (!parent)
> 				pop_commit(&stack);

Only when we have no parents left are we completely done with the
current object.

> 		}
> 
> 		if (!(list[i]->object.flags & (with_flag | RESULT))) {
> 			result = 0;
> 			goto cleanup;
> 		}

And after the DFS, if the original object did not have an appropriate
flag set, we do not bother with the other "want" objects.

> 	}
> 
> cleanup:
> 	for (i = 0; i < from->nr; i++) {
> 		clear_commit_marks(list[i], RESULT);
> 		clear_commit_marks(list[i], assign_flag);
> 	}
> 	return result;
> }

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 16:18   ` Jeff King
  2018-07-16 18:40     ` Eric Sunshine
@ 2018-07-18 12:23     ` Johannes Schindelin
  2018-07-18 19:21       ` Jeff King
  1 sibling, 1 reply; 118+ messages in thread
From: Johannes Schindelin @ 2018-07-18 12:23 UTC (permalink / raw)
  To: Jeff King
  Cc: Ramsay Jones, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano

Hi Peff,

On Mon, 16 Jul 2018, Jeff King wrote:

> On Mon, Jul 16, 2018 at 02:54:38PM +0100, Ramsay Jones wrote:
> 
> > On 16/07/18 14:00, Derrick Stolee via GitGitGadget wrote:
> > > There are many places in Git that use a commit walk to determine
> > > reachability between commits and/or refs. A lot of this logic is
> > > duplicated.
> > [snip] ...
> > 
> > This is not your problem, but I find these GitGitGadget
> > submissions somewhat annoying. This series has been spewed
> > all over my in-box in, what I assume, is commit date order.
> > 
> > So, patches #4,5 dated 19/06, then #1,2,3 dated 25/06,
> > then #15 dated 28/06, then #6,7 dated 12/07, then #8-16
> > dated 13/07, then 00/16 dated today.
> > 
> > > No I don't use a threaded display (I hate it), but even with
> > that turned on, the patches still appear in the above order
> > under the cover letter (but at least all together).
> 
> Yeah, they're out of order in mutt's threaded display. And the
> back-dating means there's a much higher chance of them getting blocked
> as spam (e.g., some of the dates are from weeks ago).
> 
> git-send-email uses the current time minus an offset, and then
> monotonically increases for each patch:
> 
>   $time = time - scalar $#files;
>   ...
>   my $date = format_2822_time($time++);
> 
> which seems to work pretty well in practice. It does mean the original
> dates are lost. The committer date is not interesting at all (there will
> be a new committer via "git am" anyway). The original author date is
> potentially of interest, but could be included as an in-body header.
> AFAIK send-email doesn't have such an option, though, and people are
> fine with date-of-sending becoming the new author date.
> 
> +cc Johannes as the GitGitGadget author

Thanks for dumping even more work on my shoulders.

I wanted to help with that insane process we have here, but in a more
collaborative manner.

This time I fixed it, but please do keep in mind that the decision to use
the email transport for something it *was not designed for* (it was
designed for humans talking to humans) is the culprit here.

Next time, I will ask you to jump in, instead of putting the onus on me.

I mean, seriously, what is this? "You can use *any* mail program to work
with the Git mailing list, *any* mailer. As long as it is mutt. And as
long as you spend hours and hours on tooling that oh BTW nobody else can
use."

Hopefully GitGitGadget will make this situation better. And hopefully not
at the expense of my sanity.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 18:47       ` Derrick Stolee
@ 2018-07-18 12:28         ` Johannes Schindelin
  2018-07-18 15:01           ` Duy Nguyen
  0 siblings, 1 reply; 118+ messages in thread
From: Johannes Schindelin @ 2018-07-18 12:28 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: Eric Sunshine, Stefan Beller, Ramsay Jones, gitgitgadget,
	Git List, Junio C Hamano

Hi,

On Mon, 16 Jul 2018, Derrick Stolee wrote:

> On 7/16/2018 2:44 PM, Eric Sunshine wrote:
> > On Mon, Jul 16, 2018 at 1:27 PM Stefan Beller <sbeller@google.com> wrote:
> > > Another pain point of the Gadget is that CC's in the cover letter
> > > do not work as I would imagine. The line
> > >
> > > CC: sbeller@google.com
> > >
> > > did not put that email into the cc field.
> > gitgitgadget recognizes case-sensitive "Cc:" only[1].
> >
> > [1]:
> > https://github.com/gitgitgadget/gitgitgadget/blob/c4805370f59532aa438283431b8ea7d4484c530f/lib/patch-series.ts#L188
> 
> Thanks for everyone's patience while we improve gitgitgadget (and - in this
> case - I learn how to use it).

And let's please stop pretending that this GitGitGadget project is
somebody else's problem.

It is our best shot at addressing the *constant* pain point that is the code
contribution process of Git.

In other words: if you see something that you don't like about
GitGitGadget, get your butts off the ground and contribute a fix. The code
contribution process of GitGitGadget is very easy: open a PR at
https://github.com/gitgitgadget/gitgitgadget

Ciao,
Johannes

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-16 18:59         ` Eric Sunshine
@ 2018-07-18 12:32           ` Johannes Schindelin
  0 siblings, 0 replies; 118+ messages in thread
From: Johannes Schindelin @ 2018-07-18 12:32 UTC (permalink / raw)
  To: Eric Sunshine
  Cc: Jeff King, Ramsay Jones, gitgitgadget, Git List, Junio C Hamano

Hi Eric & Peff,

On Mon, 16 Jul 2018, Eric Sunshine wrote:

> On Mon, Jul 16, 2018 at 2:56 PM Jeff King <peff@peff.net> wrote:
> > On Mon, Jul 16, 2018 at 02:40:21PM -0400, Eric Sunshine wrote:
> > > On Mon, Jul 16, 2018 at 12:18 PM Jeff King <peff@peff.net> wrote:
> > > > git-send-email uses the current time minus an offset, and then
> > > > monotonically increases for each patch:
> > >
> > > Junio pointed this out to gitgitgadget developers in [1], which led to
> > > an issue being opened[2]. That issue was merged today.
> > >
> > > [1]: https://public-inbox.org/git/xmqq7em7gg3j.fsf@gitster-ct.c.googlers.com/
> > > [2]: https://github.com/gitgitgadget/gitgitgadget/pull/15
> >
> > I was going to say "oh good, fixed", but it looks like it just merged
> > adding that line to the TODO list. :)
> 
> Erm, right. I actually knew a couple days ago that that issue was just
> a change to the TODO list but forgot that important tidbit when I
> wrote the above "was merged today". Anyhow, at least it's on the
> radar.

It is always nice to get such active contributions.

Seriously again, do feel free to jump in and contribute improvements to
GitGitGadget.

We have a very time-consuming (read: time wasting) code contribution
process, and it is an untenable situation, and GitGitGadget was designed
to be able to address this huge problem.

But I can't do it alone. And neither should you pretend that this is my
problem alone. This problem is as much your problem as it is mine.
(Whether you realize it or not.)

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 12:28         ` Johannes Schindelin
@ 2018-07-18 15:01           ` Duy Nguyen
  2018-07-18 17:01             ` Junio C Hamano
  0 siblings, 1 reply; 118+ messages in thread
From: Duy Nguyen @ 2018-07-18 15:01 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Derrick Stolee, Eric Sunshine, Stefan Beller, Ramsay Jones,
	gitgitgadget, Git Mailing List, Junio C Hamano

On Wed, Jul 18, 2018 at 2:30 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi,
>
> On Mon, 16 Jul 2018, Derrick Stolee wrote:
>
> > On 7/16/2018 2:44 PM, Eric Sunshine wrote:
> > > On Mon, Jul 16, 2018 at 1:27 PM Stefan Beller <sbeller@google.com> wrote:
> > > > Another pain point of the Gadget is that CC's in the cover letter
> > > > do not work as I would imagine. The line
> > > >
> > > > CC: sbeller@google.com
> > > >
> > > > did not put that email into the cc field.
> > > gitgitgadget recognizes case-sensitive "Cc:" only[1].
> > >
> > > [1]:
> > > https://github.com/gitgitgadget/gitgitgadget/blob/c4805370f59532aa438283431b8ea7d4484c530f/lib/patch-series.ts#L188
> >
> > Thanks for everyone's patience while we improve gitgitgadget (and - in this
> > case - I learn how to use it).
>
> And let's please stop pretending that this GitGitGadget project is
> somebody else's problem.
>
> It is our best shot at addressing the *constant* pain point that is the code
> contribution process of Git.
>
> In other words: if you see something that you don't like about
> GitGitGadget, get your butts off the ground and contribute a fix.

Thank you for the frank words. I will choose to not review any mails
coming from GitGitGadget.
-- 
Duy

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 06/16] upload-pack: generalize commit date cutoff
  2018-07-16 19:38   ` Stefan Beller
@ 2018-07-18 16:04     ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-18 16:04 UTC (permalink / raw)
  To: Stefan Beller, gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On 7/16/2018 3:38 PM, Stefan Beller wrote:
> On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> The ok_to_give_up() method uses the commit date as a cutoff to avoid
>> walking the entire reachable set of commits. Before moving the
>> reachable() method to commit-reach.c, pull out the dependence on the
>> global constant 'oldest_have' with a 'min_commit_date' parameter.
>
>    'oldest_have' seems to be used in only one method after that
>    (function got_oid); but as that function is called many times
>    we either have to make it a function-global or pass around as a parameter,
>    we'll defer that to later.

There is a lot of global state involved in this negotiation code, and it 
lives between negotiation rounds when the transfer is stateful. Tread 
carefully!

I did not attempt to reduce the global state at all.

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 08/16] test-reach: create new test tool for ref_newer
  2018-07-16 23:00   ` Jonathan Tan
@ 2018-07-18 16:14     ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-18 16:14 UTC (permalink / raw)
  To: Jonathan Tan, gitgitgadget; +Cc: git, gitster, dstolee

On 7/16/2018 7:00 PM, Jonathan Tan wrote:
>> To use the new test-tool, use 'test-tool reach <method>' and provide
>> input to stdin that describes the inputs to the method. Currently, we
>> only implement the ref_newer method, which requires two commits. Use
>> lines "A:<committish>" and "B:<committish>" for the two inputs. We will
>> expand this input later to accommodate methods that take lists of
>> commits.
> It would be nice if "A" and "B" were "ancestor" and "descendant" (or
> something like that) instead, so that I don't have to check which
> direction the reach is calculated in.

Different methods will use different combinations. I do notice that I 
forgot to list the method parameters as part of the test-tool output. It 
should print "ref_newer(A,B)" so you know which input goes in which place.

>
>> +int cmd__reach(int ac, const char **av)
>> +{
>> +	struct object_id oid_A, oid_B;
>> +	struct strbuf buf = STRBUF_INIT;
>> +	struct repository *r = the_repository;
>> +
>> +	setup_git_directory();
>> +
>> +	if (ac < 2)
>> +		exit(1);
>> +
>> +
>> +	while (strbuf_getline(&buf, stdin) != EOF) {
>> +		struct object_id oid;
>> +		struct object *o;
>> +		struct commit *c;
>> +		if (buf.len < 3)
>> +			continue;
>> +
>> +		if (get_oid_committish(buf.buf + 2, &oid))
>> +			die("failed to resolve %s", buf.buf + 2);
> You can also use skip_prefix() instead of using arithmetic to determine
> the start of the OID.
>
>> +# Construct a grid-like commit graph with points (x,y)
>> +# with 1 <= x <= 10, 1 <= y <= 10, where (x,y) has
>> +# parents (x-1, y) and (x, y-1), keeping in mind that
>> +# we drop a parent if a coordinate is nonpositive.
>> +#
>> +#             (10,10)
>> +#            /       \
>> +#         (10,9)    (9,10)
>> +#        /     \   /      \
>> +#    (10,8)    (9,9)      (8,10)
>> +#   /     \    /   \      /    \
>> +#         ( continued...)
>> +#   \     /    \   /      \    /
>> +#    (3,1)     (2,2)      (1,3)
>> +#        \     /    \     /
>> +#         (2,1)      (1,2)
>> +#              \    /
>> +#              (1,1)
> This is quite a good design, thanks.
>
>> +# We use branch 'comit-x-y' to refer to (x,y).
> s/comit/commit/
>
>> +	git show-ref -s commit-7-7 | git commit-graph write --stdin-commits &&
>> +	mv .git/objects/info/commit-graph commit-graph-half &&
> My understanding is that this writes for 7-7 and all its ancestors,
> but...
>
>> +test_expect_success 'ref_newer:hit' '
>> +	cat >input <<-\EOF &&
>> +		A:commit-5-7
>> +		B:commit-2-3
>> +	EOF
>> +	printf "ref_newer:1\n" >expect &&
>> +	test_three_modes ref_newer
>> +'
>> +
>> +test_done
> ...both 5-7 and 2-3 are ancestors of 7-7, right? Which means that you
> don't test the "half" commit graph here. (It's probably sufficient to
> just adjust the numbers.)

Good point! Thanks. I'll just write the commit-graph starting at 
commit-5-5 and that should satisfy the point of the "mixed-mode" tests.

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 13/16] test-reach: test can_all_from_reach_with_flags
  2018-07-16 21:54   ` Stefan Beller
@ 2018-07-18 16:54     ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-18 16:54 UTC (permalink / raw)
  To: Stefan Beller, gitgitgadget; +Cc: git, Junio C Hamano, Derrick Stolee

On 7/16/2018 5:54 PM, Stefan Beller wrote:
> On Mon, Jul 16, 2018 at 6:00 AM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
>> From: Derrick Stolee <dstolee@microsoft.com>
>>
>> The can_all_from_reach_with_flags method is used by ok_to_give_up in
>> upload-pack.c to see if we have done enough negotiation during a fetch.
>> This method is intentionally created to preserve state between calls to
>> assist with stateful negotiation, such as over SSH.
>>
>> To make this method testable, add a new can_all_from_reach method that
>> does the initial setup and final tear-down. Call the method from
>> 'test-tool reach'.
>>
>> Since this is a many-to-many reachability query, add a new type of input
>> to the 'test-tool reach' input format. Lines "Y:<committish>" create a
>> list of commits to be the reachability targets from the commits in the
>> 'X' list. In the context of fetch negotiation, the 'X' commits are the
>> 'want' commits and the 'Y' commits are the 'have' commits.
> Makes sense. I shortly wondered if we want to s/Y/Z/ as I find X and Z
> more distinguishable than X/Y for reading/skimming.
>
> Thanks,
> Stefan
>
>> +++ b/commit-reach.c
>> @@ -593,3 +593,50 @@ int can_all_from_reach_with_flag(struct object_array *from,
>>          }
>>          return 1;
>>   }
>> +
>> +int can_all_from_reach(struct commit_list *from, struct commit_list *to,
>> +                      int cutoff_by_min_date)
> We'll put this method (that is only used by tests so far) here to
> not clutter the test tool code too much, or do we see more benefits
> from the code
> here? If so, docs would be nice.

We will use it later as we reduce duplicate walk implementations, but I 
can hint at that in the message.


>
>> +++ b/t/t6600-test-reach.sh
>> +test_expect_success 'can_all_from_reach:hit' '
>    [...]
>> +               Y:commit-7-3
>> +               Y:commit-8-1
>> +test_expect_success 'can_all_from_reach:miss' '
> [...]
>> +               Y:commit-8-5
> It would be nice if the difference in the list could be easier
> to spot as a reader. (There is a lot of repetition).
>
> Maybe we can teach "test-tool reach" to ignore input lines
> starting with '#' such that we can annotate the last line in
> the miss case?
>
> Why do we omit 7-3 in the miss case? (might be nice
> for symmetry to keep around)

The X-commit that fails to reach a Y-commit in this second case is 
commit-8-3. That commit can reach both commit-7-3 and commit-8-1, so 
both need to be removed.

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 15:01           ` Duy Nguyen
@ 2018-07-18 17:01             ` Junio C Hamano
  2018-07-18 17:11               ` Derrick Stolee
  2018-07-19 16:32               ` Johannes Schindelin
  0 siblings, 2 replies; 118+ messages in thread
From: Junio C Hamano @ 2018-07-18 17:01 UTC (permalink / raw)
  To: Duy Nguyen
  Cc: Johannes Schindelin, Derrick Stolee, Eric Sunshine, Stefan Beller,
	Ramsay Jones, gitgitgadget, Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

>> In other words: if you see something that you don't like about
>> GitGitGadget, get your butts off the ground and contribute a fix.
>
> Thank you for the frank words. I will choose to not review any mails
> coming from GitGitGadget.

I wouldn't say I will choose not to, but certainly I noticed that
I'd backburner reading a series that are way out of order in my
mailbox, no matter who authored them or how they were sent out, as
they consume way more concentration-point out of my mind than they
are often worth X-<.  While there are easier-to-read and more nicely
organized patch series, I'd deal with them first, consciously or
not.

No, fixing a tool that throws such a harder-to-read patch series in
reader's mailbox is *not* something I'd spend my primary focus on,
especially when many contributors are perfectly capable of sending
reasonably formatted series without using such a tool under
development.

That won't stop those who want to improve the tool.  But I'd wish
those who want to make Git better spend their time on making Git,
over making GitGitGadget, better.


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 17:01             ` Junio C Hamano
@ 2018-07-18 17:11               ` Derrick Stolee
  2018-07-19 16:37                 ` Johannes Schindelin
  2018-07-19 16:32               ` Johannes Schindelin
  1 sibling, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-07-18 17:11 UTC (permalink / raw)
  To: Junio C Hamano, Duy Nguyen
  Cc: Johannes Schindelin, Eric Sunshine, Stefan Beller, Ramsay Jones,
	gitgitgadget, Git Mailing List

On 7/18/2018 1:01 PM, Junio C Hamano wrote:
> No, fixing a tool that throws such a harder-to-read patch series in
> reader's mailbox is *not* something I'd spend my primary focus on,
> especially when many contributors are perfectly capable of sending
> reasonably formatted series without using such a tool under
> development.
>
> That won't stop those who want to improve the tool.  But I'd wish
> those who want to make Git better spend their time on making Git,
> over making GitGitGadget, better.

I appreciate the feedback in how this series caused reviewer pain. 
Hopefully this date issue is now resolved. Any further feedback is welcome.

I'm choosing to use and contribute to GitGitGadget not because I'm 
incapable of sending series myself, but because I _have_ had difficulty. 
Using the email submissions creates a friction that I'm willing to 
overcome, but we are probably missing out on contributors who are not 
willing to push through that friction. Perhaps having another way for 
new contributors to feel welcome is an indirect way to make Git better.

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 12:23     ` Johannes Schindelin
@ 2018-07-18 19:21       ` Jeff King
  2018-07-19 16:34         ` Johannes Schindelin
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-07-18 19:21 UTC (permalink / raw)
  To: Johannes Schindelin
  Cc: Ramsay Jones, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano

On Wed, Jul 18, 2018 at 02:23:11PM +0200, Johannes Schindelin wrote:

> > Yeah, they're out of order in mutt's threaded display. And the
> > back-dating means there's a much higher chance of them getting blocked
> > as spam (e.g., some of the dates are from weeks ago).
> > 
> > git-send-email uses the current time minus an offset, and then
> > monotonically increases for each patch:
> > 
> >   $time = time - scalar $#files;
> >   ...
> >   my $date = format_2822_time($time++);
> > 
> > which seems to work pretty well in practice. It does mean the original
> > dates are lost. The committer date is not interesting at all (there will
> > be a new committer via "git am" anyway). The original author date is
> > potentially of interest, but could be included as an in-body header.
> > AFAIK send-email doesn't have such an option, though, and people are
> > fine with date-of-sending becoming the new author date.
> > 
> > +cc Johannes as the GitGitGadget author
> 
> Thanks for dumping even more work on my shoulders.

Wow. Here's my perspective on what I wrote.

Somebody pointed out an issue in the tool. I tried to add an additional
data point (how other clients react, and that I've seen spam-related
problems). And I tried to point to an existing solution in another tool,
in case that was helpful. I thought cc-ing you would be a favor, since
you obviously have an interest in the tool, and it is easy to miss
discussions buried deep in a thread.

So no, I didn't write the patch for you. But I tried to contribute
positively to the process. And I got yelled at for it. That makes me a
lot less inclined to try to help in the future.

> Next time, I will ask you to jump in, instead of putting the onus on me.
>
> I mean, seriously, what is this? "You can use *any* mail program to work
> with the Git mailing list, *any* mailer. As long as it is mutt. And as
> long as you spend hours and hours on tooling that oh BTW nobody else can
> use."

The irony here is that I actually _did_ look at the GitGitGadget
repository, and thought about making a patch to be helpful. But as it is
written in a language I'm not all that familiar with, using tools that I
don't normally use, I didn't want to spend hours and hours in order to
make what was probably going to be a one-line patch in software that I
don't use myself.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 17:01             ` Junio C Hamano
  2018-07-18 17:11               ` Derrick Stolee
@ 2018-07-19 16:32               ` Johannes Schindelin
  1 sibling, 0 replies; 118+ messages in thread
From: Johannes Schindelin @ 2018-07-19 16:32 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Duy Nguyen, Derrick Stolee, Eric Sunshine, Stefan Beller,
	Ramsay Jones, gitgitgadget, Git Mailing List

Hi Junio,

On Wed, 18 Jul 2018, Junio C Hamano wrote:

> That won't stop those who want to improve the tool.  But I'd wish
> those who want to make Git better spend their time on making Git,
> over making GitGitGadget, better.

And I'd wish that you would not make this task harder by refusing to fix
your process to update refs/notes/amlog.

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 19:21       ` Jeff King
@ 2018-07-19 16:34         ` Johannes Schindelin
  0 siblings, 0 replies; 118+ messages in thread
From: Johannes Schindelin @ 2018-07-19 16:34 UTC (permalink / raw)
  To: Jeff King
  Cc: Ramsay Jones, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano

Hi Peff,

On Wed, 18 Jul 2018, Jeff King wrote:

> On Wed, Jul 18, 2018 at 02:23:11PM +0200, Johannes Schindelin wrote:
> 
> > > Yeah, they're out of order in mutt's threaded display. And the
> > > back-dating means there's a much higher chance of them getting blocked
> > > as spam (e.g., some of the dates are from weeks ago).
> > > 
> > > git-send-email uses the current time minus an offset, and then
> > > monotonically increases for each patch:
> > > 
> > >   $time = time - scalar $#files;
> > >   ...
> > >   my $date = format_2822_time($time++);
> > > 
> > > which seems to work pretty well in practice. It does mean the original
> > > dates are lost. The committer date is not interesting at all (there will
> > > be a new committer via "git am" anyway). The original author date is
> > > potentially of interest, but could be included as an in-body header.
> > > AFAIK send-email doesn't have such an option, though, and people are
> > > fine with date-of-sending becoming the new author date.
> > > 
> > > +cc Johannes as the GitGitGadget author
> > 
> > Thanks for dumping even more work on my shoulders.
> 
> Wow. Here's my perspective on what I wrote.
> 
> Somebody pointed out an issue in the tool. I tried to add an additional
> data point (how other clients react, and that I've seen spam-related
> problems). And I tried to point to an existing solution in another tool,
> in case that was helpful. I thought cc-ing you would be a favor, since
> you obviously have an interest in the tool, and it is easy to miss
> discussions buried deep in a thread.
> 
> So no, I didn't write the patch for you. But I tried to contribute
> positively to the process. And I got yelled at for it. That makes me a
> lot less inclined to try to help in the future.
> 
> > Next time, I will ask you to jump in, instead of putting the onus on me.
> >
> > I mean, seriously, what is this? "You can use *any* mail program to work
> > with the Git mailing list, *any* mailer. As long as it is mutt. And as
> > long as you spend hours and hours on tooling that oh BTW nobody else can
> > use."
> 
> The irony here is that I actually _did_ look at the GitGitGadget
> repository, and thought about making a patch to be helpful. But as it is
> written in a language I'm not all that familiar with, using tools that I
> don't normally use, I didn't want to spend hours and hours in order to
> make what was probably going to be a one-line patch in software that I
> don't use myself.

I understand that. The web is not based on shell scripting, so there is no
good way to implement a bot on GitHub using Bash scripts.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 00/16] Consolidate reachability logic
  2018-07-18 17:11               ` Derrick Stolee
@ 2018-07-19 16:37                 ` Johannes Schindelin
  0 siblings, 0 replies; 118+ messages in thread
From: Johannes Schindelin @ 2018-07-19 16:37 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: Junio C Hamano, Duy Nguyen, Eric Sunshine, Stefan Beller,
	Ramsay Jones, gitgitgadget, Git Mailing List

Hi Stolee,

On Wed, 18 Jul 2018, Derrick Stolee wrote:

> On 7/18/2018 1:01 PM, Junio C Hamano wrote:
> > No, fixing a tool that throws such a harder-to-read patch series in
> > reader's mailbox is *not* something I'd spend my primary focus on,
> > especially when many contributors are perfectly capable of sending
> > reasonably formatted series without using such a tool under
> > development.
> >
> > That won't stop those who want to improve the tool.  But I'd wish
> > those who want to make Git better spend their time on making Git,
> > over making GitGitGadget, better.
> 
> I appreciate the feedback in how this series caused reviewer pain. Hopefully
> this date issue is now resolved. Any further feedback is welcome.
> 
> I'm choosing to use and contribute to GitGitGadget not because I'm incapable
> of sending series myself, but because I _have_ had difficulty. Using the email
> submissions creates a friction that I'm willing to overcome, but we are
> probably missing out on contributors who are not willing to push through that
> friction. Perhaps having another way for new contributors to feel welcome is
> an indirect way to make Git better.

While I am a seasoned Git contributor, it is *still* too painful to
contribute patches *even for me*.

So hopefully you and I will get this easier contribution process to the
point where other oldtimers do not want to take it.

At least we now have something that does not share the downsides with
SubmitGit, and is extensible enough that we can teach it new tricks.

With a little luck, Junio will fix amlog so that it is not utter garbage
for anybody but himself, and then GitGitGadget can give contributors
useful feedback about the state of their patch series, including automated
notifications when their patches have been mentioned in the What's Cooking
mail (which no irregular contributor reads, as far as I know).

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 118+ messages in thread

* [PATCH v2 00/18] Consolidate reachability logic
  2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
                   ` (16 preceding siblings ...)
  2018-07-16 13:54 ` [PATCH 00/16] Consolidate reachability logic Ramsay Jones
@ 2018-07-20 16:33 ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 01/18] commit-reach: move walk methods from commit.c Derrick Stolee
                     ` (21 more replies)
  17 siblings, 22 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

There are many places in Git that use a commit walk to determine
reachability between commits and/or refs. A lot of this logic is
duplicated.

I wanted to achieve the following:

* Consolidate several different commit walks into one file
* Reduce duplicate reachability logic
* Increase testability (correctness and performance)
* Improve performance of reachability queries

My approach is mostly in three parts:

I. Move code to a new commit-reach.c file.
II. Add a 'test-tool reach' command to test these methods directly.
III. Modify the logic by improving performance and calling methods with
similar logic but different prototypes.

The 'test-tool reach' command is helpful to make sure I don't break
anything as I change the logic, but also so I can test methods that are
normally only exposed by other more complicated commands. For instance,
ref_newer() is part of 'git push -f' and ok_to_give_up() is buried deep
within fetch negotiation. Both of these methods have some problematic
performance issues that are corrected by this series. As I discovered
them, it was clear that it would be better to consolidate walk logic
instead of discovering a new walk in another file hidden somewhere.

For the ok_to_give_up() method, I refactored the method so I could pull
the logic out of the depths of fetch negotiation. In the commit
"commit-reach: make can_all_from_reach... linear" I discuss how the
existing algorithm is quadratic and how we can make it linear. Also, we
can use heuristic knowledge about the shape of the commit graph and the
usual haves/wants to get some extra performance bonus. (The heuristic is
to do a DFS with first-parents first, and stop on first found result. We
expect haves/wants to include ref tips, which typically have their
previous values in their first-parent history.)

One major difference in this series versus the RFC is that I added a new
method 'generation_numbers_enabled()' to detect if we have a commit-graph
file with non-zero generation numbers. Using can_all_from_reach in
is_descendant_of is only faster if we have generation numbers as a cutoff.

V2 Update: The biggest material change in this version is that we drop the
method declarations from commit.h, which requires adding a lot of references
to commit-reach.h across the codebase. This change is in a commit on its own.
In addition, we have the following smaller changes:

* Use 'unsigned int' for the flag variables.

* Properly align the here-doc test input data.

* Use single rev-parse commands in test output, and pipe the OIDs through 'sort'

* Check output of parse_commit()

* Update flag documentation in object.h

* Add tests for commit_contains() including both algorithms.

* Reduce size of "mixed-mode" commit-graph to ensure we start commit walks
  'above' the graph and then walk into the commits with generation numbers.

Thanks,
-Stolee

This series is based on jt/commit-graph-per-object-store

Derrick Stolee (18):
  commit-reach: move walk methods from commit.c
  commit.h: remove method declarations
  commit-reach: move ref_newer from remote.c
  commit-reach: move commit_contains from ref-filter
  upload-pack: make reachable() more generic
  upload-pack: refactor ok_to_give_up()
  upload-pack: generalize commit date cutoff
  commit-reach: move can_all_from_reach_with_flags
  test-reach: create new test tool for ref_newer
  test-reach: test in_merge_bases
  test-reach: test is_descendant_of
  test-reach: test get_merge_bases_many
  test-reach: test reduce_heads
  test-reach: test can_all_from_reach_with_flags
  test-reach: test commit_contains
  commit-reach: replace ref_newer logic
  commit-reach: make can_all_from_reach... linear
  commit-reach: use can_all_from_reach

 Makefile                |   2 +
 bisect.c                |   1 +
 builtin/branch.c        |   1 +
 builtin/commit.c        |   1 +
 builtin/fetch.c         |   1 +
 builtin/fmt-merge-msg.c |   1 +
 builtin/log.c           |   1 +
 builtin/merge-base.c    |   1 +
 builtin/merge.c         |   1 +
 builtin/pull.c          |   1 +
 builtin/receive-pack.c  |   1 +
 builtin/remote.c        |   1 +
 builtin/rev-parse.c     |   1 +
 commit-graph.c          |  18 ++
 commit-graph.h          |   6 +
 commit-reach.c          | 662 ++++++++++++++++++++++++++++++++++++++++
 commit-reach.h          |  77 +++++
 commit.c                | 358 ----------------------
 commit.h                |  29 --
 fast-import.c           |   1 +
 http-push.c             |   2 +-
 merge-recursive.c       |   1 +
 notes-merge.c           |   1 +
 object.h                |   4 +-
 pack-bitmap-write.c     |   1 +
 ref-filter.c            | 146 +--------
 remote.c                |  50 +--
 remote.h                |   1 -
 revision.c              |   1 +
 sequencer.c             |   1 +
 sha1-name.c             |   1 +
 shallow.c               |   1 +
 submodule.c             |   1 +
 t/helper/test-reach.c   | 130 ++++++++
 t/helper/test-tool.c    |   1 +
 t/helper/test-tool.h    |   1 +
 t/t6600-test-reach.sh   | 242 +++++++++++++++
 upload-pack.c           |  58 +---
 38 files changed, 1177 insertions(+), 631 deletions(-)
 create mode 100644 commit-reach.c
 create mode 100644 commit-reach.h
 create mode 100644 t/helper/test-reach.c
 create mode 100755 t/t6600-test-reach.sh

-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply	[flat|nested] 118+ messages in thread

* [PATCH v2 01/18] commit-reach: move walk methods from commit.c
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 02/18] commit.h: remove method declarations Derrick Stolee
                     ` (20 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

There are several commit walks in the codebase. Group them together into
a new commit-reach.c file and corresponding header. After we group these
walks into one place, we can reduce duplicate logic by calling
equivalent methods.

The method declarations in commit.h are not touched by this commit and
will be moved in a following commit. Many consumers need to point to
commit-reach.h and that would bloat this commit.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Makefile       |   1 +
 commit-reach.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h |  42 ++++++
 commit.c       | 358 ------------------------------------------------
 object.h       |   2 +-
 5 files changed, 404 insertions(+), 359 deletions(-)
 create mode 100644 commit-reach.c
 create mode 100644 commit-reach.h

diff --git a/Makefile b/Makefile
index bb8bd67201..59781f4bc3 100644
--- a/Makefile
+++ b/Makefile
@@ -829,6 +829,7 @@ LIB_OBJS += column.o
 LIB_OBJS += combine-diff.o
 LIB_OBJS += commit.o
 LIB_OBJS += commit-graph.o
+LIB_OBJS += commit-reach.o
 LIB_OBJS += compat/obstack.o
 LIB_OBJS += compat/terminal.o
 LIB_OBJS += config.o
diff --git a/commit-reach.c b/commit-reach.c
new file mode 100644
index 0000000000..8ab6044414
--- /dev/null
+++ b/commit-reach.c
@@ -0,0 +1,360 @@
+#include "cache.h"
+#include "prio-queue.h"
+#include "commit.h"
+#include "commit-reach.h"
+
+/* Remember to update object flag allocation in object.h */
+#define PARENT1		(1u<<16)
+#define PARENT2		(1u<<17)
+#define STALE		(1u<<18)
+#define RESULT		(1u<<19)
+
+static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
+
+static int queue_has_nonstale(struct prio_queue *queue)
+{
+	int i;
+	for (i = 0; i < queue->nr; i++) {
+		struct commit *commit = queue->array[i].data;
+		if (!(commit->object.flags & STALE))
+			return 1;
+	}
+	return 0;
+}
+
+/* all input commits in one and twos[] must have been parsed! */
+static struct commit_list *paint_down_to_common(struct commit *one, int n,
+						struct commit **twos,
+						int min_generation)
+{
+	struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
+	struct commit_list *result = NULL;
+	int i;
+	uint32_t last_gen = GENERATION_NUMBER_INFINITY;
+
+	one->object.flags |= PARENT1;
+	if (!n) {
+		commit_list_append(one, &result);
+		return result;
+	}
+	prio_queue_put(&queue, one);
+
+	for (i = 0; i < n; i++) {
+		twos[i]->object.flags |= PARENT2;
+		prio_queue_put(&queue, twos[i]);
+	}
+
+	while (queue_has_nonstale(&queue)) {
+		struct commit *commit = prio_queue_get(&queue);
+		struct commit_list *parents;
+		int flags;
+
+		if (commit->generation > last_gen)
+			BUG("bad generation skip %8x > %8x at %s",
+			    commit->generation, last_gen,
+			    oid_to_hex(&commit->object.oid));
+		last_gen = commit->generation;
+
+		if (commit->generation < min_generation)
+			break;
+
+		flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
+		if (flags == (PARENT1 | PARENT2)) {
+			if (!(commit->object.flags & RESULT)) {
+				commit->object.flags |= RESULT;
+				commit_list_insert_by_date(commit, &result);
+			}
+			/* Mark parents of a found merge stale */
+			flags |= STALE;
+		}
+		parents = commit->parents;
+		while (parents) {
+			struct commit *p = parents->item;
+			parents = parents->next;
+			if ((p->object.flags & flags) == flags)
+				continue;
+			if (parse_commit(p))
+				return NULL;
+			p->object.flags |= flags;
+			prio_queue_put(&queue, p);
+		}
+	}
+
+	clear_prio_queue(&queue);
+	return result;
+}
+
+static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos)
+{
+	struct commit_list *list = NULL;
+	struct commit_list *result = NULL;
+	int i;
+
+	for (i = 0; i < n; i++) {
+		if (one == twos[i])
+			/*
+			 * We do not mark this even with RESULT so we do not
+			 * have to clean it up.
+			 */
+			return commit_list_insert(one, &result);
+	}
+
+	if (parse_commit(one))
+		return NULL;
+	for (i = 0; i < n; i++) {
+		if (parse_commit(twos[i]))
+			return NULL;
+	}
+
+	list = paint_down_to_common(one, n, twos, 0);
+
+	while (list) {
+		struct commit *commit = pop_commit(&list);
+		if (!(commit->object.flags & STALE))
+			commit_list_insert_by_date(commit, &result);
+	}
+	return result;
+}
+
+struct commit_list *get_octopus_merge_bases(struct commit_list *in)
+{
+	struct commit_list *i, *j, *k, *ret = NULL;
+
+	if (!in)
+		return ret;
+
+	commit_list_insert(in->item, &ret);
+
+	for (i = in->next; i; i = i->next) {
+		struct commit_list *new_commits = NULL, *end = NULL;
+
+		for (j = ret; j; j = j->next) {
+			struct commit_list *bases;
+			bases = get_merge_bases(i->item, j->item);
+			if (!new_commits)
+				new_commits = bases;
+			else
+				end->next = bases;
+			for (k = bases; k; k = k->next)
+				end = k;
+		}
+		ret = new_commits;
+	}
+	return ret;
+}
+
+static int remove_redundant(struct commit **array, int cnt)
+{
+	/*
+	 * Some commit in the array may be an ancestor of
+	 * another commit.  Move such commit to the end of
+	 * the array, and return the number of commits that
+	 * are independent from each other.
+	 */
+	struct commit **work;
+	unsigned char *redundant;
+	int *filled_index;
+	int i, j, filled;
+
+	work = xcalloc(cnt, sizeof(*work));
+	redundant = xcalloc(cnt, 1);
+	ALLOC_ARRAY(filled_index, cnt - 1);
+
+	for (i = 0; i < cnt; i++)
+		parse_commit(array[i]);
+	for (i = 0; i < cnt; i++) {
+		struct commit_list *common;
+		uint32_t min_generation = array[i]->generation;
+
+		if (redundant[i])
+			continue;
+		for (j = filled = 0; j < cnt; j++) {
+			if (i == j || redundant[j])
+				continue;
+			filled_index[filled] = j;
+			work[filled++] = array[j];
+
+			if (array[j]->generation < min_generation)
+				min_generation = array[j]->generation;
+		}
+		common = paint_down_to_common(array[i], filled, work,
+					      min_generation);
+		if (array[i]->object.flags & PARENT2)
+			redundant[i] = 1;
+		for (j = 0; j < filled; j++)
+			if (work[j]->object.flags & PARENT1)
+				redundant[filled_index[j]] = 1;
+		clear_commit_marks(array[i], all_flags);
+		clear_commit_marks_many(filled, work, all_flags);
+		free_commit_list(common);
+	}
+
+	/* Now collect the result */
+	COPY_ARRAY(work, array, cnt);
+	for (i = filled = 0; i < cnt; i++)
+		if (!redundant[i])
+			array[filled++] = work[i];
+	for (j = filled, i = 0; i < cnt; i++)
+		if (redundant[i])
+			array[j++] = work[i];
+	free(work);
+	free(redundant);
+	free(filled_index);
+	return filled;
+}
+
+static struct commit_list *get_merge_bases_many_0(struct commit *one,
+						  int n,
+						  struct commit **twos,
+						  int cleanup)
+{
+	struct commit_list *list;
+	struct commit **rslt;
+	struct commit_list *result;
+	int cnt, i;
+
+	result = merge_bases_many(one, n, twos);
+	for (i = 0; i < n; i++) {
+		if (one == twos[i])
+			return result;
+	}
+	if (!result || !result->next) {
+		if (cleanup) {
+			clear_commit_marks(one, all_flags);
+			clear_commit_marks_many(n, twos, all_flags);
+		}
+		return result;
+	}
+
+	/* There are more than one */
+	cnt = commit_list_count(result);
+	rslt = xcalloc(cnt, sizeof(*rslt));
+	for (list = result, i = 0; list; list = list->next)
+		rslt[i++] = list->item;
+	free_commit_list(result);
+
+	clear_commit_marks(one, all_flags);
+	clear_commit_marks_many(n, twos, all_flags);
+
+	cnt = remove_redundant(rslt, cnt);
+	result = NULL;
+	for (i = 0; i < cnt; i++)
+		commit_list_insert_by_date(rslt[i], &result);
+	free(rslt);
+	return result;
+}
+
+struct commit_list *get_merge_bases_many(struct commit *one,
+					 int n,
+					 struct commit **twos)
+{
+	return get_merge_bases_many_0(one, n, twos, 1);
+}
+
+struct commit_list *get_merge_bases_many_dirty(struct commit *one,
+					       int n,
+					       struct commit **twos)
+{
+	return get_merge_bases_many_0(one, n, twos, 0);
+}
+
+struct commit_list *get_merge_bases(struct commit *one, struct commit *two)
+{
+	return get_merge_bases_many_0(one, 1, &two, 1);
+}
+
+/*
+ * Is "commit" a descendant of one of the elements on the "with_commit" list?
+ */
+int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
+{
+	if (!with_commit)
+		return 1;
+	while (with_commit) {
+		struct commit *other;
+
+		other = with_commit->item;
+		with_commit = with_commit->next;
+		if (in_merge_bases(other, commit))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Is "commit" an ancestor of one of the "references"?
+ */
+int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference)
+{
+	struct commit_list *bases;
+	int ret = 0, i;
+	uint32_t min_generation = GENERATION_NUMBER_INFINITY;
+
+	if (parse_commit(commit))
+		return ret;
+	for (i = 0; i < nr_reference; i++) {
+		if (parse_commit(reference[i]))
+			return ret;
+		if (reference[i]->generation < min_generation)
+			min_generation = reference[i]->generation;
+	}
+
+	if (commit->generation > min_generation)
+		return ret;
+
+	bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
+	if (commit->object.flags & PARENT2)
+		ret = 1;
+	clear_commit_marks(commit, all_flags);
+	clear_commit_marks_many(nr_reference, reference, all_flags);
+	free_commit_list(bases);
+	return ret;
+}
+
+/*
+ * Is "commit" an ancestor of (i.e. reachable from) the "reference"?
+ */
+int in_merge_bases(struct commit *commit, struct commit *reference)
+{
+	return in_merge_bases_many(commit, 1, &reference);
+}
+
+struct commit_list *reduce_heads(struct commit_list *heads)
+{
+	struct commit_list *p;
+	struct commit_list *result = NULL, **tail = &result;
+	struct commit **array;
+	int num_head, i;
+
+	if (!heads)
+		return NULL;
+
+	/* Uniquify */
+	for (p = heads; p; p = p->next)
+		p->item->object.flags &= ~STALE;
+	for (p = heads, num_head = 0; p; p = p->next) {
+		if (p->item->object.flags & STALE)
+			continue;
+		p->item->object.flags |= STALE;
+		num_head++;
+	}
+	array = xcalloc(num_head, sizeof(*array));
+	for (p = heads, i = 0; p; p = p->next) {
+		if (p->item->object.flags & STALE) {
+			array[i++] = p->item;
+			p->item->object.flags &= ~STALE;
+		}
+	}
+	num_head = remove_redundant(array, num_head);
+	for (i = 0; i < num_head; i++)
+		tail = &commit_list_insert(array[i], tail)->next;
+	free(array);
+	return result;
+}
+
+void reduce_heads_replace(struct commit_list **heads)
+{
+	struct commit_list *result = reduce_heads(*heads);
+	free_commit_list(*heads);
+	*heads = result;
+}
diff --git a/commit-reach.h b/commit-reach.h
new file mode 100644
index 0000000000..1ea2696e40
--- /dev/null
+++ b/commit-reach.h
@@ -0,0 +1,42 @@
+#ifndef __COMMIT_REACH_H__
+#define __COMMIT_REACH_H__
+
+struct commit;
+struct commit_list;
+
+struct commit_list *get_merge_bases_many(struct commit *one,
+					 int n,
+					 struct commit **twos);
+struct commit_list *get_merge_bases_many_dirty(struct commit *one,
+					       int n,
+					       struct commit **twos);
+struct commit_list *get_merge_bases(struct commit *one, struct commit *two);
+struct commit_list *get_octopus_merge_bases(struct commit_list *in);
+
+/* To be used only when object flags after this call no longer matter */
+struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos);
+
+int is_descendant_of(struct commit *commit, struct commit_list *with_commit);
+int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference);
+int in_merge_bases(struct commit *commit, struct commit *reference);
+
+
+/*
+ * Takes a list of commits and returns a new list where those
+ * have been removed that can be reached from other commits in
+ * the list. It is useful for, e.g., reducing the commits
+ * randomly thrown at the git-merge command and removing
+ * redundant commits that the user shouldn't have given to it.
+ *
+ * This function destroys the STALE bit of the commit objects'
+ * flags.
+ */
+struct commit_list *reduce_heads(struct commit_list *heads);
+
+/*
+ * Like `reduce_heads()`, except it replaces the list. Use this
+ * instead of `foo = reduce_heads(foo);` to avoid memory leaks.
+ */
+void reduce_heads_replace(struct commit_list **heads);
+
+#endif
diff --git a/commit.c b/commit.c
index 39b80bd21d..32d1234bd7 100644
--- a/commit.c
+++ b/commit.c
@@ -843,364 +843,6 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so
 		clear_author_date_slab(&author_date);
 }
 
-/* merge-base stuff */
-
-/* Remember to update object flag allocation in object.h */
-#define PARENT1		(1u<<16)
-#define PARENT2		(1u<<17)
-#define STALE		(1u<<18)
-#define RESULT		(1u<<19)
-
-static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT);
-
-static int queue_has_nonstale(struct prio_queue *queue)
-{
-	int i;
-	for (i = 0; i < queue->nr; i++) {
-		struct commit *commit = queue->array[i].data;
-		if (!(commit->object.flags & STALE))
-			return 1;
-	}
-	return 0;
-}
-
-/* all input commits in one and twos[] must have been parsed! */
-static struct commit_list *paint_down_to_common(struct commit *one, int n,
-						struct commit **twos,
-						int min_generation)
-{
-	struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
-	struct commit_list *result = NULL;
-	int i;
-	uint32_t last_gen = GENERATION_NUMBER_INFINITY;
-
-	one->object.flags |= PARENT1;
-	if (!n) {
-		commit_list_append(one, &result);
-		return result;
-	}
-	prio_queue_put(&queue, one);
-
-	for (i = 0; i < n; i++) {
-		twos[i]->object.flags |= PARENT2;
-		prio_queue_put(&queue, twos[i]);
-	}
-
-	while (queue_has_nonstale(&queue)) {
-		struct commit *commit = prio_queue_get(&queue);
-		struct commit_list *parents;
-		int flags;
-
-		if (commit->generation > last_gen)
-			BUG("bad generation skip %8x > %8x at %s",
-			    commit->generation, last_gen,
-			    oid_to_hex(&commit->object.oid));
-		last_gen = commit->generation;
-
-		if (commit->generation < min_generation)
-			break;
-
-		flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
-		if (flags == (PARENT1 | PARENT2)) {
-			if (!(commit->object.flags & RESULT)) {
-				commit->object.flags |= RESULT;
-				commit_list_insert_by_date(commit, &result);
-			}
-			/* Mark parents of a found merge stale */
-			flags |= STALE;
-		}
-		parents = commit->parents;
-		while (parents) {
-			struct commit *p = parents->item;
-			parents = parents->next;
-			if ((p->object.flags & flags) == flags)
-				continue;
-			if (parse_commit(p))
-				return NULL;
-			p->object.flags |= flags;
-			prio_queue_put(&queue, p);
-		}
-	}
-
-	clear_prio_queue(&queue);
-	return result;
-}
-
-static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos)
-{
-	struct commit_list *list = NULL;
-	struct commit_list *result = NULL;
-	int i;
-
-	for (i = 0; i < n; i++) {
-		if (one == twos[i])
-			/*
-			 * We do not mark this even with RESULT so we do not
-			 * have to clean it up.
-			 */
-			return commit_list_insert(one, &result);
-	}
-
-	if (parse_commit(one))
-		return NULL;
-	for (i = 0; i < n; i++) {
-		if (parse_commit(twos[i]))
-			return NULL;
-	}
-
-	list = paint_down_to_common(one, n, twos, 0);
-
-	while (list) {
-		struct commit *commit = pop_commit(&list);
-		if (!(commit->object.flags & STALE))
-			commit_list_insert_by_date(commit, &result);
-	}
-	return result;
-}
-
-struct commit_list *get_octopus_merge_bases(struct commit_list *in)
-{
-	struct commit_list *i, *j, *k, *ret = NULL;
-
-	if (!in)
-		return ret;
-
-	commit_list_insert(in->item, &ret);
-
-	for (i = in->next; i; i = i->next) {
-		struct commit_list *new_commits = NULL, *end = NULL;
-
-		for (j = ret; j; j = j->next) {
-			struct commit_list *bases;
-			bases = get_merge_bases(i->item, j->item);
-			if (!new_commits)
-				new_commits = bases;
-			else
-				end->next = bases;
-			for (k = bases; k; k = k->next)
-				end = k;
-		}
-		ret = new_commits;
-	}
-	return ret;
-}
-
-static int remove_redundant(struct commit **array, int cnt)
-{
-	/*
-	 * Some commit in the array may be an ancestor of
-	 * another commit.  Move such commit to the end of
-	 * the array, and return the number of commits that
-	 * are independent from each other.
-	 */
-	struct commit **work;
-	unsigned char *redundant;
-	int *filled_index;
-	int i, j, filled;
-
-	work = xcalloc(cnt, sizeof(*work));
-	redundant = xcalloc(cnt, 1);
-	ALLOC_ARRAY(filled_index, cnt - 1);
-
-	for (i = 0; i < cnt; i++)
-		parse_commit(array[i]);
-	for (i = 0; i < cnt; i++) {
-		struct commit_list *common;
-		uint32_t min_generation = array[i]->generation;
-
-		if (redundant[i])
-			continue;
-		for (j = filled = 0; j < cnt; j++) {
-			if (i == j || redundant[j])
-				continue;
-			filled_index[filled] = j;
-			work[filled++] = array[j];
-
-			if (array[j]->generation < min_generation)
-				min_generation = array[j]->generation;
-		}
-		common = paint_down_to_common(array[i], filled, work,
-					      min_generation);
-		if (array[i]->object.flags & PARENT2)
-			redundant[i] = 1;
-		for (j = 0; j < filled; j++)
-			if (work[j]->object.flags & PARENT1)
-				redundant[filled_index[j]] = 1;
-		clear_commit_marks(array[i], all_flags);
-		clear_commit_marks_many(filled, work, all_flags);
-		free_commit_list(common);
-	}
-
-	/* Now collect the result */
-	COPY_ARRAY(work, array, cnt);
-	for (i = filled = 0; i < cnt; i++)
-		if (!redundant[i])
-			array[filled++] = work[i];
-	for (j = filled, i = 0; i < cnt; i++)
-		if (redundant[i])
-			array[j++] = work[i];
-	free(work);
-	free(redundant);
-	free(filled_index);
-	return filled;
-}
-
-static struct commit_list *get_merge_bases_many_0(struct commit *one,
-						  int n,
-						  struct commit **twos,
-						  int cleanup)
-{
-	struct commit_list *list;
-	struct commit **rslt;
-	struct commit_list *result;
-	int cnt, i;
-
-	result = merge_bases_many(one, n, twos);
-	for (i = 0; i < n; i++) {
-		if (one == twos[i])
-			return result;
-	}
-	if (!result || !result->next) {
-		if (cleanup) {
-			clear_commit_marks(one, all_flags);
-			clear_commit_marks_many(n, twos, all_flags);
-		}
-		return result;
-	}
-
-	/* There are more than one */
-	cnt = commit_list_count(result);
-	rslt = xcalloc(cnt, sizeof(*rslt));
-	for (list = result, i = 0; list; list = list->next)
-		rslt[i++] = list->item;
-	free_commit_list(result);
-
-	clear_commit_marks(one, all_flags);
-	clear_commit_marks_many(n, twos, all_flags);
-
-	cnt = remove_redundant(rslt, cnt);
-	result = NULL;
-	for (i = 0; i < cnt; i++)
-		commit_list_insert_by_date(rslt[i], &result);
-	free(rslt);
-	return result;
-}
-
-struct commit_list *get_merge_bases_many(struct commit *one,
-					 int n,
-					 struct commit **twos)
-{
-	return get_merge_bases_many_0(one, n, twos, 1);
-}
-
-struct commit_list *get_merge_bases_many_dirty(struct commit *one,
-					       int n,
-					       struct commit **twos)
-{
-	return get_merge_bases_many_0(one, n, twos, 0);
-}
-
-struct commit_list *get_merge_bases(struct commit *one, struct commit *two)
-{
-	return get_merge_bases_many_0(one, 1, &two, 1);
-}
-
-/*
- * Is "commit" a descendant of one of the elements on the "with_commit" list?
- */
-int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
-{
-	if (!with_commit)
-		return 1;
-	while (with_commit) {
-		struct commit *other;
-
-		other = with_commit->item;
-		with_commit = with_commit->next;
-		if (in_merge_bases(other, commit))
-			return 1;
-	}
-	return 0;
-}
-
-/*
- * Is "commit" an ancestor of one of the "references"?
- */
-int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference)
-{
-	struct commit_list *bases;
-	int ret = 0, i;
-	uint32_t min_generation = GENERATION_NUMBER_INFINITY;
-
-	if (parse_commit(commit))
-		return ret;
-	for (i = 0; i < nr_reference; i++) {
-		if (parse_commit(reference[i]))
-			return ret;
-		if (reference[i]->generation < min_generation)
-			min_generation = reference[i]->generation;
-	}
-
-	if (commit->generation > min_generation)
-		return ret;
-
-	bases = paint_down_to_common(commit, nr_reference, reference, commit->generation);
-	if (commit->object.flags & PARENT2)
-		ret = 1;
-	clear_commit_marks(commit, all_flags);
-	clear_commit_marks_many(nr_reference, reference, all_flags);
-	free_commit_list(bases);
-	return ret;
-}
-
-/*
- * Is "commit" an ancestor of (i.e. reachable from) the "reference"?
- */
-int in_merge_bases(struct commit *commit, struct commit *reference)
-{
-	return in_merge_bases_many(commit, 1, &reference);
-}
-
-struct commit_list *reduce_heads(struct commit_list *heads)
-{
-	struct commit_list *p;
-	struct commit_list *result = NULL, **tail = &result;
-	struct commit **array;
-	int num_head, i;
-
-	if (!heads)
-		return NULL;
-
-	/* Uniquify */
-	for (p = heads; p; p = p->next)
-		p->item->object.flags &= ~STALE;
-	for (p = heads, num_head = 0; p; p = p->next) {
-		if (p->item->object.flags & STALE)
-			continue;
-		p->item->object.flags |= STALE;
-		num_head++;
-	}
-	array = xcalloc(num_head, sizeof(*array));
-	for (p = heads, i = 0; p; p = p->next) {
-		if (p->item->object.flags & STALE) {
-			array[i++] = p->item;
-			p->item->object.flags &= ~STALE;
-		}
-	}
-	num_head = remove_redundant(array, num_head);
-	for (i = 0; i < num_head; i++)
-		tail = &commit_list_insert(array[i], tail)->next;
-	free(array);
-	return result;
-}
-
-void reduce_heads_replace(struct commit_list **heads)
-{
-	struct commit_list *result = reduce_heads(*heads);
-	free_commit_list(*heads);
-	*heads = result;
-}
-
 static const char gpg_sig_header[] = "gpgsig";
 static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1;
 
diff --git a/object.h b/object.h
index fa5ca97567..18c2b073e3 100644
--- a/object.h
+++ b/object.h
@@ -65,7 +65,7 @@ struct object_array {
  * bisect.c:                                        16
  * bundle.c:                                        16
  * http-push.c:                                     16-----19
- * commit.c:                                        16-----19
+ * commit-reach.c:                                  16-----19
  * sha1-name.c:                                              20
  * list-objects-filter.c:                                      21
  * builtin/fsck.c:           0--3
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 02/18] commit.h: remove method declarations
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 01/18] commit-reach: move walk methods from commit.c Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 03/18] commit-reach: move ref_newer from remote.c Derrick Stolee
                     ` (19 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

These methods are now declared in commit-reach.h. Remove them from
commit.h and add new include statements in all files that require these
declarations.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 bisect.c                |  1 +
 builtin/branch.c        |  1 +
 builtin/commit.c        |  1 +
 builtin/fetch.c         |  1 +
 builtin/fmt-merge-msg.c |  1 +
 builtin/log.c           |  1 +
 builtin/merge-base.c    |  1 +
 builtin/merge.c         |  1 +
 builtin/pull.c          |  1 +
 builtin/receive-pack.c  |  1 +
 builtin/rev-parse.c     |  1 +
 commit.h                | 29 -----------------------------
 fast-import.c           |  1 +
 http-push.c             |  2 +-
 merge-recursive.c       |  1 +
 notes-merge.c           |  1 +
 pack-bitmap-write.c     |  1 +
 ref-filter.c            |  1 +
 remote.c                |  1 +
 revision.c              |  1 +
 sequencer.c             |  1 +
 sha1-name.c             |  1 +
 shallow.c               |  1 +
 submodule.c             |  1 +
 24 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/bisect.c b/bisect.c
index e1275ba79e..d023543c91 100644
--- a/bisect.c
+++ b/bisect.c
@@ -13,6 +13,7 @@
 #include "sha1-array.h"
 #include "argv-array.h"
 #include "commit-slab.h"
+#include "commit-reach.h"
 
 static struct oid_array good_revs;
 static struct oid_array skipped_revs;
diff --git a/builtin/branch.c b/builtin/branch.c
index a50632fb23..9a787447f4 100644
--- a/builtin/branch.c
+++ b/builtin/branch.c
@@ -23,6 +23,7 @@
 #include "ref-filter.h"
 #include "worktree.h"
 #include "help.h"
+#include "commit-reach.h"
 
 static const char * const builtin_branch_usage[] = {
 	N_("git branch [<options>] [-r | -a] [--merged | --no-merged]"),
diff --git a/builtin/commit.c b/builtin/commit.c
index 158e3f843a..b5c608458e 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -33,6 +33,7 @@
 #include "sequencer.h"
 #include "mailmap.h"
 #include "help.h"
+#include "commit-reach.h"
 
 static const char * const builtin_commit_usage[] = {
 	N_("git commit [<options>] [--] <pathspec>..."),
diff --git a/builtin/fetch.c b/builtin/fetch.c
index f5d960baec..7de234774b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -22,6 +22,7 @@
 #include "utf8.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "commit-reach.h"
 
 static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] [<repository> [<refspec>...]]"),
diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index ff165c0fcd..7277d557b2 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -12,6 +12,7 @@
 #include "fmt-merge-msg.h"
 #include "gpg-interface.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 static const char * const fmt_merge_msg_usage[] = {
 	N_("git fmt-merge-msg [-m <message>] [--log[=<n>] | --no-log] [--file <file>]"),
diff --git a/builtin/log.c b/builtin/log.c
index 55a6286d7f..333d97c692 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -31,6 +31,7 @@
 #include "progress.h"
 #include "commit-slab.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 #define MAIL_DEFAULT_WRAP 72
 
diff --git a/builtin/merge-base.c b/builtin/merge-base.c
index 08d91b1f0c..1c92099070 100644
--- a/builtin/merge-base.c
+++ b/builtin/merge-base.c
@@ -7,6 +7,7 @@
 #include "revision.h"
 #include "parse-options.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 static int show_merge_base(struct commit **rev, int rev_nr, int show_all)
 {
diff --git a/builtin/merge.c b/builtin/merge.c
index d1b547d973..4c601c40a2 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -36,6 +36,7 @@
 #include "packfile.h"
 #include "tag.h"
 #include "alias.h"
+#include "commit-reach.h"
 
 #define DEFAULT_TWOHEAD (1<<0)
 #define DEFAULT_OCTOPUS (1<<1)
diff --git a/builtin/pull.c b/builtin/pull.c
index 4e78935392..15ad010968 100644
--- a/builtin/pull.c
+++ b/builtin/pull.c
@@ -22,6 +22,7 @@
 #include "tempfile.h"
 #include "lockfile.h"
 #include "wt-status.h"
+#include "commit-reach.h"
 
 enum rebase_type {
 	REBASE_INVALID = -1,
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 400d31c18c..d8467f9734 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -27,6 +27,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "protocol.h"
+#include "commit-reach.h"
 
 static const char * const receive_pack_usage[] = {
 	N_("git receive-pack <git-dir>"),
diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index 0f09bbbf65..455f62246d 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -14,6 +14,7 @@
 #include "revision.h"
 #include "split-index.h"
 #include "submodule.h"
+#include "commit-reach.h"
 
 #define DO_REVS		1
 #define DO_NOREV	2
diff --git a/commit.h b/commit.h
index da0db36eba..e2c99d9b04 100644
--- a/commit.h
+++ b/commit.h
@@ -204,13 +204,6 @@ struct commit_graft *read_graft_line(struct strbuf *line);
 int register_commit_graft(struct repository *r, struct commit_graft *, int);
 struct commit_graft *lookup_commit_graft(struct repository *r, const struct object_id *oid);
 
-extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2);
-extern struct commit_list *get_merge_bases_many(struct commit *one, int n, struct commit **twos);
-extern struct commit_list *get_octopus_merge_bases(struct commit_list *in);
-
-/* To be used only when object flags after this call no longer matter */
-extern struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos);
-
 /* largest positive number a signed 32-bit integer can contain */
 #define INFINITE_DEPTH 0x7fffffff
 
@@ -258,32 +251,10 @@ extern int delayed_reachability_test(struct shallow_info *si, int c);
 extern void prune_shallow(int show_only);
 extern struct trace_key trace_shallow;
 
-int is_descendant_of(struct commit *, struct commit_list *);
-int in_merge_bases(struct commit *, struct commit *);
-int in_merge_bases_many(struct commit *, int, struct commit **);
-
 extern int interactive_add(int argc, const char **argv, const char *prefix, int patch);
 extern int run_add_interactive(const char *revision, const char *patch_mode,
 			       const struct pathspec *pathspec);
 
-/*
- * Takes a list of commits and returns a new list where those
- * have been removed that can be reached from other commits in
- * the list. It is useful for, e.g., reducing the commits
- * randomly thrown at the git-merge command and removing
- * redundant commits that the user shouldn't have given to it.
- *
- * This function destroys the STALE bit of the commit objects'
- * flags.
- */
-extern struct commit_list *reduce_heads(struct commit_list *heads);
-
-/*
- * Like `reduce_heads()`, except it replaces the list. Use this
- * instead of `foo = reduce_heads(foo);` to avoid memory leaks.
- */
-extern void reduce_heads_replace(struct commit_list **heads);
-
 struct commit_extra_header {
 	struct commit_extra_header *next;
 	char *key;
diff --git a/fast-import.c b/fast-import.c
index 3ea5781029..4a93df3839 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -171,6 +171,7 @@ Format of STDIN stream:
 #include "packfile.h"
 #include "object-store.h"
 #include "mem-pool.h"
+#include "commit-reach.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
diff --git a/http-push.c b/http-push.c
index 5eaf551b51..91fdc7e1d5 100644
--- a/http-push.c
+++ b/http-push.c
@@ -14,7 +14,7 @@
 #include "argv-array.h"
 #include "packfile.h"
 #include "object-store.h"
-
+#include "commit-reach.h"
 
 #ifdef EXPAT_NEEDS_XMLPARSE_H
 #include <xmlparse.h>
diff --git a/merge-recursive.c b/merge-recursive.c
index 1dd6ec384d..8155dee9a9 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -27,6 +27,7 @@
 #include "dir.h"
 #include "submodule.h"
 #include "revision.h"
+#include "commit-reach.h"
 
 struct path_hashmap_entry {
 	struct hashmap_entry e;
diff --git a/notes-merge.c b/notes-merge.c
index 76ab19e702..12dfdf6c17 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -12,6 +12,7 @@
 #include "notes-merge.h"
 #include "strbuf.h"
 #include "notes-utils.h"
+#include "commit-reach.h"
 
 struct notes_merge_pair {
 	struct object_id obj, base, local, remote;
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 953c5dd84d..55bcab907c 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -11,6 +11,7 @@
 #include "pack-bitmap.h"
 #include "sha1-lookup.h"
 #include "pack-objects.h"
+#include "commit-reach.h"
 
 struct bitmapped_commit {
 	struct commit *commit;
diff --git a/ref-filter.c b/ref-filter.c
index 9b2da88392..fca3ad040a 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -19,6 +19,7 @@
 #include "wt-status.h"
 #include "commit-slab.h"
 #include "commit-graph.h"
+#include "commit-reach.h"
 
 static struct ref_msg {
 	const char *gone;
diff --git a/remote.c b/remote.c
index 26b1fbd9a8..8e99b9888a 100644
--- a/remote.c
+++ b/remote.c
@@ -12,6 +12,7 @@
 #include "string-list.h"
 #include "mergesort.h"
 #include "argv-array.h"
+#include "commit-reach.h"
 
 enum map_direction { FROM_SRC, FROM_DST };
 
diff --git a/revision.c b/revision.c
index 4dbe406bed..3205a3947a 100644
--- a/revision.c
+++ b/revision.c
@@ -24,6 +24,7 @@
 #include "packfile.h"
 #include "worktree.h"
 #include "argv-array.h"
+#include "commit-reach.h"
 
 volatile show_early_output_fn_t show_early_output;
 
diff --git a/sequencer.c b/sequencer.c
index d1d07bed5b..97bdfd48b4 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -30,6 +30,7 @@
 #include "oidset.h"
 #include "commit-slab.h"
 #include "alias.h"
+#include "commit-reach.h"
 
 #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION"
 
diff --git a/sha1-name.c b/sha1-name.c
index 009faab4ae..7215b30b88 100644
--- a/sha1-name.c
+++ b/sha1-name.c
@@ -12,6 +12,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 static int get_oid_oneline(const char *, struct object_id *, struct commit_list *);
 
diff --git a/shallow.c b/shallow.c
index dbe8a2a290..99fd2d1ba0 100644
--- a/shallow.c
+++ b/shallow.c
@@ -16,6 +16,7 @@
 #include "list-objects.h"
 #include "commit-slab.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 void set_alternate_shallow_file(struct repository *r, const char *path, int override)
 {
diff --git a/submodule.c b/submodule.c
index 6688dd5d45..6650ed7aa0 100644
--- a/submodule.c
+++ b/submodule.c
@@ -22,6 +22,7 @@
 #include "worktree.h"
 #include "parse-options.h"
 #include "object-store.h"
+#include "commit-reach.h"
 
 static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
 static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP;
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 03/18] commit-reach: move ref_newer from remote.c
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 01/18] commit-reach: move walk methods from commit.c Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 02/18] commit.h: remove method declarations Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter Derrick Stolee
                     ` (18 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

There are several commit walks in the codebase. Group them together into
a new commit-reach.c file and corresponding header. After we group these
walks into one place, we can reduce duplicate logic by calling
equivalent methods.

The ref_newer() method is used by 'git push -f' to check if a force-push
is necessary. By making the method public, we make it possible to test
the method directly without setting up an environment where a 'git
push' call makes sense.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 builtin/remote.c |  1 +
 commit-reach.c   | 55 +++++++++++++++++++++++++++++++++++++++++++++++-
 commit-reach.h   |  2 ++
 remote.c         | 49 ------------------------------------------
 remote.h         |  1 -
 5 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/builtin/remote.c b/builtin/remote.c
index c74ee88690..79b0326446 100644
--- a/builtin/remote.c
+++ b/builtin/remote.c
@@ -10,6 +10,7 @@
 #include "refspec.h"
 #include "object-store.h"
 #include "argv-array.h"
+#include "commit-reach.h"
 
 static const char * const builtin_remote_usage[] = {
 	N_("git remote [-v | --verbose]"),
diff --git a/commit-reach.c b/commit-reach.c
index 8ab6044414..a6bc4781a6 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -1,6 +1,10 @@
 #include "cache.h"
-#include "prio-queue.h"
 #include "commit.h"
+#include "decorate.h"
+#include "prio-queue.h"
+#include "tree.h"
+#include "revision.h"
+#include "tag.h"
 #include "commit-reach.h"
 
 /* Remember to update object flag allocation in object.h */
@@ -358,3 +362,52 @@ void reduce_heads_replace(struct commit_list **heads)
 	free_commit_list(*heads);
 	*heads = result;
 }
+
+static void unmark_and_free(struct commit_list *list, unsigned int mark)
+{
+	while (list) {
+		struct commit *commit = pop_commit(&list);
+		commit->object.flags &= ~mark;
+	}
+}
+
+int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
+{
+	struct object *o;
+	struct commit *old_commit, *new_commit;
+	struct commit_list *list, *used;
+	int found = 0;
+
+	/*
+	 * Both new_commit and old_commit must be commit-ish and new_commit is descendant of
+	 * old_commit.  Otherwise we require --force.
+	 */
+	o = deref_tag(the_repository, parse_object(the_repository, old_oid),
+		      NULL, 0);
+	if (!o || o->type != OBJ_COMMIT)
+		return 0;
+	old_commit = (struct commit *) o;
+
+	o = deref_tag(the_repository, parse_object(the_repository, new_oid),
+		      NULL, 0);
+	if (!o || o->type != OBJ_COMMIT)
+		return 0;
+	new_commit = (struct commit *) o;
+
+	if (parse_commit(new_commit) < 0)
+		return 0;
+
+	used = list = NULL;
+	commit_list_insert(new_commit, &list);
+	while (list) {
+		new_commit = pop_most_recent_commit(&list, TMP_MARK);
+		commit_list_insert(new_commit, &used);
+		if (new_commit == old_commit) {
+			found = 1;
+			break;
+		}
+	}
+	unmark_and_free(list, TMP_MARK);
+	unmark_and_free(used, TMP_MARK);
+	return found;
+}
diff --git a/commit-reach.h b/commit-reach.h
index 1ea2696e40..f1cf9bfcd8 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -39,4 +39,6 @@ struct commit_list *reduce_heads(struct commit_list *heads);
  */
 void reduce_heads_replace(struct commit_list **heads);
 
+int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
+
 #endif
diff --git a/remote.c b/remote.c
index 8e99b9888a..f0c23bae48 100644
--- a/remote.c
+++ b/remote.c
@@ -1784,55 +1784,6 @@ int resolve_remote_symref(struct ref *ref, struct ref *list)
 	return 1;
 }
 
-static void unmark_and_free(struct commit_list *list, unsigned int mark)
-{
-	while (list) {
-		struct commit *commit = pop_commit(&list);
-		commit->object.flags &= ~mark;
-	}
-}
-
-int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
-{
-	struct object *o;
-	struct commit *old_commit, *new_commit;
-	struct commit_list *list, *used;
-	int found = 0;
-
-	/*
-	 * Both new_commit and old_commit must be commit-ish and new_commit is descendant of
-	 * old_commit.  Otherwise we require --force.
-	 */
-	o = deref_tag(the_repository, parse_object(the_repository, old_oid),
-		      NULL, 0);
-	if (!o || o->type != OBJ_COMMIT)
-		return 0;
-	old_commit = (struct commit *) o;
-
-	o = deref_tag(the_repository, parse_object(the_repository, new_oid),
-		      NULL, 0);
-	if (!o || o->type != OBJ_COMMIT)
-		return 0;
-	new_commit = (struct commit *) o;
-
-	if (parse_commit(new_commit) < 0)
-		return 0;
-
-	used = list = NULL;
-	commit_list_insert(new_commit, &list);
-	while (list) {
-		new_commit = pop_most_recent_commit(&list, TMP_MARK);
-		commit_list_insert(new_commit, &used);
-		if (new_commit == old_commit) {
-			found = 1;
-			break;
-		}
-	}
-	unmark_and_free(list, TMP_MARK);
-	unmark_and_free(used, TMP_MARK);
-	return found;
-}
-
 /*
  * Lookup the upstream branch for the given branch and if present, optionally
  * compute the commit ahead/behind values for the pair.
diff --git a/remote.h b/remote.h
index 45ecc6cefa..56fb9cbb27 100644
--- a/remote.h
+++ b/remote.h
@@ -149,7 +149,6 @@ extern struct ref **get_remote_refs(int fd_out, struct packet_reader *reader,
 				    const struct string_list *server_options);
 
 int resolve_remote_symref(struct ref *ref, struct ref *list);
-int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
 
 /*
  * Remove and free all but the first of any entries in the input list
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (2 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 03/18] commit-reach: move ref_newer from remote.c Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-08-28 21:24     ` Jonathan Nieder
  2018-07-20 16:33   ` [PATCH v2 05/18] upload-pack: make reachable() more generic Derrick Stolee
                     ` (17 subsequent siblings)
  21 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

There are several commit walks in the codebase. Group them together into
a new commit-reach.c file and corresponding header. After we group these
walks into one place, we can reduce duplicate logic by calling
equivalent methods.

All methods are direct moves, except we also make the commit_contains()
method public so its consumers in ref-filter.c can still call it. We can
also test this method in a test-tool in a later commit.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 121 +++++++++++++++++++++++++++++++++++++++++
 commit-reach.h |  20 ++++++-
 ref-filter.c   | 145 +++----------------------------------------------
 3 files changed, 147 insertions(+), 139 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index a6bc4781a6..01d796f011 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -1,8 +1,10 @@
 #include "cache.h"
 #include "commit.h"
+#include "commit-graph.h"
 #include "decorate.h"
 #include "prio-queue.h"
 #include "tree.h"
+#include "ref-filter.c"
 #include "revision.h"
 #include "tag.h"
 #include "commit-reach.h"
@@ -411,3 +413,122 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
 	unmark_and_free(used, TMP_MARK);
 	return found;
 }
+
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct contains_stack {
+	int nr, alloc;
+	struct contains_stack_entry {
+		struct commit *commit;
+		struct commit_list *parents;
+	} *contains_stack;
+};
+
+static int in_commit_list(const struct commit_list *want, struct commit *c)
+{
+	for (; want; want = want->next)
+		if (!oidcmp(&want->item->object.oid, &c->object.oid))
+			return 1;
+	return 0;
+}
+
+/*
+ * Test whether the candidate is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static enum contains_result contains_test(struct commit *candidate,
+					  const struct commit_list *want,
+					  struct contains_cache *cache,
+					  uint32_t cutoff)
+{
+	enum contains_result *cached = contains_cache_at(cache, candidate);
+
+	/* If we already have the answer cached, return that. */
+	if (*cached)
+		return *cached;
+
+	/* or are we it? */
+	if (in_commit_list(want, candidate)) {
+		*cached = CONTAINS_YES;
+		return CONTAINS_YES;
+	}
+
+	/* Otherwise, we don't know; prepare to recurse */
+	parse_commit_or_die(candidate);
+
+	if (candidate->generation < cutoff)
+		return CONTAINS_NO;
+
+	return CONTAINS_UNKNOWN;
+}
+
+static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack)
+{
+	ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc);
+	contains_stack->contains_stack[contains_stack->nr].commit = candidate;
+	contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents;
+}
+
+static enum contains_result contains_tag_algo(struct commit *candidate,
+					      const struct commit_list *want,
+					      struct contains_cache *cache)
+{
+	struct contains_stack contains_stack = { 0, 0, NULL };
+	enum contains_result result;
+	uint32_t cutoff = GENERATION_NUMBER_INFINITY;
+	const struct commit_list *p;
+
+	for (p = want; p; p = p->next) {
+		struct commit *c = p->item;
+		load_commit_graph_info(the_repository, c);
+		if (c->generation < cutoff)
+			cutoff = c->generation;
+	}
+
+	result = contains_test(candidate, want, cache, cutoff);
+	if (result != CONTAINS_UNKNOWN)
+		return result;
+
+	push_to_contains_stack(candidate, &contains_stack);
+	while (contains_stack.nr) {
+		struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1];
+		struct commit *commit = entry->commit;
+		struct commit_list *parents = entry->parents;
+
+		if (!parents) {
+			*contains_cache_at(cache, commit) = CONTAINS_NO;
+			contains_stack.nr--;
+		}
+		/*
+		 * If we just popped the stack, parents->item has been marked,
+		 * therefore contains_test will return a meaningful yes/no.
+		 */
+		else switch (contains_test(parents->item, want, cache, cutoff)) {
+		case CONTAINS_YES:
+			*contains_cache_at(cache, commit) = CONTAINS_YES;
+			contains_stack.nr--;
+			break;
+		case CONTAINS_NO:
+			entry->parents = parents->next;
+			break;
+		case CONTAINS_UNKNOWN:
+			push_to_contains_stack(parents->item, &contains_stack);
+			break;
+		}
+	}
+	free(contains_stack.contains_stack);
+	return contains_test(candidate, want, cache, cutoff);
+}
+
+int commit_contains(struct ref_filter *filter, struct commit *commit,
+		    struct commit_list *list, struct contains_cache *cache)
+{
+	if (filter->with_commit_tag_algo)
+		return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
+	return is_descendant_of(commit, list);
+}
diff --git a/commit-reach.h b/commit-reach.h
index f1cf9bfcd8..13dec25cee 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -1,8 +1,12 @@
 #ifndef __COMMIT_REACH_H__
 #define __COMMIT_REACH_H__
 
+#include "commit-slab.h"
+
 struct commit;
 struct commit_list;
+struct contains_cache;
+struct ref_filter;
 
 struct commit_list *get_merge_bases_many(struct commit *one,
 					 int n,
@@ -20,7 +24,6 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit);
 int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference);
 int in_merge_bases(struct commit *commit, struct commit *reference);
 
-
 /*
  * Takes a list of commits and returns a new list where those
  * have been removed that can be reached from other commits in
@@ -41,4 +44,19 @@ void reduce_heads_replace(struct commit_list **heads);
 
 int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
 
+/*
+ * Unknown has to be "0" here, because that's the default value for
+ * contains_cache slab entries that have not yet been assigned.
+ */
+enum contains_result {
+	CONTAINS_UNKNOWN = 0,
+	CONTAINS_NO,
+	CONTAINS_YES
+};
+
+define_commit_slab(contains_cache, enum contains_result);
+
+int commit_contains(struct ref_filter *filter, struct commit *commit,
+		    struct commit_list *list, struct contains_cache *cache);
+
 #endif
diff --git a/ref-filter.c b/ref-filter.c
index fca3ad040a..495e830fa5 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1624,144 +1624,6 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom,
 	return 0;
 }
 
-/*
- * Unknown has to be "0" here, because that's the default value for
- * contains_cache slab entries that have not yet been assigned.
- */
-enum contains_result {
-	CONTAINS_UNKNOWN = 0,
-	CONTAINS_NO,
-	CONTAINS_YES
-};
-
-define_commit_slab(contains_cache, enum contains_result);
-
-struct ref_filter_cbdata {
-	struct ref_array *array;
-	struct ref_filter *filter;
-	struct contains_cache contains_cache;
-	struct contains_cache no_contains_cache;
-};
-
-/*
- * Mimicking the real stack, this stack lives on the heap, avoiding stack
- * overflows.
- *
- * At each recursion step, the stack items points to the commits whose
- * ancestors are to be inspected.
- */
-struct contains_stack {
-	int nr, alloc;
-	struct contains_stack_entry {
-		struct commit *commit;
-		struct commit_list *parents;
-	} *contains_stack;
-};
-
-static int in_commit_list(const struct commit_list *want, struct commit *c)
-{
-	for (; want; want = want->next)
-		if (!oidcmp(&want->item->object.oid, &c->object.oid))
-			return 1;
-	return 0;
-}
-
-/*
- * Test whether the candidate is contained in the list.
- * Do not recurse to find out, though, but return -1 if inconclusive.
- */
-static enum contains_result contains_test(struct commit *candidate,
-					  const struct commit_list *want,
-					  struct contains_cache *cache,
-					  uint32_t cutoff)
-{
-	enum contains_result *cached = contains_cache_at(cache, candidate);
-
-	/* If we already have the answer cached, return that. */
-	if (*cached)
-		return *cached;
-
-	/* or are we it? */
-	if (in_commit_list(want, candidate)) {
-		*cached = CONTAINS_YES;
-		return CONTAINS_YES;
-	}
-
-	/* Otherwise, we don't know; prepare to recurse */
-	parse_commit_or_die(candidate);
-
-	if (candidate->generation < cutoff)
-		return CONTAINS_NO;
-
-	return CONTAINS_UNKNOWN;
-}
-
-static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack)
-{
-	ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc);
-	contains_stack->contains_stack[contains_stack->nr].commit = candidate;
-	contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents;
-}
-
-static enum contains_result contains_tag_algo(struct commit *candidate,
-					      const struct commit_list *want,
-					      struct contains_cache *cache)
-{
-	struct contains_stack contains_stack = { 0, 0, NULL };
-	enum contains_result result;
-	uint32_t cutoff = GENERATION_NUMBER_INFINITY;
-	const struct commit_list *p;
-
-	for (p = want; p; p = p->next) {
-		struct commit *c = p->item;
-		load_commit_graph_info(the_repository, c);
-		if (c->generation < cutoff)
-			cutoff = c->generation;
-	}
-
-	result = contains_test(candidate, want, cache, cutoff);
-	if (result != CONTAINS_UNKNOWN)
-		return result;
-
-	push_to_contains_stack(candidate, &contains_stack);
-	while (contains_stack.nr) {
-		struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1];
-		struct commit *commit = entry->commit;
-		struct commit_list *parents = entry->parents;
-
-		if (!parents) {
-			*contains_cache_at(cache, commit) = CONTAINS_NO;
-			contains_stack.nr--;
-		}
-		/*
-		 * If we just popped the stack, parents->item has been marked,
-		 * therefore contains_test will return a meaningful yes/no.
-		 */
-		else switch (contains_test(parents->item, want, cache, cutoff)) {
-		case CONTAINS_YES:
-			*contains_cache_at(cache, commit) = CONTAINS_YES;
-			contains_stack.nr--;
-			break;
-		case CONTAINS_NO:
-			entry->parents = parents->next;
-			break;
-		case CONTAINS_UNKNOWN:
-			push_to_contains_stack(parents->item, &contains_stack);
-			break;
-		}
-	}
-	free(contains_stack.contains_stack);
-	return contains_test(candidate, want, cache, cutoff);
-}
-
-static int commit_contains(struct ref_filter *filter, struct commit *commit,
-			   struct commit_list *list, struct contains_cache *cache)
-{
-	if (filter->with_commit_tag_algo)
-		return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
-	return is_descendant_of(commit, list);
-}
-
 /*
  * Return 1 if the refname matches one of the patterns, otherwise 0.
  * A pattern can be a literal prefix (e.g. a refname "refs/heads/master"
@@ -1988,6 +1850,13 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname)
 	return ref_kind_from_refname(refname);
 }
 
+struct ref_filter_cbdata {
+	struct ref_array *array;
+	struct ref_filter *filter;
+	struct contains_cache contains_cache;
+	struct contains_cache no_contains_cache;
+};
+
 /*
  * A call-back given to for_each_ref().  Filter refs and keep them for
  * later object processing.
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 05/18] upload-pack: make reachable() more generic
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (3 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 06/18] upload-pack: refactor ok_to_give_up() Derrick Stolee
                     ` (16 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

In anticipation of moving the reachable() method to commit-reach.c,
modify the prototype to be more generic to flags known outside of
upload-pack.c. Also rename 'want' to 'from' to make the statement
more clear outside of the context of haves/wants negotiation.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 upload-pack.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 4ca052d0b6..5a639cb47b 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -336,17 +336,18 @@ static int got_oid(const char *hex, struct object_id *oid)
 	return 0;
 }
 
-static int reachable(struct commit *want)
+static int reachable(struct commit *from, unsigned int with_flag,
+		     unsigned int assign_flag)
 {
 	struct prio_queue work = { compare_commits_by_commit_date };
 
-	prio_queue_put(&work, want);
+	prio_queue_put(&work, from);
 	while (work.nr) {
 		struct commit_list *list;
 		struct commit *commit = prio_queue_get(&work);
 
-		if (commit->object.flags & THEY_HAVE) {
-			want->object.flags |= COMMON_KNOWN;
+		if (commit->object.flags & with_flag) {
+			from->object.flags |= assign_flag;
 			break;
 		}
 		if (!commit->object.parsed)
@@ -362,10 +363,10 @@ static int reachable(struct commit *want)
 				prio_queue_put(&work, parent);
 		}
 	}
-	want->object.flags |= REACHABLE;
-	clear_commit_marks(want, REACHABLE);
+	from->object.flags |= REACHABLE;
+	clear_commit_marks(from, REACHABLE);
 	clear_prio_queue(&work);
-	return (want->object.flags & COMMON_KNOWN);
+	return (from->object.flags & assign_flag);
 }
 
 static int ok_to_give_up(void)
@@ -390,7 +391,7 @@ static int ok_to_give_up(void)
 			want_obj.objects[i].item->flags |= COMMON_KNOWN;
 			continue;
 		}
-		if (!reachable((struct commit *)want))
+		if (!reachable((struct commit *)want, THEY_HAVE, COMMON_KNOWN))
 			return 0;
 	}
 	return 1;
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 06/18] upload-pack: refactor ok_to_give_up()
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (4 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 05/18] upload-pack: make reachable() more generic Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 07/18] upload-pack: generalize commit date cutoff Derrick Stolee
                     ` (15 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

In anticipation of consolidating all commit reachability algorithms,
refactor ok_to_give_up() in order to allow splitting its logic into
an external method.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 upload-pack.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 5a639cb47b..9fe19003c6 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -369,34 +369,46 @@ static int reachable(struct commit *from, unsigned int with_flag,
 	return (from->object.flags & assign_flag);
 }
 
-static int ok_to_give_up(void)
+/*
+ * Determine if every commit in 'from' can reach at least one commit
+ * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
+ * as a marker for commits that are already visited.
+ */
+static int can_all_from_reach_with_flag(struct object_array *from,
+					unsigned int with_flag,
+					unsigned int assign_flag)
 {
 	int i;
 
-	if (!have_obj.nr)
-		return 0;
-
-	for (i = 0; i < want_obj.nr; i++) {
-		struct object *want = want_obj.objects[i].item;
+	for (i = 0; i < from->nr; i++) {
+		struct object *from_one = from->objects[i].item;
 
-		if (want->flags & COMMON_KNOWN)
+		if (from_one->flags & assign_flag)
 			continue;
-		want = deref_tag(the_repository, want, "a want line", 0);
-		if (!want || want->type != OBJ_COMMIT) {
+		from_one = deref_tag(the_repository, from_one, "a from object", 0);
+		if (!from_one || from_one->type != OBJ_COMMIT) {
 			/* no way to tell if this is reachable by
 			 * looking at the ancestry chain alone, so
 			 * leave a note to ourselves not to worry about
 			 * this object anymore.
 			 */
-			want_obj.objects[i].item->flags |= COMMON_KNOWN;
+			from->objects[i].item->flags |= assign_flag;
 			continue;
 		}
-		if (!reachable((struct commit *)want, THEY_HAVE, COMMON_KNOWN))
+		if (!reachable((struct commit *)from_one, with_flag, assign_flag))
 			return 0;
 	}
 	return 1;
 }
 
+static int ok_to_give_up(void)
+{
+	if (!have_obj.nr)
+		return 0;
+
+	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, COMMON_KNOWN);
+}
+
 static int get_common_commits(void)
 {
 	struct object_id oid;
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 07/18] upload-pack: generalize commit date cutoff
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (5 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 06/18] upload-pack: refactor ok_to_give_up() Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 08/18] commit-reach: move can_all_from_reach_with_flags Derrick Stolee
                     ` (14 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The ok_to_give_up() method uses the commit date as a cutoff to avoid
walking the entire reachable set of commits. Before moving the
reachable() method to commit-reach.c, pull out the dependence on the
global constant 'oldest_have' with a 'min_commit_date' parameter.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 upload-pack.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 9fe19003c6..427de461d8 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -337,7 +337,7 @@ static int got_oid(const char *hex, struct object_id *oid)
 }
 
 static int reachable(struct commit *from, unsigned int with_flag,
-		     unsigned int assign_flag)
+		     unsigned int assign_flag, time_t min_commit_date)
 {
 	struct prio_queue work = { compare_commits_by_commit_date };
 
@@ -355,7 +355,7 @@ static int reachable(struct commit *from, unsigned int with_flag,
 		if (commit->object.flags & REACHABLE)
 			continue;
 		commit->object.flags |= REACHABLE;
-		if (commit->date < oldest_have)
+		if (commit->date < min_commit_date)
 			continue;
 		for (list = commit->parents; list; list = list->next) {
 			struct commit *parent = list->item;
@@ -372,11 +372,13 @@ static int reachable(struct commit *from, unsigned int with_flag,
 /*
  * Determine if every commit in 'from' can reach at least one commit
  * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
- * as a marker for commits that are already visited.
+ * as a marker for commits that are already visited. Do not walk
+ * commits with date below 'min_commit_date'.
  */
 static int can_all_from_reach_with_flag(struct object_array *from,
 					unsigned int with_flag,
-					unsigned int assign_flag)
+					unsigned int assign_flag,
+					time_t min_commit_date)
 {
 	int i;
 
@@ -395,7 +397,8 @@ static int can_all_from_reach_with_flag(struct object_array *from,
 			from->objects[i].item->flags |= assign_flag;
 			continue;
 		}
-		if (!reachable((struct commit *)from_one, with_flag, assign_flag))
+		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
+			       min_commit_date))
 			return 0;
 	}
 	return 1;
@@ -406,7 +409,8 @@ static int ok_to_give_up(void)
 	if (!have_obj.nr)
 		return 0;
 
-	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, COMMON_KNOWN);
+	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE,
+					    COMMON_KNOWN, oldest_have);
 }
 
 static int get_common_commits(void)
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 08/18] commit-reach: move can_all_from_reach_with_flags
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (6 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 07/18] upload-pack: generalize commit date cutoff Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 09/18] test-reach: create new test tool for ref_newer Derrick Stolee
                     ` (13 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

There are several commit walks in the codebase. Group them together into
a new commit-reach.c file and corresponding header. After we group these
walks into one place, we can reduce duplicate logic by calling
equivalent methods.

The can_all_from_reach_with_flag method is used in a stateful way by
upload-pack.c. The parameters are very flexible, so we will be able to
use its commit walking logic for many other callers.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 63 +++++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h | 14 ++++++++++
 object.h       |  4 +--
 upload-pack.c  | 70 +-------------------------------------------------
 4 files changed, 80 insertions(+), 71 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index 01d796f011..d806291d5d 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -10,6 +10,7 @@
 #include "commit-reach.h"
 
 /* Remember to update object flag allocation in object.h */
+#define REACHABLE       (1u<<15)
 #define PARENT1		(1u<<16)
 #define PARENT2		(1u<<17)
 #define STALE		(1u<<18)
@@ -532,3 +533,65 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 		return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
 	return is_descendant_of(commit, list);
 }
+
+int reachable(struct commit *from, unsigned int with_flag,
+	      unsigned int assign_flag, time_t min_commit_date)
+{
+	struct prio_queue work = { compare_commits_by_commit_date };
+
+	prio_queue_put(&work, from);
+	while (work.nr) {
+		struct commit_list *list;
+		struct commit *commit = prio_queue_get(&work);
+
+		if (commit->object.flags & with_flag) {
+			from->object.flags |= assign_flag;
+			break;
+		}
+		if (!commit->object.parsed)
+			parse_object(the_repository, &commit->object.oid);
+		if (commit->object.flags & REACHABLE)
+			continue;
+		commit->object.flags |= REACHABLE;
+		if (commit->date < min_commit_date)
+			continue;
+		for (list = commit->parents; list; list = list->next) {
+			struct commit *parent = list->item;
+			if (!(parent->object.flags & REACHABLE))
+				prio_queue_put(&work, parent);
+		}
+	}
+	from->object.flags |= REACHABLE;
+	clear_commit_marks(from, REACHABLE);
+	clear_prio_queue(&work);
+	return (from->object.flags & assign_flag);
+}
+
+int can_all_from_reach_with_flag(struct object_array *from,
+				 unsigned int with_flag,
+				 unsigned int assign_flag,
+				 time_t min_commit_date)
+{
+	int i;
+
+	for (i = 0; i < from->nr; i++) {
+		struct object *from_one = from->objects[i].item;
+
+		if (from_one->flags & assign_flag)
+			continue;
+		from_one = deref_tag(the_repository, from_one, "a from object", 0);
+		if (!from_one || from_one->type != OBJ_COMMIT) {
+			/* no way to tell if this is reachable by
+			 * looking at the ancestry chain alone, so
+			 * leave a note to ourselves not to worry about
+			 * this object anymore.
+			 */
+			from->objects[i].item->flags |= assign_flag;
+			continue;
+		}
+		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
+			       min_commit_date))
+			return 0;
+	}
+	return 1;
+}
diff --git a/commit-reach.h b/commit-reach.h
index 13dec25cee..b28bc22fcd 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -59,4 +59,18 @@ define_commit_slab(contains_cache, enum contains_result);
 int commit_contains(struct ref_filter *filter, struct commit *commit,
 		    struct commit_list *list, struct contains_cache *cache);
 
+int reachable(struct commit *from, unsigned int with_flag,
+	      unsigned int assign_flag, time_t min_commit_date);
+
+/*
+ * Determine if every commit in 'from' can reach at least one commit
+ * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
+ * as a marker for commits that are already visited. Do not walk
+ * commits with date below 'min_commit_date'.
+ */
+int can_all_from_reach_with_flag(struct object_array *from,
+				 unsigned int with_flag,
+				 unsigned int assign_flag,
+				 time_t min_commit_date);
+
 #endif
diff --git a/object.h b/object.h
index 18c2b073e3..b132944c51 100644
--- a/object.h
+++ b/object.h
@@ -60,12 +60,12 @@ struct object_array {
  * revision.h:               0---------10                                26
  * fetch-pack.c:             0----5
  * walker.c:                 0-2
- * upload-pack.c:                4       11----------------19
+ * upload-pack.c:                4       11-----14  16-----19
  * builtin/blame.c:                        12-13
  * bisect.c:                                        16
  * bundle.c:                                        16
  * http-push.c:                                     16-----19
- * commit-reach.c:                                  16-----19
+ * commit-reach.c:                                15-------19
  * sha1-name.c:                                              20
  * list-objects-filter.c:                                      21
  * builtin/fsck.c:           0--3
diff --git a/upload-pack.c b/upload-pack.c
index 427de461d8..11c426685d 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -24,13 +24,13 @@
 #include "quote.h"
 #include "upload-pack.h"
 #include "serve.h"
+#include "commit-reach.h"
 
 /* Remember to update object flag allocation in object.h */
 #define THEY_HAVE	(1u << 11)
 #define OUR_REF		(1u << 12)
 #define WANTED		(1u << 13)
 #define COMMON_KNOWN	(1u << 14)
-#define REACHABLE	(1u << 15)
 
 #define SHALLOW		(1u << 16)
 #define NOT_SHALLOW	(1u << 17)
@@ -336,74 +336,6 @@ static int got_oid(const char *hex, struct object_id *oid)
 	return 0;
 }
 
-static int reachable(struct commit *from, unsigned int with_flag,
-		     unsigned int assign_flag, time_t min_commit_date)
-{
-	struct prio_queue work = { compare_commits_by_commit_date };
-
-	prio_queue_put(&work, from);
-	while (work.nr) {
-		struct commit_list *list;
-		struct commit *commit = prio_queue_get(&work);
-
-		if (commit->object.flags & with_flag) {
-			from->object.flags |= assign_flag;
-			break;
-		}
-		if (!commit->object.parsed)
-			parse_object(the_repository, &commit->object.oid);
-		if (commit->object.flags & REACHABLE)
-			continue;
-		commit->object.flags |= REACHABLE;
-		if (commit->date < min_commit_date)
-			continue;
-		for (list = commit->parents; list; list = list->next) {
-			struct commit *parent = list->item;
-			if (!(parent->object.flags & REACHABLE))
-				prio_queue_put(&work, parent);
-		}
-	}
-	from->object.flags |= REACHABLE;
-	clear_commit_marks(from, REACHABLE);
-	clear_prio_queue(&work);
-	return (from->object.flags & assign_flag);
-}
-
-/*
- * Determine if every commit in 'from' can reach at least one commit
- * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
- * as a marker for commits that are already visited. Do not walk
- * commits with date below 'min_commit_date'.
- */
-static int can_all_from_reach_with_flag(struct object_array *from,
-					unsigned int with_flag,
-					unsigned int assign_flag,
-					time_t min_commit_date)
-{
-	int i;
-
-	for (i = 0; i < from->nr; i++) {
-		struct object *from_one = from->objects[i].item;
-
-		if (from_one->flags & assign_flag)
-			continue;
-		from_one = deref_tag(the_repository, from_one, "a from object", 0);
-		if (!from_one || from_one->type != OBJ_COMMIT) {
-			/* no way to tell if this is reachable by
-			 * looking at the ancestry chain alone, so
-			 * leave a note to ourselves not to worry about
-			 * this object anymore.
-			 */
-			from->objects[i].item->flags |= assign_flag;
-			continue;
-		}
-		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
-			       min_commit_date))
-			return 0;
-	}
-	return 1;
-}
-
 static int ok_to_give_up(void)
 {
 	if (!have_obj.nr)
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 09/18] test-reach: create new test tool for ref_newer
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (7 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 08/18] commit-reach: move can_all_from_reach_with_flags Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 10/18] test-reach: test in_merge_bases Derrick Stolee
                     ` (12 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

As we prepare to change the behavior of the algorithms in
commit-reach.c, create a new test-tool subcommand 'reach' to test these
methods on interesting commit-graph shapes.

To use the new test-tool, use 'test-tool reach <method>' and provide
input to stdin that describes the inputs to the method. Currently, we
only implement the ref_newer method, which requires two commits. Use
lines "A:<committish>" and "B:<committish>" for the two inputs. We will
expand this input later to accommodate methods that take lists of
commits.

The test t6600-test-reach.sh creates a repo whose commits form a
two-dimensional grid. This grid makes it easy for us to determine
reachability because commit-A-B can reach commit-X-Y if and only if A is
at least X and B is at least Y. This helps create interesting test cases
for each result of the methods in commit-reach.c.

We test all methods in three different states of the commit-graph file:
Non-existent (no generation numbers), fully computed, and mixed (some
commits have generation numbers and others do not).

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 Makefile              |  1 +
 t/helper/test-reach.c | 63 +++++++++++++++++++++++++++++++
 t/helper/test-tool.c  |  1 +
 t/helper/test-tool.h  |  1 +
 t/t6600-test-reach.sh | 86 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 152 insertions(+)
 create mode 100644 t/helper/test-reach.c
 create mode 100755 t/t6600-test-reach.sh

diff --git a/Makefile b/Makefile
index 59781f4bc3..d69f9d415d 100644
--- a/Makefile
+++ b/Makefile
@@ -716,6 +716,7 @@ TEST_BUILTINS_OBJS += test-mktemp.o
 TEST_BUILTINS_OBJS += test-online-cpus.o
 TEST_BUILTINS_OBJS += test-path-utils.o
 TEST_BUILTINS_OBJS += test-prio-queue.o
+TEST_BUILTINS_OBJS += test-reach.o
 TEST_BUILTINS_OBJS += test-read-cache.o
 TEST_BUILTINS_OBJS += test-ref-store.o
 TEST_BUILTINS_OBJS += test-regex.o
diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
new file mode 100644
index 0000000000..620bb46041
--- /dev/null
+++ b/t/helper/test-reach.c
@@ -0,0 +1,63 @@
+#include "test-tool.h"
+#include "cache.h"
+#include "commit.h"
+#include "commit-reach.h"
+#include "config.h"
+#include "parse-options.h"
+#include "tag.h"
+
+int cmd__reach(int ac, const char **av)
+{
+	struct object_id oid_A, oid_B;
+	struct strbuf buf = STRBUF_INIT;
+	struct repository *r = the_repository;
+
+	setup_git_directory();
+
+	if (ac < 2)
+		exit(1);
+
+
+	while (strbuf_getline(&buf, stdin) != EOF) {
+		struct object_id oid;
+		struct object *o;
+		struct commit *c;
+		if (buf.len < 3)
+			continue;
+
+		if (get_oid_committish(buf.buf + 2, &oid))
+			die("failed to resolve %s", buf.buf + 2);
+
+		o = parse_object(r, &oid);
+		o = deref_tag_noverify(o);
+
+		if (!o)
+			die("failed to load commit for input %s resulting in oid %s\n",
+			    buf.buf, oid_to_hex(&oid));
+
+		c = object_as_type(r, o, OBJ_COMMIT, 0);
+
+		if (!c)
+			die("failed to load commit for input %s resulting in oid %s\n",
+			    buf.buf, oid_to_hex(&oid));
+
+		switch (buf.buf[0]) {
+			case 'A':
+				oidcpy(&oid_A, &oid);
+				break;
+
+			case 'B':
+				oidcpy(&oid_B, &oid);
+				break;
+
+			default:
+				die("unexpected start of line: %c", buf.buf[0]);
+		}
+	}
+	strbuf_release(&buf);
+
+	if (!strcmp(av[1], "ref_newer"))
+		printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B));
+
+	exit(0);
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index dafc91c240..582d02adfd 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -26,6 +26,7 @@ static struct test_cmd cmds[] = {
 	{ "online-cpus", cmd__online_cpus },
 	{ "path-utils", cmd__path_utils },
 	{ "prio-queue", cmd__prio_queue },
+	{ "reach", cmd__reach },
 	{ "read-cache", cmd__read_cache },
 	{ "ref-store", cmd__ref_store },
 	{ "regex", cmd__regex },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 80cbcf0857..a7e53c420e 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -20,6 +20,7 @@ int cmd__mktemp(int argc, const char **argv);
 int cmd__online_cpus(int argc, const char **argv);
 int cmd__path_utils(int argc, const char **argv);
 int cmd__prio_queue(int argc, const char **argv);
+int cmd__reach(int argc, const char **argv);
 int cmd__read_cache(int argc, const char **argv);
 int cmd__ref_store(int argc, const char **argv);
 int cmd__regex(int argc, const char **argv);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
new file mode 100755
index 0000000000..966309c6cf
--- /dev/null
+++ b/t/t6600-test-reach.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+test_description='basic commit reachability tests'
+
+. ./test-lib.sh
+
+# Construct a grid-like commit graph with points (x,y)
+# with 1 <= x <= 10, 1 <= y <= 10, where (x,y) has
+# parents (x-1, y) and (x, y-1), keeping in mind that
+# we drop a parent if a coordinate is nonpositive.
+#
+#             (10,10)
+#            /       \
+#         (10,9)    (9,10)
+#        /     \   /      \
+#    (10,8)    (9,9)      (8,10)
+#   /     \    /   \      /    \
+#         ( continued...)
+#   \     /    \   /      \    /
+#    (3,1)     (2,2)      (1,3)
+#        \     /    \     /
+#         (2,1)      (1,2)
+#              \    /
+#              (1,1)
+#
+# We use branch 'commit-x-y' to refer to (x,y).
+# This grid allows interesting reachability and
+# non-reachability queries: (x,y) can reach (x',y')
+# if and only if x' <= x and y' <= y.
+test_expect_success 'setup' '
+	for i in $(test_seq 1 10)
+	do
+		test_commit "1-$i" &&
+		git branch -f commit-1-$i
+	done &&
+	for j in $(test_seq 1 9)
+	do
+		git reset --hard commit-$j-1 &&
+		x=$(($j + 1)) &&
+		test_commit "$x-1" &&
+		git branch -f commit-$x-1 &&
+
+		for i in $(test_seq 2 10)
+		do
+			git merge commit-$j-$i -m "$x-$i" &&
+			git branch -f commit-$x-$i
+		done
+	done &&
+	git commit-graph write --reachable &&
+	mv .git/objects/info/commit-graph commit-graph-full &&
+	git show-ref -s commit-5-5 | git commit-graph write --stdin-commits &&
+	mv .git/objects/info/commit-graph commit-graph-half &&
+	git config core.commitGraph true
+'
+
+test_three_modes () {
+	test_when_finished rm -rf .git/objects/info/commit-graph &&
+	test-tool reach $1 <input >actual &&
+	test_cmp expect actual &&
+	cp commit-graph-full .git/objects/info/commit-graph &&
+	test-tool reach $1 <input >actual &&
+	test_cmp expect actual &&
+	cp commit-graph-half .git/objects/info/commit-graph &&
+	test-tool reach $1 <input >actual &&
+	test_cmp expect actual
+}
+
+test_expect_success 'ref_newer:miss' '
+	cat >input <<-\EOF &&
+	A:commit-5-7
+	B:commit-4-9
+	EOF
+	echo "ref_newer(A,B):0" >expect &&
+	test_three_modes ref_newer
+'
+
+test_expect_success 'ref_newer:hit' '
+	cat >input <<-\EOF &&
+	A:commit-5-7
+	B:commit-2-3
+	EOF
+	echo "ref_newer(A,B):1" >expect &&
+	test_three_modes ref_newer
+'
+
+test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 10/18] test-reach: test in_merge_bases
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (8 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 09/18] test-reach: create new test tool for ref_newer Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 11/18] test-reach: test is_descendant_of Derrick Stolee
                     ` (11 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c |  6 ++++++
 t/t6600-test-reach.sh | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 620bb46041..f93ad5084d 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -9,6 +9,7 @@
 int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
+	struct commit *A, *B;
 	struct strbuf buf = STRBUF_INIT;
 	struct repository *r = the_repository;
 
@@ -17,6 +18,7 @@ int cmd__reach(int ac, const char **av)
 	if (ac < 2)
 		exit(1);
 
+	A = B = NULL;
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		struct object_id oid;
@@ -44,10 +46,12 @@ int cmd__reach(int ac, const char **av)
 		switch (buf.buf[0]) {
 			case 'A':
 				oidcpy(&oid_A, &oid);
+				A = c;
 				break;
 
 			case 'B':
 				oidcpy(&oid_B, &oid);
+				B = c;
 				break;
 
 			default:
@@ -58,6 +62,8 @@ int cmd__reach(int ac, const char **av)
 
 	if (!strcmp(av[1], "ref_newer"))
 		printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B));
+	else if (!strcmp(av[1], "in_merge_bases"))
+		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
 
 	exit(0);
 }
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 966309c6cf..5cd6b14c69 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -83,4 +83,22 @@ test_expect_success 'ref_newer:hit' '
 	test_three_modes ref_newer
 '
 
+test_expect_success 'in_merge_bases:hit' '
+	cat >input <<-\EOF &&
+	A:commit-5-7
+	B:commit-8-8
+	EOF
+	echo "in_merge_bases(A,B):1" >expect &&
+	test_three_modes in_merge_bases
+'
+
+test_expect_success 'in_merge_bases:miss' '
+	cat >input <<-\EOF &&
+	A:commit-6-8
+	B:commit-5-9
+	EOF
+	echo "in_merge_bases(A,B):0" >expect &&
+	test_three_modes in_merge_bases
+'
+
 test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 11/18] test-reach: test is_descendant_of
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (9 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 10/18] test-reach: test in_merge_bases Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 12/18] test-reach: test get_merge_bases_many Derrick Stolee
                     ` (10 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The is_descendant_of method takes a single commit as its first parameter
and a list of commits as its second parameter. Extend the input of the
'test-tool reach' command to take multiple lines of the form
"X:<committish>" to construct a list of commits. Pass these to
is_descendant_of and create tests that check each result.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c |  8 ++++++++
 t/t6600-test-reach.sh | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index f93ad5084d..dccbd48178 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -10,6 +10,7 @@ int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
 	struct commit *A, *B;
+	struct commit_list *X;
 	struct strbuf buf = STRBUF_INIT;
 	struct repository *r = the_repository;
 
@@ -19,6 +20,7 @@ int cmd__reach(int ac, const char **av)
 		exit(1);
 
 	A = B = NULL;
+	X = NULL;
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		struct object_id oid;
@@ -54,6 +56,10 @@ int cmd__reach(int ac, const char **av)
 				B = c;
 				break;
 
+			case 'X':
+				commit_list_insert(c, &X);
+				break;
+
 			default:
 				die("unexpected start of line: %c", buf.buf[0]);
 		}
@@ -64,6 +70,8 @@ int cmd__reach(int ac, const char **av)
 		printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B));
 	else if (!strcmp(av[1], "in_merge_bases"))
 		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
+	else if (!strcmp(av[1], "is_descendant_of"))
+		printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X));
 
 	exit(0);
 }
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 5cd6b14c69..98bcb17960 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -101,4 +101,26 @@ test_expect_success 'in_merge_bases:miss' '
 	test_three_modes in_merge_bases
 '
 
+test_expect_success 'is_descendant_of:hit' '
+	cat >input <<-\EOF &&
+	A:commit-5-7
+	X:commit-4-8
+	X:commit-6-6
+	X:commit-1-1
+	EOF
+	echo "is_descendant_of(A,X):1" >expect &&
+	test_three_modes is_descendant_of
+'
+
+test_expect_success 'is_descendant_of:miss' '
+	cat >input <<-\EOF &&
+	A:commit-6-8
+	X:commit-5-9
+	X:commit-4-10
+	X:commit-7-6
+	EOF
+	echo "is_descendant_of(A,X):0" >expect &&
+	test_three_modes is_descendant_of
+'
+
 test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 12/18] test-reach: test get_merge_bases_many
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (10 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 11/18] test-reach: test is_descendant_of Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 13/18] test-reach: test reduce_heads Derrick Stolee
                     ` (9 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The get_merge_bases_many method returns a list of merge bases for a
single commit (A) against a list of commits (X). Some care is needed in
constructing the expected behavior because the result is not the
expected merge-base for an octopus merge with those parents but instead
the set of maximal commits that are reachable from A and at least one of
the commits in X.

Add get_merge_bases_many to 'test-tool reach' and create a test that
demonstrates that this output returns multiple results. Specifically, we
select a list of three commits such that we output two commits that are
reachable from one of the first two, respectively, and none are
reachable from the third.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c | 31 +++++++++++++++++++++++++++++++
 t/t6600-test-reach.sh | 15 +++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index dccbd48178..4df01187c9 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -4,13 +4,34 @@
 #include "commit-reach.h"
 #include "config.h"
 #include "parse-options.h"
+#include "string-list.h"
 #include "tag.h"
 
+static void print_sorted_commit_ids(struct commit_list *list)
+{
+	int i;
+	struct string_list s = STRING_LIST_INIT_DUP;
+
+	while (list) {
+		string_list_append(&s, oid_to_hex(&list->item->object.oid));
+		list = list->next;
+	}
+
+	string_list_sort(&s);
+
+	for (i = 0; i < s.nr; i++)
+		printf("%s\n", s.items[i].string);
+
+	string_list_clear(&s, 0);
+}
+
 int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
 	struct commit *A, *B;
 	struct commit_list *X;
+	struct commit **X_array;
+	int X_nr, X_alloc;
 	struct strbuf buf = STRBUF_INIT;
 	struct repository *r = the_repository;
 
@@ -21,6 +42,9 @@ int cmd__reach(int ac, const char **av)
 
 	A = B = NULL;
 	X = NULL;
+	X_nr = 0;
+	X_alloc = 16;
+	ALLOC_ARRAY(X_array, X_alloc);
 
 	while (strbuf_getline(&buf, stdin) != EOF) {
 		struct object_id oid;
@@ -58,6 +82,8 @@ int cmd__reach(int ac, const char **av)
 
 			case 'X':
 				commit_list_insert(c, &X);
+				ALLOC_GROW(X_array, X_nr + 1, X_alloc);
+				X_array[X_nr++] = c;
 				break;
 
 			default:
@@ -72,6 +98,11 @@ int cmd__reach(int ac, const char **av)
 		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
 	else if (!strcmp(av[1], "is_descendant_of"))
 		printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X));
+	else if (!strcmp(av[1], "get_merge_bases_many")) {
+		struct commit_list *list = get_merge_bases_many(A, X_nr, X_array);
+		printf("%s(A,X):\n", av[1]);
+		print_sorted_commit_ids(list);
+	}
 
 	exit(0);
 }
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 98bcb17960..d43e1a61d5 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -123,4 +123,19 @@ test_expect_success 'is_descendant_of:miss' '
 	test_three_modes is_descendant_of
 '
 
+test_expect_success 'get_merge_bases_many' '
+	cat >input <<-\EOF &&
+	A:commit-5-7
+	X:commit-4-8
+	X:commit-6-6
+	X:commit-8-3
+	EOF
+	{
+		echo "get_merge_bases_many(A,X):" &&
+		git rev-parse commit-5-6 \
+			      commit-4-7 | sort
+	} >expect &&
+	test_three_modes get_merge_bases_many
+'
+
 test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 13/18] test-reach: test reduce_heads
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (11 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 12/18] test-reach: test get_merge_bases_many Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 14/18] test-reach: test can_all_from_reach_with_flags Derrick Stolee
                     ` (8 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c |  4 ++++
 t/t6600-test-reach.sh | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index 4df01187c9..e32e193b70 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -102,6 +102,10 @@ int cmd__reach(int ac, const char **av)
 		struct commit_list *list = get_merge_bases_many(A, X_nr, X_array);
 		printf("%s(A,X):\n", av[1]);
 		print_sorted_commit_ids(list);
+	} else if (!strcmp(av[1], "reduce_heads")) {
+		struct commit_list *list = reduce_heads(X);
+		printf("%s(X):\n", av[1]);
+		print_sorted_commit_ids(list);
 	}
 
 	exit(0);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index d43e1a61d5..17c6467988 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -138,4 +138,26 @@ test_expect_success 'get_merge_bases_many' '
 	test_three_modes get_merge_bases_many
 '
 
+test_expect_success 'reduce_heads' '
+	cat >input <<-\EOF &&
+	X:commit-1-10
+	X:commit-2-8
+	X:commit-3-6
+	X:commit-4-4
+	X:commit-1-7
+	X:commit-2-5
+	X:commit-3-3
+	X:commit-5-1
+	EOF
+	{
+		echo "reduce_heads(X):" &&
+		git rev-parse commit-5-1 \
+			      commit-4-4 \
+			      commit-3-6 \
+			      commit-2-8 \
+			      commit-1-10 | sort
+	} >expect &&
+	test_three_modes reduce_heads
+'
+
 test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 14/18] test-reach: test can_all_from_reach_with_flags
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (12 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 13/18] test-reach: test reduce_heads Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 15/18] test-reach: test commit_contains Derrick Stolee
                     ` (7 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The can_all_from_reach_with_flag method is used by ok_to_give_up in
upload-pack.c to see if we have done enough negotiation during a fetch.
This method is intentionally created to preserve state between calls to
assist with stateful negotiation, such as over SSH.

To make this method testable, add a new can_all_from_reach method that
does the initial setup and final tear-down. We will later use this
method in production code. Call the method from 'test-tool reach' for
now.

Since this is a many-to-many reachability query, add a new type of input
to the 'test-tool reach' input format. Lines "Y:<committish>" create a
list of commits to be the reachability targets from the commits in the
'X' list. In the context of fetch negotiation, the 'X' commits are the
'want' commits and the 'Y' commits are the 'have' commits.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c        | 47 +++++++++++++++++++++++++++++++++++++++++++
 commit-reach.h        |  2 ++
 t/helper/test-reach.c | 10 +++++++--
 t/t6600-test-reach.sh | 45 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index d806291d5d..940fbf2e17 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -595,3 +595,50 @@ int can_all_from_reach_with_flag(struct object_array *from,
 	}
 	return 1;
 }
+
+int can_all_from_reach(struct commit_list *from, struct commit_list *to,
+		       int cutoff_by_min_date)
+{
+	struct object_array from_objs = OBJECT_ARRAY_INIT;
+	time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0;
+	struct commit_list *from_iter = from, *to_iter = to;
+	int result;
+
+	while (from_iter) {
+		add_object_array(&from_iter->item->object, NULL, &from_objs);
+
+		if (!parse_commit(from_iter->item)) {
+			if (from_iter->item->date < min_commit_date)
+				min_commit_date = from_iter->item->date;
+		}
+
+		from_iter = from_iter->next;
+	}
+
+	while (to_iter) {
+		if (!parse_commit(to_iter->item)) {
+			if (to_iter->item->date < min_commit_date)
+				min_commit_date = to_iter->item->date;
+		}
+
+		to_iter->item->object.flags |= PARENT2;
+
+		to_iter = to_iter->next;
+	}
+
+	result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1,
+					      min_commit_date);
+
+	while (from) {
+		clear_commit_marks(from->item, PARENT1);
+		from = from->next;
+	}
+
+	while (to) {
+		clear_commit_marks(to->item, PARENT2);
+		to = to->next;
+	}
+
+	object_array_clear(&from_objs);
+	return result;
+}
diff --git a/commit-reach.h b/commit-reach.h
index b28bc22fcd..aa202c9703 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -72,5 +72,7 @@ int can_all_from_reach_with_flag(struct object_array *from,
 				 unsigned int with_flag,
 				 unsigned int assign_flag,
 				 time_t min_commit_date);
+int can_all_from_reach(struct commit_list *from, struct commit_list *to,
+		       int commit_date_cutoff);
 
 #endif
diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index e32e193b70..c79729cac0 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -29,7 +29,7 @@ int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
 	struct commit *A, *B;
-	struct commit_list *X;
+	struct commit_list *X, *Y;
 	struct commit **X_array;
 	int X_nr, X_alloc;
 	struct strbuf buf = STRBUF_INIT;
@@ -41,7 +41,7 @@ int cmd__reach(int ac, const char **av)
 		exit(1);
 
 	A = B = NULL;
-	X = NULL;
+	X = Y = NULL;
 	X_nr = 0;
 	X_alloc = 16;
 	ALLOC_ARRAY(X_array, X_alloc);
@@ -86,6 +86,10 @@ int cmd__reach(int ac, const char **av)
 				X_array[X_nr++] = c;
 				break;
 
+			case 'Y':
+				commit_list_insert(c, &Y);
+				break;
+
 			default:
 				die("unexpected start of line: %c", buf.buf[0]);
 		}
@@ -106,6 +110,8 @@ int cmd__reach(int ac, const char **av)
 		struct commit_list *list = reduce_heads(X);
 		printf("%s(X):\n", av[1]);
 		print_sorted_commit_ids(list);
+	} else if (!strcmp(av[1], "can_all_from_reach")) {
+		printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1));
 	}
 
 	exit(0);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 17c6467988..e41eb397a7 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -160,4 +160,49 @@ test_expect_success 'reduce_heads' '
 	test_three_modes reduce_heads
 '
 
+test_expect_success 'can_all_from_reach:hit' '
+	cat >input <<-\EOF &&
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	Y:commit-1-9
+	Y:commit-2-8
+	Y:commit-3-7
+	Y:commit-4-6
+	Y:commit-5-5
+	Y:commit-6-4
+	Y:commit-7-3
+	Y:commit-8-1
+	EOF
+	echo "can_all_from_reach(X,Y):1" >expect &&
+	test_three_modes can_all_from_reach
+'
+
+test_expect_success 'can_all_from_reach:miss' '
+	cat >input <<-\EOF &&
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	Y:commit-1-9
+	Y:commit-2-8
+	Y:commit-3-7
+	Y:commit-4-6
+	Y:commit-5-5
+	Y:commit-6-4
+	Y:commit-8-5
+	EOF
+	echo "can_all_from_reach(X,Y):0" >expect &&
+	test_three_modes can_all_from_reach
+'
+
 test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 15/18] test-reach: test commit_contains
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (13 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 14/18] test-reach: test can_all_from_reach_with_flags Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-23 20:35     ` Jonathan Tan
  2018-07-20 16:33   ` [PATCH v2 16/18] commit-reach: replace ref_newer logic Derrick Stolee
                     ` (6 subsequent siblings)
  21 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The commit_contains method has two modes which depend on the given
ref_filter struct. We have the "normal" algorithm (which is also the
typically-slow operation) and the "tag" algorithm. This difference is
essentially what changes performance for 'git branch --contains' versus
'git tag --contains'. There are thoughts that the data shapes used by
these two applications justify the different implementations.

Create tests using 'test-tool reach commit_contains [--tag]' to cover
both methods.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/helper/test-reach.c | 12 ++++++++++++
 t/t6600-test-reach.sh | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index c79729cac0..eb21103998 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -4,6 +4,7 @@
 #include "commit-reach.h"
 #include "config.h"
 #include "parse-options.h"
+#include "ref-filter.h"
 #include "string-list.h"
 #include "tag.h"
 
@@ -112,6 +113,17 @@ int cmd__reach(int ac, const char **av)
 		print_sorted_commit_ids(list);
 	} else if (!strcmp(av[1], "can_all_from_reach")) {
 		printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1));
+	} else if (!strcmp(av[1], "commit_contains")) {
+		struct ref_filter filter;
+		struct contains_cache cache;
+		init_contains_cache(&cache);
+
+		if (ac > 2 && !strcmp(av[2], "--tag"))
+			filter.with_commit_tag_algo = 1;
+		else
+			filter.with_commit_tag_algo = 0;
+
+		printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache));
 	}
 
 	exit(0);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index e41eb397a7..d139a00d1d 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -205,4 +205,38 @@ test_expect_success 'can_all_from_reach:miss' '
 	test_three_modes can_all_from_reach
 '
 
+test_expect_success 'commit_contains:hit' '
+	cat >input <<-\EOF &&
+	A:commit-7-7
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	EOF
+	echo "commit_contains(_,A,X,_):1" >expect &&
+	test_three_modes commit_contains &&
+	test_three_modes commit_contains --tag
+'
+
+test_expect_success 'commit_contains:miss' '
+	cat >input <<-\EOF &&
+	A:commit-6-5
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	EOF
+	echo "commit_contains(_,A,X,_):0" >expect &&
+	test_three_modes commit_contains &&
+	test_three_modes commit_contains --tag
+'
+
 test_done
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 16/18] commit-reach: replace ref_newer logic
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (14 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 15/18] test-reach: test commit_contains Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 16:33   ` [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear Derrick Stolee
                     ` (5 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The ref_newer method is used by 'git push' to check if a force-push is
required. This method does not use any kind of cutoff when walking, so
in the case of a force-push will walk all reachable commits.

The is_descendant_of method already uses paint_down_to_common along with
cutoffs. By translating the ref_newer arguments into the commit and
commit_list required by is_descendant_of, we can have one fewer commit
walk and also improve our performance!

For a copy of the Linux repository, 'test-tool reach ref_newer' presents
the following improvements with the specified input. In the case that
ref_newer returns 1, there is no improvement. The improvement is in the
second case where ref_newer returns 0.

Input:
A:v4.9
B:v3.19

Before: 0.09 s
 After: 0.09 s

To test the negative case, add a new commit with parent v3.19,
regenerate the commit-graph, and then run with B pointing at that
commit.

Before: 0.43 s
 After: 0.09 s

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index 940fbf2e17..f5858944fd 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -366,20 +366,11 @@ void reduce_heads_replace(struct commit_list **heads)
 	*heads = result;
 }
 
-static void unmark_and_free(struct commit_list *list, unsigned int mark)
-{
-	while (list) {
-		struct commit *commit = pop_commit(&list);
-		commit->object.flags &= ~mark;
-	}
-}
-
 int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
 {
 	struct object *o;
 	struct commit *old_commit, *new_commit;
-	struct commit_list *list, *used;
-	int found = 0;
+	struct commit_list *old_commit_list = NULL;
 
 	/*
 	 * Both new_commit and old_commit must be commit-ish and new_commit is descendant of
@@ -400,19 +391,8 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid)
 	if (parse_commit(new_commit) < 0)
 		return 0;
 
-	used = list = NULL;
-	commit_list_insert(new_commit, &list);
-	while (list) {
-		new_commit = pop_most_recent_commit(&list, TMP_MARK);
-		commit_list_insert(new_commit, &used);
-		if (new_commit == old_commit) {
-			found = 1;
-			break;
-		}
-	}
-	unmark_and_free(list, TMP_MARK);
-	unmark_and_free(used, TMP_MARK);
-	return found;
+	commit_list_insert(old_commit, &old_commit_list);
+	return is_descendant_of(new_commit, old_commit_list);
 }
 
 /*
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (15 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 16/18] commit-reach: replace ref_newer logic Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-23 20:41     ` Jonathan Tan
  2018-09-12  4:14     ` Jeff King
  2018-07-20 16:33   ` [PATCH v2 18/18] commit-reach: use can_all_from_reach Derrick Stolee
                     ` (4 subsequent siblings)
  21 siblings, 2 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The can_all_from_reach_with_flags() algorithm is currently quadratic in
the worst case, because it calls the reachable() method for every 'from'
without tracking which commits have already been walked or which can
already reach a commit in 'to'.

Rewrite the algorithm to walk each commit a constant number of times.

We also add some optimizations that should work for the main consumer of
this method: fetch negotiation (haves/wants).

The first step includes using a depth-first-search (DFS) from each
'from' commit, sorted by ascending generation number. We do not walk
beyond the minimum generation number or the minimum commit date. This
DFS is likely to be faster than the existing reachable() method because
we expect previous ref values to be along the first-parent history.

If we find a target commit, then we mark everything in the DFS stack as
a RESULT. This expands the set of targets for the other 'from' commits.
We also mark the visited commits using 'assign_flag' to prevent re-
walking the same commits.

We still need to clear our flags at the end, which is why we will have a
total of three visits to each commit.

Performance was measured on the Linux repository using
'test-tool reach can_all_from_reach'. The input included rows seeded by
tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as
v3.[0-9]*. This mimics a (very large) fetch that says "I have all major
v3 releases and want all major v4 releases." The "large" case included
X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate
tags to the set, which does not greatly increase the number of objects
that are considered, but does increase the number of 'from' commits,
demonstrating the quadratic nature of the previous code.

Small Case:

Before: 1.52 s
 After: 0.26 s

Large Case:

Before: 3.50 s
 After: 0.27 s

Note how the time increases between the two cases in the two versions.
The new code increases relative to the number of commits that need to be
walked, but not directly relative to the number of 'from' commits.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-reach.c | 122 ++++++++++++++++++++++++++++++-------------------
 commit-reach.h |   9 ++--
 upload-pack.c  |   5 +-
 3 files changed, 83 insertions(+), 53 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index f5858944fd..bc522d6840 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -514,66 +514,87 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 	return is_descendant_of(commit, list);
 }
 
-int reachable(struct commit *from, unsigned int with_flag,
-	      unsigned int assign_flag, time_t min_commit_date)
+static int compare_commits_by_gen(const void *_a, const void *_b)
 {
-	struct prio_queue work = { compare_commits_by_commit_date };
+	const struct commit *a = (const struct commit *)_a;
+	const struct commit *b = (const struct commit *)_b;
 
-	prio_queue_put(&work, from);
-	while (work.nr) {
-		struct commit_list *list;
-		struct commit *commit = prio_queue_get(&work);
-
-		if (commit->object.flags & with_flag) {
-			from->object.flags |= assign_flag;
-			break;
-		}
-		if (!commit->object.parsed)
-			parse_object(the_repository, &commit->object.oid);
-		if (commit->object.flags & REACHABLE)
-			continue;
-		commit->object.flags |= REACHABLE;
-		if (commit->date < min_commit_date)
-			continue;
-		for (list = commit->parents; list; list = list->next) {
-			struct commit *parent = list->item;
-			if (!(parent->object.flags & REACHABLE))
-				prio_queue_put(&work, parent);
-		}
-	}
-	from->object.flags |= REACHABLE;
-	clear_commit_marks(from, REACHABLE);
-	clear_prio_queue(&work);
-	return (from->object.flags & assign_flag);
+	if (a->generation < b->generation)
+		return -1;
+	if (a->generation > b->generation)
+		return 1;
+	return 0;
 }
 
 int can_all_from_reach_with_flag(struct object_array *from,
 				 unsigned int with_flag,
 				 unsigned int assign_flag,
-				 time_t min_commit_date)
+				 time_t min_commit_date,
+				 uint32_t min_generation)
 {
+	struct commit **list = NULL;
 	int i;
+	int result = 1;
 
+	ALLOC_ARRAY(list, from->nr);
 	for (i = 0; i < from->nr; i++) {
-		struct object *from_one = from->objects[i].item;
+		list[i] = (struct commit *)from->objects[i].item;
 
-		if (from_one->flags & assign_flag)
-			continue;
-		from_one = deref_tag(the_repository, from_one, "a from object", 0);
-		if (!from_one || from_one->type != OBJ_COMMIT) {
-			/* no way to tell if this is reachable by
-			 * looking at the ancestry chain alone, so
-			 * leave a note to ourselves not to worry about
-			 * this object anymore.
-			 */
-			from->objects[i].item->flags |= assign_flag;
-			continue;
-		}
-		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
-			       min_commit_date))
+		if (parse_commit(list[i]) ||
+		    list[i]->generation < min_generation)
 			return 0;
 	}
-	return 1;
+
+	QSORT(list, from->nr, compare_commits_by_gen);
+
+	for (i = 0; i < from->nr; i++) {
+		/* DFS from list[i] */
+		struct commit_list *stack = NULL;
+
+		list[i]->object.flags |= assign_flag;
+		commit_list_insert(list[i], &stack);
+
+		while (stack) {
+			struct commit_list *parent;
+
+			if (stack->item->object.flags & with_flag) {
+				pop_commit(&stack);
+				continue;
+			}
+
+			for (parent = stack->item->parents; parent; parent = parent->next) {
+				if (parent->item->object.flags & (with_flag | RESULT))
+					stack->item->object.flags |= RESULT;
+
+				if (!(parent->item->object.flags & assign_flag)) {
+					parent->item->object.flags |= assign_flag;
+
+					if (parse_commit(parent->item) ||
+					    parent->item->date < min_commit_date ||
+					    parent->item->generation < min_generation)
+						continue;
+
+					commit_list_insert(parent->item, &stack);
+					break;
+				}
+			}
+
+			if (!parent)
+				pop_commit(&stack);
+		}
+
+		if (!(list[i]->object.flags & (with_flag | RESULT))) {
+			result = 0;
+			goto cleanup;
+		}
+	}
+
+cleanup:
+	for (i = 0; i < from->nr; i++) {
+		clear_commit_marks(list[i], RESULT);
+		clear_commit_marks(list[i], assign_flag);
+	}
+	return result;
 }
 
 int can_all_from_reach(struct commit_list *from, struct commit_list *to,
@@ -583,6 +604,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 	time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0;
 	struct commit_list *from_iter = from, *to_iter = to;
 	int result;
+	uint32_t min_generation = GENERATION_NUMBER_INFINITY;
 
 	while (from_iter) {
 		add_object_array(&from_iter->item->object, NULL, &from_objs);
@@ -590,6 +612,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 		if (!parse_commit(from_iter->item)) {
 			if (from_iter->item->date < min_commit_date)
 				min_commit_date = from_iter->item->date;
+
+			if (from_iter->item->generation < min_generation)
+				min_generation = from_iter->item->generation;
 		}
 
 		from_iter = from_iter->next;
@@ -599,6 +624,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 		if (!parse_commit(to_iter->item)) {
 			if (to_iter->item->date < min_commit_date)
 				min_commit_date = to_iter->item->date;
+
+			if (to_iter->item->generation < min_generation)
+				min_generation = to_iter->item->generation;
 		}
 
 		to_iter->item->object.flags |= PARENT2;
@@ -607,7 +635,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 	}
 
 	result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1,
-					      min_commit_date);
+					      min_commit_date, min_generation);
 
 	while (from) {
 		clear_commit_marks(from->item, PARENT1);
diff --git a/commit-reach.h b/commit-reach.h
index aa202c9703..7d313e2975 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -59,19 +59,18 @@ define_commit_slab(contains_cache, enum contains_result);
 int commit_contains(struct ref_filter *filter, struct commit *commit,
 		    struct commit_list *list, struct contains_cache *cache);
 
-int reachable(struct commit *from, unsigned int with_flag,
-	      unsigned int assign_flag, time_t min_commit_date);
-
 /*
  * Determine if every commit in 'from' can reach at least one commit
  * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
  * as a marker for commits that are already visited. Do not walk
- * commits with date below 'min_commit_date'.
+ * commits with date below 'min_commit_date' or generation below
+ * 'min_generation'.
  */
 int can_all_from_reach_with_flag(struct object_array *from,
 				 unsigned int with_flag,
 				 unsigned int assign_flag,
-				 time_t min_commit_date);
+				 time_t min_commit_date,
+				 uint32_t min_generation);
 int can_all_from_reach(struct commit_list *from, struct commit_list *to,
 		       int commit_date_cutoff);
 
diff --git a/upload-pack.c b/upload-pack.c
index 11c426685d..1e498f1188 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -338,11 +338,14 @@ static int got_oid(const char *hex, struct object_id *oid)
 
 static int ok_to_give_up(void)
 {
+	uint32_t min_generation = GENERATION_NUMBER_ZERO;
+
 	if (!have_obj.nr)
 		return 0;
 
 	return can_all_from_reach_with_flag(&want_obj, THEY_HAVE,
-					    COMMON_KNOWN, oldest_have);
+					    COMMON_KNOWN, oldest_have,
+					    min_generation);
 }
 
 static int get_common_commits(void)
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* [PATCH v2 18/18] commit-reach: use can_all_from_reach
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (16 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear Derrick Stolee
@ 2018-07-20 16:33   ` Derrick Stolee
  2018-07-20 17:10   ` [PATCH v2 00/18] Consolidate reachability logic Stefan Beller
                     ` (3 subsequent siblings)
  21 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 16:33 UTC (permalink / raw)
  To: git@vger.kernel.org
  Cc: sbeller@google.com, stolee@gmail.com, jonathantanmy@google.com,
	gitster@pobox.com, Derrick Stolee

The is_descendant_of method previously used in_merge_bases() to check if
the commit can reach any of the commits in the provided list. This had
two performance problems:

1. The performance is quadratic in worst-case.

2. A single in_merge_bases() call requires walking beyond the target
   commit in order to find the full set of boundary commits that may be
   merge-bases.

The can_all_from_reach method avoids this quadratic behavior and can
limit the search beyond the target commits using generation numbers. It
requires a small prototype adjustment to stop using commit-date as a
cutoff, as that optimization is no longer appropriate here.

Since in_merge_bases() uses paint_down_to_common(), is_descendant_of()
naturally found cutoffs to avoid walking the entire commit graph. Since
we want to always return the correct result, we cannot use the
min_commit_date cutoff in can_all_from_reach. We then rely on generation
numbers to provide the cutoff.

Since not all repos will have a commit-graph file, nor will we always
have generation numbers computed for a commit-graph file, create a new
method, generation_numbers_enabled(), that checks for a commit-graph
file and sees if the first commit in the file has a non-zero generation
number. In the case that we do not have generation numbers, use the old
logic for is_descendant_of().

Performance was measured on a copy of the Linux repository using the
'test-tool reach is_descendant_of' command using this input:

A:v4.9
X:v4.10
X:v4.11
X:v4.12
X:v4.13
X:v4.14
X:v4.15
X:v4.16
X:v4.17
X:v3.0

Note that this input is tailored to demonstrate the quadratic nature of
the previous method, as it will compute merge-bases for v4.9 versus all
of the later versions before checking against v3.0.

Before: 0.26 s
 After: 0.21 s

Since we previously used the is_descendant_of method in the ref_newer
method, we also measured performance there using
'test-tool reach ref_newer' with this input:

A:v4.9
B:v3.19

Before: 0.10 s
 After: 0.08 s

By adding a new commit with parent v3.19, we test the non-reachable case
of ref_newer:

Before: 0.09 s
 After: 0.08 s

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 commit-graph.c | 18 ++++++++++++++++++
 commit-graph.h |  6 ++++++
 commit-reach.c | 24 +++++++++++++++++-------
 3 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/commit-graph.c b/commit-graph.c
index b0a55ad128..e9786fa864 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -233,6 +233,24 @@ static int prepare_commit_graph(struct repository *r)
 	return !!r->objects->commit_graph;
 }
 
+int generation_numbers_enabled(struct repository *r)
+{
+	uint32_t first_generation;
+	struct commit_graph *g;
+	if (!prepare_commit_graph(r))
+	       return 0;
+
+	g = r->objects->commit_graph;
+
+	if (!g->num_commits)
+		return 0;
+
+	first_generation = get_be32(g->chunk_commit_data +
+				    g->hash_len + 8) >> 2;
+
+	return !!first_generation;
+}
+
 static void close_commit_graph(void)
 {
 	free_commit_graph(the_repository->objects->commit_graph);
diff --git a/commit-graph.h b/commit-graph.h
index 76e098934a..0de8f88316 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -51,6 +51,12 @@ struct commit_graph {
 
 struct commit_graph *load_commit_graph_one(const char *graph_file);
 
+/*
+ * Return 1 if and only if the repository has a commit-graph
+ * file and generation numbers are computed in that file.
+ */
+int generation_numbers_enabled(struct repository *r);
+
 void write_commit_graph_reachable(const char *obj_dir, int append);
 void write_commit_graph(const char *obj_dir,
 			struct string_list *pack_indexes,
diff --git a/commit-reach.c b/commit-reach.c
index bc522d6840..c996524032 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -277,15 +277,25 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit)
 {
 	if (!with_commit)
 		return 1;
-	while (with_commit) {
-		struct commit *other;
 
-		other = with_commit->item;
-		with_commit = with_commit->next;
-		if (in_merge_bases(other, commit))
-			return 1;
+	if (generation_numbers_enabled(the_repository)) {
+		struct commit_list *from_list = NULL;
+		int result;
+		commit_list_insert(commit, &from_list);
+		result = can_all_from_reach(from_list, with_commit, 0);
+		free_commit_list(from_list);
+		return result;
+	} else {
+		while (with_commit) {
+			struct commit *other;
+
+			other = with_commit->item;
+			with_commit = with_commit->next;
+			if (in_merge_bases(other, commit))
+				return 1;
+		}
+		return 0;
 	}
-	return 0;
 }
 
 /*
-- 
2.18.0.118.gd4f65b8d14


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (17 preceding siblings ...)
  2018-07-20 16:33   ` [PATCH v2 18/18] commit-reach: use can_all_from_reach Derrick Stolee
@ 2018-07-20 17:10   ` Stefan Beller
  2018-07-20 17:15     ` Derrick Stolee
  2018-07-20 17:18   ` Derrick Stolee
                     ` (2 subsequent siblings)
  21 siblings, 1 reply; 118+ messages in thread
From: Stefan Beller @ 2018-07-20 17:10 UTC (permalink / raw)
  To: Derrick Stolee; +Cc: git, Derrick Stolee, Jonathan Tan, Junio C Hamano

Hi Derrick,

> V2 Update: The biggest material change in this version is that we drop the
> method declarations from commit.h, which requires adding a lot of references
> to commit-reach.h across the codebase. This change is in a commit on its own.
> In addition, we have the following smaller changes:

Is there a remote available to get this series from?

> * Use 'unsigned int' for the flag variables.
>
> * Properly align the here-doc test input data.
>
> * Use single rev-parse commands in test output, and pipe the OIDs through 'sort'
>
> * Check output of parse_commit()
>
> * Update flag documentation in object.h
>
> * Add tests for commit_contains() including both algorithms.
>
> * Reduce size of "mixed-mode" commit-graph to ensure we start commit walks
>   'above' the graph and then walk into the commits with generation numbers.

A range diff would be nice (though I can just look at all patches again
even if it takes longer).

I notice this is not sent via the GGG, but via git send-email?

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 17:10   ` [PATCH v2 00/18] Consolidate reachability logic Stefan Beller
@ 2018-07-20 17:15     ` Derrick Stolee
  2018-07-20 22:16       ` Stefan Beller
  0 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 17:15 UTC (permalink / raw)
  To: Stefan Beller, Derrick Stolee; +Cc: git, Jonathan Tan, Junio C Hamano

On 7/20/2018 1:10 PM, Stefan Beller wrote:
> Hi Derrick,
>
>> V2 Update: The biggest material change in this version is that we drop the
>> method declarations from commit.h, which requires adding a lot of references
>> to commit-reach.h across the codebase. This change is in a commit on its own.
>> In addition, we have the following smaller changes:
> Is there a remote available to get this series from?

Sure! It's on my fork [1]

[1] https://github.com/derrickstolee/git/tree/reach/refactor


>> * Use 'unsigned int' for the flag variables.
>>
>> * Properly align the here-doc test input data.
>>
>> * Use single rev-parse commands in test output, and pipe the OIDs through 'sort'
>>
>> * Check output of parse_commit()
>>
>> * Update flag documentation in object.h
>>
>> * Add tests for commit_contains() including both algorithms.
>>
>> * Reduce size of "mixed-mode" commit-graph to ensure we start commit walks
>>    'above' the graph and then walk into the commits with generation numbers.
> A range diff would be nice (though I can just look at all patches again
> even if it takes longer).

I can send a diff. It's a bit big because of the indenting changes.

> I notice this is not sent via the GGG, but via git send-email?

GGG can do version updates, but is currently having trouble when the 
branch has conflicts with the target [2]. We will address this issue 
next week, but I wanted to get this version out. Thank you for your 
patience as we work out the kinks.

[2] https://github.com/gitgitgadget/gitgitgadget/issues/25

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (18 preceding siblings ...)
  2018-07-20 17:10   ` [PATCH v2 00/18] Consolidate reachability logic Stefan Beller
@ 2018-07-20 17:18   ` Derrick Stolee
  2018-07-20 18:09     ` Eric Sunshine
  2018-07-20 17:41   ` Duy Nguyen
  2018-07-20 22:45   ` Junio C Hamano
  21 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 17:18 UTC (permalink / raw)
  To: git@vger.kernel.org; +Cc: stolee@gmail.com, sbeller@google.com

Here is the diff between v1 and v2.

Thanks,
-Stolee

---

diff --git a/bisect.c b/bisect.c
index e1275ba79e..d023543c91 100644
--- a/bisect.c
+++ b/bisect.c
@@ -13,6 +13,7 @@
 #include "sha1-array.h"
 #include "argv-array.h"
 #include "commit-slab.h"
+#include "commit-reach.h"
 
 static struct oid_array good_revs;
 static struct oid_array skipped_revs;
diff --git a/builtin/branch.c b/builtin/branch.c
index a50632fb23..9a787447f4 100644
--- a/builtin/branch.c
+++ b/builtin/branch.c
@@ -23,6 +23,7 @@
 #include "ref-filter.h"
 #include "worktree.h"
 #include "help.h"
+#include "commit-reach.h"
 
 static const char * const builtin_branch_usage[] = {
 	N_("git branch [<options>] [-r | -a] [--merged | --no-merged]"),
diff --git a/builtin/commit.c b/builtin/commit.c
index 158e3f843a..b5c608458e 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -33,6 +33,7 @@
 #include "sequencer.h"
 #include "mailmap.h"
 #include "help.h"
+#include "commit-reach.h"
 
 static const char * const builtin_commit_usage[] = {
 	N_("git commit [<options>] [--] <pathspec>..."),
diff --git a/builtin/fetch.c b/builtin/fetch.c
index f5d960baec..7de234774b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -22,6 +22,7 @@
 #include "utf8.h"
 #include "packfile.h"
 #include "list-objects-filter-options.h"
+#include "commit-reach.h"
 
 static const char * const builtin_fetch_usage[] = {
 	N_("git fetch [<options>] [<repository> [<refspec>...]]"),
diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index ff165c0fcd..7277d557b2 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -12,6 +12,7 @@
 #include "fmt-merge-msg.h"
 #include "gpg-interface.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 static const char * const fmt_merge_msg_usage[] = {
 	N_("git fmt-merge-msg [-m <message>] [--log[=<n>] | --no-log] [--file <file>]"),
diff --git a/builtin/log.c b/builtin/log.c
index 55a6286d7f..333d97c692 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -31,6 +31,7 @@
 #include "progress.h"
 #include "commit-slab.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 #define MAIL_DEFAULT_WRAP 72
 
diff --git a/builtin/merge-base.c b/builtin/merge-base.c
index 08d91b1f0c..1c92099070 100644
--- a/builtin/merge-base.c
+++ b/builtin/merge-base.c
@@ -7,6 +7,7 @@
 #include "revision.h"
 #include "parse-options.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 static int show_merge_base(struct commit **rev, int rev_nr, int show_all)
 {
diff --git a/builtin/merge.c b/builtin/merge.c
index d1b547d973..4c601c40a2 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -36,6 +36,7 @@
 #include "packfile.h"
 #include "tag.h"
 #include "alias.h"
+#include "commit-reach.h"
 
 #define DEFAULT_TWOHEAD (1<<0)
 #define DEFAULT_OCTOPUS (1<<1)
diff --git a/builtin/pull.c b/builtin/pull.c
index 4e78935392..15ad010968 100644
--- a/builtin/pull.c
+++ b/builtin/pull.c
@@ -22,6 +22,7 @@
 #include "tempfile.h"
 #include "lockfile.h"
 #include "wt-status.h"
+#include "commit-reach.h"
 
 enum rebase_type {
 	REBASE_INVALID = -1,
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 400d31c18c..d8467f9734 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -27,6 +27,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "protocol.h"
+#include "commit-reach.h"
 
 static const char * const receive_pack_usage[] = {
 	N_("git receive-pack <git-dir>"),
diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index 0f09bbbf65..455f62246d 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -14,6 +14,7 @@
 #include "revision.h"
 #include "split-index.h"
 #include "submodule.h"
+#include "commit-reach.h"
 
 #define DO_REVS		1
 #define DO_NOREV	2
diff --git a/commit-reach.c b/commit-reach.c
index 9eb6225403..c996524032 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -4,6 +4,7 @@
 #include "decorate.h"
 #include "prio-queue.h"
 #include "tree.h"
+#include "ref-filter.c"
 #include "revision.h"
 #include "tag.h"
 #include "commit-reach.h"
@@ -536,7 +537,8 @@ static int compare_commits_by_gen(const void *_a, const void *_b)
 }
 
 int can_all_from_reach_with_flag(struct object_array *from,
-				 int with_flag, int assign_flag,
+				 unsigned int with_flag,
+				 unsigned int assign_flag,
 				 time_t min_commit_date,
 				 uint32_t min_generation)
 {
@@ -548,9 +550,8 @@ int can_all_from_reach_with_flag(struct object_array *from,
 	for (i = 0; i < from->nr; i++) {
 		list[i] = (struct commit *)from->objects[i].item;
 
-		parse_commit(list[i]);
-
-		if (list[i]->generation < min_generation)
+		if (parse_commit(list[i]) ||
+		    list[i]->generation < min_generation)
 			return 0;
 	}
 
@@ -578,9 +579,8 @@ int can_all_from_reach_with_flag(struct object_array *from,
 				if (!(parent->item->object.flags & assign_flag)) {
 					parent->item->object.flags |= assign_flag;
 
-					parse_commit(parent->item);
-
-					if (parent->item->date < min_commit_date ||
+					if (parse_commit(parent->item) ||
+					    parent->item->date < min_commit_date ||
 					    parent->item->generation < min_generation)
 						continue;
 
diff --git a/commit-reach.h b/commit-reach.h
index 482d9eb5dd..7d313e2975 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -1,9 +1,12 @@
 #ifndef __COMMIT_REACH_H__
 #define __COMMIT_REACH_H__
 
-#include "commit.h"
 #include "commit-slab.h"
-#include "ref-filter.h"
+
+struct commit;
+struct commit_list;
+struct contains_cache;
+struct ref_filter;
 
 struct commit_list *get_merge_bases_many(struct commit *one,
 					 int n,
@@ -56,9 +59,6 @@ define_commit_slab(contains_cache, enum contains_result);
 int commit_contains(struct ref_filter *filter, struct commit *commit,
 		    struct commit_list *list, struct contains_cache *cache);
 
-int reachable(struct commit *from, int with_flag, int assign_flag,
-	      time_t min_commit_date);
-
 /*
  * Determine if every commit in 'from' can reach at least one commit
  * that is marked with 'with_flag'. As we traverse, use 'assign_flag'
@@ -67,7 +67,8 @@ int reachable(struct commit *from, int with_flag, int assign_flag,
  * 'min_generation'.
  */
 int can_all_from_reach_with_flag(struct object_array *from,
-				 int with_flag, int assign_flag,
+				 unsigned int with_flag,
+				 unsigned int assign_flag,
 				 time_t min_commit_date,
 				 uint32_t min_generation);
 int can_all_from_reach(struct commit_list *from, struct commit_list *to,
diff --git a/commit.h b/commit.h
index da0db36eba..e2c99d9b04 100644
--- a/commit.h
+++ b/commit.h
@@ -204,13 +204,6 @@ struct commit_graft *read_graft_line(struct strbuf *line);
 int register_commit_graft(struct repository *r, struct commit_graft *, int);
 struct commit_graft *lookup_commit_graft(struct repository *r, const struct object_id *oid);
 
-extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2);
-extern struct commit_list *get_merge_bases_many(struct commit *one, int n, struct commit **twos);
-extern struct commit_list *get_octopus_merge_bases(struct commit_list *in);
-
-/* To be used only when object flags after this call no longer matter */
-extern struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos);
-
 /* largest positive number a signed 32-bit integer can contain */
 #define INFINITE_DEPTH 0x7fffffff
 
@@ -258,32 +251,10 @@ extern int delayed_reachability_test(struct shallow_info *si, int c);
 extern void prune_shallow(int show_only);
 extern struct trace_key trace_shallow;
 
-int is_descendant_of(struct commit *, struct commit_list *);
-int in_merge_bases(struct commit *, struct commit *);
-int in_merge_bases_many(struct commit *, int, struct commit **);
-
 extern int interactive_add(int argc, const char **argv, const char *prefix, int patch);
 extern int run_add_interactive(const char *revision, const char *patch_mode,
 			       const struct pathspec *pathspec);
 
-/*
- * Takes a list of commits and returns a new list where those
- * have been removed that can be reached from other commits in
- * the list. It is useful for, e.g., reducing the commits
- * randomly thrown at the git-merge command and removing
- * redundant commits that the user shouldn't have given to it.
- *
- * This function destroys the STALE bit of the commit objects'
- * flags.
- */
-extern struct commit_list *reduce_heads(struct commit_list *heads);
-
-/*
- * Like `reduce_heads()`, except it replaces the list. Use this
- * instead of `foo = reduce_heads(foo);` to avoid memory leaks.
- */
-extern void reduce_heads_replace(struct commit_list **heads);
-
 struct commit_extra_header {
 	struct commit_extra_header *next;
 	char *key;
diff --git a/contrib/coccinelle/commit.cocci b/contrib/coccinelle/commit.cocci
index a7e9215ffc..aec3345adb 100644
--- a/contrib/coccinelle/commit.cocci
+++ b/contrib/coccinelle/commit.cocci
@@ -12,7 +12,7 @@ expression c;
 
 // These excluded functions must access c->maybe_tree direcly.
 @@
-identifier f !~ "^(get_commit_tree|get_commit_tree_in_graph|load_tree_for_commit)$";
+identifier f !~ "^(get_commit_tree|get_commit_tree_in_graph_one|load_tree_for_commit)$";
 expression c;
 @@
   f(...) {...
diff --git a/http-push.c b/http-push.c
index e007cb5a6b..91fdc7e1d5 100644
--- a/http-push.c
+++ b/http-push.c
@@ -16,7 +16,6 @@
 #include "object-store.h"
 #include "commit-reach.h"
 
-
 #ifdef EXPAT_NEEDS_XMLPARSE_H
 #include <xmlparse.h>
 #else
diff --git a/merge-recursive.c b/merge-recursive.c
index 1dd6ec384d..8155dee9a9 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -27,6 +27,7 @@
 #include "dir.h"
 #include "submodule.h"
 #include "revision.h"
+#include "commit-reach.h"
 
 struct path_hashmap_entry {
 	struct hashmap_entry e;
diff --git a/notes-merge.c b/notes-merge.c
index 76ab19e702..12dfdf6c17 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -12,6 +12,7 @@
 #include "notes-merge.h"
 #include "strbuf.h"
 #include "notes-utils.h"
+#include "commit-reach.h"
 
 struct notes_merge_pair {
 	struct object_id obj, base, local, remote;
diff --git a/object.h b/object.h
index fa5ca97567..b132944c51 100644
--- a/object.h
+++ b/object.h
@@ -60,12 +60,12 @@ struct object_array {
  * revision.h:               0---------10                                26
  * fetch-pack.c:             0----5
  * walker.c:                 0-2
- * upload-pack.c:                4       11----------------19
+ * upload-pack.c:                4       11-----14  16-----19
  * builtin/blame.c:                        12-13
  * bisect.c:                                        16
  * bundle.c:                                        16
  * http-push.c:                                     16-----19
- * commit.c:                                        16-----19
+ * commit-reach.c:                                15-------19
  * sha1-name.c:                                              20
  * list-objects-filter.c:                                      21
  * builtin/fsck.c:           0--3
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 953c5dd84d..55bcab907c 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -11,6 +11,7 @@
 #include "pack-bitmap.h"
 #include "sha1-lookup.h"
 #include "pack-objects.h"
+#include "commit-reach.h"
 
 struct bitmapped_commit {
 	struct commit *commit;
diff --git a/ref-filter.c b/ref-filter.c
index 35b2d25ce5..495e830fa5 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -18,6 +18,7 @@
 #include "trailer.h"
 #include "wt-status.h"
 #include "commit-slab.h"
+#include "commit-graph.h"
 #include "commit-reach.h"
 
 static struct ref_msg {
diff --git a/revision.c b/revision.c
index 4dbe406bed..3205a3947a 100644
--- a/revision.c
+++ b/revision.c
@@ -24,6 +24,7 @@
 #include "packfile.h"
 #include "worktree.h"
 #include "argv-array.h"
+#include "commit-reach.h"
 
 volatile show_early_output_fn_t show_early_output;
 
diff --git a/sequencer.c b/sequencer.c
index d1d07bed5b..97bdfd48b4 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -30,6 +30,7 @@
 #include "oidset.h"
 #include "commit-slab.h"
 #include "alias.h"
+#include "commit-reach.h"
 
 #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION"
 
diff --git a/sha1-name.c b/sha1-name.c
index 009faab4ae..7215b30b88 100644
--- a/sha1-name.c
+++ b/sha1-name.c
@@ -12,6 +12,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 static int get_oid_oneline(const char *, struct object_id *, struct commit_list *);
 
diff --git a/shallow.c b/shallow.c
index dbe8a2a290..99fd2d1ba0 100644
--- a/shallow.c
+++ b/shallow.c
@@ -16,6 +16,7 @@
 #include "list-objects.h"
 #include "commit-slab.h"
 #include "repository.h"
+#include "commit-reach.h"
 
 void set_alternate_shallow_file(struct repository *r, const char *path, int override)
 {
diff --git a/submodule.c b/submodule.c
index 6688dd5d45..6650ed7aa0 100644
--- a/submodule.c
+++ b/submodule.c
@@ -22,6 +22,7 @@
 #include "worktree.h"
 #include "parse-options.h"
 #include "object-store.h"
+#include "commit-reach.h"
 
 static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
 static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP;
diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c
index dc97100992..eb21103998 100644
--- a/t/helper/test-reach.c
+++ b/t/helper/test-reach.c
@@ -1,10 +1,31 @@
 #include "test-tool.h"
 #include "cache.h"
+#include "commit.h"
 #include "commit-reach.h"
 #include "config.h"
 #include "parse-options.h"
+#include "ref-filter.h"
+#include "string-list.h"
 #include "tag.h"
 
+static void print_sorted_commit_ids(struct commit_list *list)
+{
+	int i;
+	struct string_list s = STRING_LIST_INIT_DUP;
+
+	while (list) {
+		string_list_append(&s, oid_to_hex(&list->item->object.oid));
+		list = list->next;
+	}
+
+	string_list_sort(&s);
+
+	for (i = 0; i < s.nr; i++)
+		printf("%s\n", s.items[i].string);
+
+	string_list_clear(&s, 0);
+}
+
 int cmd__reach(int ac, const char **av)
 {
 	struct object_id oid_A, oid_B;
@@ -77,7 +98,7 @@ int cmd__reach(int ac, const char **av)
 	strbuf_release(&buf);
 
 	if (!strcmp(av[1], "ref_newer"))
-		printf("%s:%d\n", av[1], ref_newer(&oid_A, &oid_B));
+		printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B));
 	else if (!strcmp(av[1], "in_merge_bases"))
 		printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B));
 	else if (!strcmp(av[1], "is_descendant_of"))
@@ -85,19 +106,24 @@ int cmd__reach(int ac, const char **av)
 	else if (!strcmp(av[1], "get_merge_bases_many")) {
 		struct commit_list *list = get_merge_bases_many(A, X_nr, X_array);
 		printf("%s(A,X):\n", av[1]);
-		while (list) {
-			printf("%s\n", oid_to_hex(&list->item->object.oid));
-			list = list->next;
-		}
+		print_sorted_commit_ids(list);
 	} else if (!strcmp(av[1], "reduce_heads")) {
 		struct commit_list *list = reduce_heads(X);
 		printf("%s(X):\n", av[1]);
-		while (list) {
-			printf("%s\n", oid_to_hex(&list->item->object.oid));
-			list = list->next;
-		}
+		print_sorted_commit_ids(list);
 	} else if (!strcmp(av[1], "can_all_from_reach")) {
 		printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1));
+	} else if (!strcmp(av[1], "commit_contains")) {
+		struct ref_filter filter;
+		struct contains_cache cache;
+		init_contains_cache(&cache);
+
+		if (ac > 2 && !strcmp(av[2], "--tag"))
+			filter.with_commit_tag_algo = 1;
+		else
+			filter.with_commit_tag_algo = 0;
+
+		printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache));
 	}
 
 	exit(0);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index cb07c64310..d139a00d1d 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -23,7 +23,7 @@ test_description='basic commit reachability tests'
 #              \    /
 #              (1,1)
 #
-# We use branch 'comit-x-y' to refer to (x,y).
+# We use branch 'commit-x-y' to refer to (x,y).
 # This grid allows interesting reachability and
 # non-reachability queries: (x,y) can reach (x',y')
 # if and only if x' <= x and y' <= y.
@@ -48,7 +48,7 @@ test_expect_success 'setup' '
 	done &&
 	git commit-graph write --reachable &&
 	mv .git/objects/info/commit-graph commit-graph-full &&
-	git show-ref -s commit-7-7 | git commit-graph write --stdin-commits &&
+	git show-ref -s commit-5-5 | git commit-graph write --stdin-commits &&
 	mv .git/objects/info/commit-graph commit-graph-half &&
 	git config core.commitGraph true
 '
@@ -67,142 +67,176 @@ test_three_modes () {
 
 test_expect_success 'ref_newer:miss' '
 	cat >input <<-\EOF &&
-		A:commit-5-7
-		B:commit-4-9
+	A:commit-5-7
+	B:commit-4-9
 	EOF
-	printf "ref_newer:0\n" >expect &&
+	echo "ref_newer(A,B):0" >expect &&
 	test_three_modes ref_newer
 '
 
 test_expect_success 'ref_newer:hit' '
 	cat >input <<-\EOF &&
-		A:commit-5-7
-		B:commit-2-3
+	A:commit-5-7
+	B:commit-2-3
 	EOF
-	printf "ref_newer:1\n" >expect &&
+	echo "ref_newer(A,B):1" >expect &&
 	test_three_modes ref_newer
 '
 
 test_expect_success 'in_merge_bases:hit' '
-	cat >input <<- EOF &&
-		A:commit-5-7
-		B:commit-8-8
+	cat >input <<-\EOF &&
+	A:commit-5-7
+	B:commit-8-8
 	EOF
-	printf "in_merge_bases(A,B):1\n" >expect &&
+	echo "in_merge_bases(A,B):1" >expect &&
 	test_three_modes in_merge_bases
 '
 
 test_expect_success 'in_merge_bases:miss' '
-	cat >input <<- EOF &&
-		A:commit-6-8
-		B:commit-5-9
+	cat >input <<-\EOF &&
+	A:commit-6-8
+	B:commit-5-9
 	EOF
-	printf "in_merge_bases(A,B):0\n" >expect &&
+	echo "in_merge_bases(A,B):0" >expect &&
 	test_three_modes in_merge_bases
 '
 
 test_expect_success 'is_descendant_of:hit' '
 	cat >input <<-\EOF &&
-		A:commit-5-7
-		X:commit-4-8
-		X:commit-6-6
-		X:commit-1-1
+	A:commit-5-7
+	X:commit-4-8
+	X:commit-6-6
+	X:commit-1-1
 	EOF
-	printf "is_descendant_of(A,X):1\n" >expect &&
+	echo "is_descendant_of(A,X):1" >expect &&
 	test_three_modes is_descendant_of
 '
 
 test_expect_success 'is_descendant_of:miss' '
 	cat >input <<-\EOF &&
-		A:commit-6-8
-		X:commit-5-9
-		X:commit-4-10
-		X:commit-7-6
+	A:commit-6-8
+	X:commit-5-9
+	X:commit-4-10
+	X:commit-7-6
 	EOF
-	printf "is_descendant_of(A,X):0\n" >expect &&
+	echo "is_descendant_of(A,X):0" >expect &&
 	test_three_modes is_descendant_of
 '
 
 test_expect_success 'get_merge_bases_many' '
 	cat >input <<-\EOF &&
-		A:commit-5-7
-		X:commit-4-8
-		X:commit-6-6
-		X:commit-8-3
+	A:commit-5-7
+	X:commit-4-8
+	X:commit-6-6
+	X:commit-8-3
 	EOF
 	{
-		printf "get_merge_bases_many(A,X):\n" &&
-		git rev-parse commit-5-6 &&
-		git rev-parse commit-4-7
+		echo "get_merge_bases_many(A,X):" &&
+		git rev-parse commit-5-6 \
+			      commit-4-7 | sort
 	} >expect &&
 	test_three_modes get_merge_bases_many
 '
 
 test_expect_success 'reduce_heads' '
 	cat >input <<-\EOF &&
-		X:commit-1-10
-		X:commit-2-8
-		X:commit-3-6
-		X:commit-4-4
-		X:commit-1-7
-		X:commit-2-5
-		X:commit-3-3
-		X:commit-5-1
+	X:commit-1-10
+	X:commit-2-8
+	X:commit-3-6
+	X:commit-4-4
+	X:commit-1-7
+	X:commit-2-5
+	X:commit-3-3
+	X:commit-5-1
 	EOF
 	{
-		printf "reduce_heads(X):\n" &&
-		git rev-parse commit-5-1 &&
-		git rev-parse commit-4-4 &&
-		git rev-parse commit-3-6 &&
-		git rev-parse commit-2-8 &&
-		git rev-parse commit-1-10
+		echo "reduce_heads(X):" &&
+		git rev-parse commit-5-1 \
+			      commit-4-4 \
+			      commit-3-6 \
+			      commit-2-8 \
+			      commit-1-10 | sort
 	} >expect &&
 	test_three_modes reduce_heads
 '
 
 test_expect_success 'can_all_from_reach:hit' '
 	cat >input <<-\EOF &&
-		X:commit-2-10
-		X:commit-3-9
-		X:commit-4-8
-		X:commit-5-7
-		X:commit-6-6
-		X:commit-7-5
-		X:commit-8-4
-		X:commit-9-3
-		Y:commit-1-9
-		Y:commit-2-8
-		Y:commit-3-7
-		Y:commit-4-6
-		Y:commit-5-5
-		Y:commit-6-4
-		Y:commit-7-3
-		Y:commit-8-1
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	Y:commit-1-9
+	Y:commit-2-8
+	Y:commit-3-7
+	Y:commit-4-6
+	Y:commit-5-5
+	Y:commit-6-4
+	Y:commit-7-3
+	Y:commit-8-1
 	EOF
-	printf "can_all_from_reach(X,Y):1\n" >expect &&
+	echo "can_all_from_reach(X,Y):1" >expect &&
 	test_three_modes can_all_from_reach
 '
 
 test_expect_success 'can_all_from_reach:miss' '
 	cat >input <<-\EOF &&
-		X:commit-2-10
-		X:commit-3-9
-		X:commit-4-8
-		X:commit-5-7
-		X:commit-6-6
-		X:commit-7-5
-		X:commit-8-4
-		X:commit-9-3
-		Y:commit-1-9
-		Y:commit-2-8
-		Y:commit-3-7
-		Y:commit-4-6
-		Y:commit-5-5
-		Y:commit-6-4
-		Y:commit-8-5
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	Y:commit-1-9
+	Y:commit-2-8
+	Y:commit-3-7
+	Y:commit-4-6
+	Y:commit-5-5
+	Y:commit-6-4
+	Y:commit-8-5
 	EOF
-	printf "can_all_from_reach(X,Y):0\n" >expect &&
+	echo "can_all_from_reach(X,Y):0" >expect &&
 	test_three_modes can_all_from_reach
 '
 
+test_expect_success 'commit_contains:hit' '
+	cat >input <<-\EOF &&
+	A:commit-7-7
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	EOF
+	echo "commit_contains(_,A,X,_):1" >expect &&
+	test_three_modes commit_contains &&
+	test_three_modes commit_contains --tag
+'
+
+test_expect_success 'commit_contains:miss' '
+	cat >input <<-\EOF &&
+	A:commit-6-5
+	X:commit-2-10
+	X:commit-3-9
+	X:commit-4-8
+	X:commit-5-7
+	X:commit-6-6
+	X:commit-7-5
+	X:commit-8-4
+	X:commit-9-3
+	EOF
+	echo "commit_contains(_,A,X,_):0" >expect &&
+	test_three_modes commit_contains &&
+	test_three_modes commit_contains --tag
+'
+
 test_done

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (19 preceding siblings ...)
  2018-07-20 17:18   ` Derrick Stolee
@ 2018-07-20 17:41   ` Duy Nguyen
  2018-07-20 19:09     ` Derrick Stolee
  2018-07-20 22:45   ` Junio C Hamano
  21 siblings, 1 reply; 118+ messages in thread
From: Duy Nguyen @ 2018-07-20 17:41 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: Git Mailing List, Stefan Beller, Derrick Stolee, Jonathan Tan,
	Junio C Hamano

On Fri, Jul 20, 2018 at 6:35 PM Derrick Stolee <dstolee@microsoft.com> wrote:
>
> There are many places in Git that use a commit walk to determine
> reachability between commits and/or refs. A lot of this logic is
> duplicated.
>
> I wanted to achieve the following:
>
> Consolidate several different commit walks into one file

I'm surprised get_shallow_commits() in shallow.c didn't make the cut.
It's no problem though if you already considered it and decided it was
better left alone.

> Reduce duplicate reachability logic
> Increase testability (correctness and performance)
> Improve performance of reachability queries

What's your recommendation on adding new commit reachability code? I
might have to add one to fix prune_shallow() if I don't find anything
fit. I guess the code should go to commit-reach.c too?
-- 
Duy

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 17:18   ` Derrick Stolee
@ 2018-07-20 18:09     ` Eric Sunshine
  2018-07-20 19:14       ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Eric Sunshine @ 2018-07-20 18:09 UTC (permalink / raw)
  To: Derrick Stolee; +Cc: Git List, Derrick Stolee, Stefan Beller

On Fri, Jul 20, 2018 at 1:20 PM Derrick Stolee <dstolee@microsoft.com> wrote:
> Here is the diff between v1 and v2.
>
> diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
> @@ -67,142 +67,176 @@ test_three_modes () {
>  test_expect_success 'get_merge_bases_many' '
>         cat >input <<-\EOF &&
> +       A:commit-5-7
> +       X:commit-4-8
> +       X:commit-6-6
> +       X:commit-8-3
>         EOF
>         {
> -               printf "get_merge_bases_many(A,X):\n" &&
> -               git rev-parse commit-5-6 &&
> -               git rev-parse commit-4-7
> +               echo "get_merge_bases_many(A,X):" &&
> +               git rev-parse commit-5-6 \
> +                             commit-4-7 | sort

Pipes lose the exit codes of all upstream commands. When a Git
command is upstream, we'd usually recommend to dump its output to a
file, then use the file as input to the rest of the pipe so as not to
lose the Git command's exit code:

    {
        ...
        git rev-parse ... >oids &&
        sort <oids
    } >expect &&

One could argue, in this case, that if git-rev-parse crashes, then it
won't have the expected output and the test will fail anyhow despite
not seeing its failed exit code. However, git-rev-parse might crash
_after_ emitting all the normal, expected output, and that crash would
be missed altogether, so avoiding git-rev-parse as a pipe upstream is
a good idea.

However, one could counter that argument by saying that it isn't the job
of this particular test script to check git-rev-parse's behavior, so
crashy git-rev-parse ought to be caught elsewhere by some other test
script. Nevertheless, you'll likely encounter reviewers who don't want
to see git-rev-parse upstream, even with that argument.

Anyhow, why is that 'sort' even there? It wasn't needed in the
original. Is git-rev-parse outputting the OIDs in random order?

>         } >expect &&
>         test_three_modes get_merge_bases_many
>  '
>
>  test_expect_success 'reduce_heads' '
>         [...]
> +               git rev-parse commit-5-1 \
> +                             commit-4-4 \
> +                             commit-3-6 \
> +                             commit-2-8 \
> +                             commit-1-10 | sort

Ditto.

>         } >expect &&
>         test_three_modes reduce_heads
>  '

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 17:41   ` Duy Nguyen
@ 2018-07-20 19:09     ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 19:09 UTC (permalink / raw)
  To: Duy Nguyen, Derrick Stolee
  Cc: Git Mailing List, Stefan Beller, Jonathan Tan, Junio C Hamano

On 7/20/2018 1:41 PM, Duy Nguyen wrote:
> On Fri, Jul 20, 2018 at 6:35 PM Derrick Stolee <dstolee@microsoft.com> wrote:
>> There are many places in Git that use a commit walk to determine
>> reachability between commits and/or refs. A lot of this logic is
>> duplicated.
>>
>> I wanted to achieve the following:
>>
>> Consolidate several different commit walks into one file
> I'm surprised get_shallow_commits() in shallow.c didn't make the cut.
> It's no problem though if you already considered it and decided it was
> better left alone.

Thanks for pointing this out. I didn't know about it. It would make an 
excellent follow-up series.

>> Reduce duplicate reachability logic
>> Increase testability (correctness and performance)
>> Improve performance of reachability queries
> What's your recommendation on adding new commit reachability code? I
> might have to add one to fix prune_shallow() if I don't find anything
> fit. I guess the code should go to commit-reach.c too?

In my opinion, new commit walks should go into commit-reach.c. Then, you 
can justify why you are using a "new" walk instead of using an existing 
walk. Further, you can probably think of the walk in more generic terms 
than the specific application you need. Finally, you can use the 
'test-tool reach <method>' pattern to test the specific walk you create 
outside of the logic for which you needed it.

I understand that while this patch is under review, you will probably 
want to continue adding your walk where it is, then we can consolidate 
the code after both have settled.

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 18:09     ` Eric Sunshine
@ 2018-07-20 19:14       ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-20 19:14 UTC (permalink / raw)
  To: Eric Sunshine, Derrick Stolee; +Cc: Git List, Stefan Beller

On 7/20/2018 2:09 PM, Eric Sunshine wrote:
> On Fri, Jul 20, 2018 at 1:20 PM Derrick Stolee <dstolee@microsoft.com> wrote:
>> Here is the diff between v1 and v2.
>>
>> diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
>> @@ -67,142 +67,176 @@ test_three_modes () {
>>   test_expect_success 'get_merge_bases_many' '
>>          cat >input <<-\EOF &&
>> +       A:commit-5-7
>> +       X:commit-4-8
>> +       X:commit-6-6
>> +       X:commit-8-3
>>          EOF
>>          {
>> -               printf "get_merge_bases_many(A,X):\n" &&
>> -               git rev-parse commit-5-6 &&
>> -               git rev-parse commit-4-7
>> +               echo "get_merge_bases_many(A,X):" &&
>> +               git rev-parse commit-5-6 \
>> +                             commit-4-7 | sort
> Pipes lose the exit codes of all upstream commands. When a Git
> command is upstream, we'd usually recommend to dump its output to a
> file, then use the file as input to the rest of the pipe so as not to
> lose the Git command's exit code:
>
>      {
>          ...
>          git rev-parse ... >oids &&
>          sort <oids
>      } >expect &&

This approach seems fine to me. I'd hate to be in the case where 
rev-parse reports an error, terminating early, resulting in an incorrect 
expected file, and then having the test pass because the code is 
similarly incorrect. No matter how slim the chances are, I want to avoid 
a false positive there.

> One could argue, in this case, that if git-rev-parse crashes, then it
> won't have the expected output and the test will fail anyhow despite
> not seeing its failed exit code. However, git-rev-parse might crash
> _after_ emitting all the normal, expected output, and that crash would
> be missed altogether, so avoiding git-rev-parse as a pipe upstream is
> a good idea.
>
> However, one could counter that argument by saying that it isn't the job
> of this particular test script to check git-rev-parse's behavior, so
> crashy git-rev-parse ought to be caught elsewhere by some other test
> script. Nevertheless, you'll likely encounter reviewers who don't want
> to see git-rev-parse upstream, even with that argument.
>
> Anyhow, why is that 'sort' even there? It wasn't needed in the
> original. Is git-rev-parse outputting the OID's in random order?

Since the merge-base algorithms provide the commits in an order that 
depends on the implementation (not the functional contract), we decided 
to sort the output commit ids in the output of 'test-tool reach 
<method>'. Thus, we sort the rev-parse output to match.

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 17:15     ` Derrick Stolee
@ 2018-07-20 22:16       ` Stefan Beller
  2018-08-01 20:33         ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Stefan Beller @ 2018-07-20 22:16 UTC (permalink / raw)
  To: Derrick Stolee; +Cc: Derrick Stolee, git, Jonathan Tan, Junio C Hamano

Hi Derrick,

> Sure! It's on my fork [1]
>
> [1] https://github.com/derrickstolee/git/tree/reach/refactor
>

Thanks!

> >> * Use single rev-parse commands in test output, and pipe the OIDs through 'sort'

Why do we need to sort them? The order of the answers given by rev-parse
is the same as the input given and we did not need to sort it before, i.e.
the unit under test would not give sorted output but some deterministic(?)
order, which we can replicate as input to rev-parse.
Am I missing the obvious?

> >> * Check output of parse_commit()
> >>
> >> * Update flag documentation in object.h
> >>
> >> * Add tests for commit_contains() including both algorithms.
> >>
> >> * Reduce size of "mixed-mode" commit-graph to ensure we start commit walks
> >>    'above' the graph and then walk into the commits with generation numbers.

Overall I like the series as-is, and have found
no further issues in a quick read.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
                     ` (20 preceding siblings ...)
  2018-07-20 17:41   ` Duy Nguyen
@ 2018-07-20 22:45   ` Junio C Hamano
  21 siblings, 0 replies; 118+ messages in thread
From: Junio C Hamano @ 2018-07-20 22:45 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, stolee@gmail.com,
	jonathantanmy@google.com

Derrick Stolee <dstolee@microsoft.com> writes:

> There are many places in Git that use a commit walk to determine
> reachability between commits and/or refs. A lot of this logic is
> duplicated.
>
> I wanted to achieve the following:
>
> Consolidate several different commit walks into one file
> Reduce duplicate reachability logic
> Increase testability (correctness and performance)
> Improve performance of reachability queries

All of these are good goals to shoot at.

> This series is based on jt/commit-graph-per-object-store

As such, it has some interactions with another topic [*1*] that is
based on the same, but my trial merge seems to suggest that the
interactions are minimal and do not pose a serious problem.

Will push out as part of the next integration run, as I've already
pushed out today's.

Thanks.


[Footnote]

*1* ds/commit-graph-with-grafts topic that is in 'pu', slated to go
    'next' soonish.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 15/18] test-reach: test commit_contains
  2018-07-20 16:33   ` [PATCH v2 15/18] test-reach: test commit_contains Derrick Stolee
@ 2018-07-23 20:35     ` Jonathan Tan
  2018-07-25 18:08       ` Junio C Hamano
  0 siblings, 1 reply; 118+ messages in thread
From: Jonathan Tan @ 2018-07-23 20:35 UTC (permalink / raw)
  To: dstolee; +Cc: git, sbeller, stolee, jonathantanmy, gitster

> +	} else if (!strcmp(av[1], "commit_contains")) {
> +		struct ref_filter filter;
> +		struct contains_cache cache;
> +		init_contains_cache(&cache);
> +
> +		if (ac > 2 && !strcmp(av[2], "--tag"))
> +			filter.with_commit_tag_algo = 1;
> +		else
> +			filter.with_commit_tag_algo = 0;
> +
> +		printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache));

Should we initialize filter (with {NULL} or some equivalent)?

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear
  2018-07-20 16:33   ` [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear Derrick Stolee
@ 2018-07-23 20:41     ` Jonathan Tan
  2018-08-01 20:41       ` Derrick Stolee
  2018-09-12  4:14     ` Jeff King
  1 sibling, 1 reply; 118+ messages in thread
From: Jonathan Tan @ 2018-07-23 20:41 UTC (permalink / raw)
  To: dstolee; +Cc: git, sbeller, stolee, jonathantanmy, gitster

> +		if (parse_commit(list[i]) ||
> +		    list[i]->generation < min_generation)

Here...

> +					if (parse_commit(parent->item) ||
> +					    parent->item->date < min_commit_date ||
> +					    parent->item->generation < min_generation)

...and here, would parse_commit_or_die() be better? I think that a
function that returns a definitive answer (either the commits are
reachable or not) should die when the commits cannot be parsed.

Other than that, I've compared the commits in this version to v1, and
all my review comments have been addressed, thanks. (With the exception
of the skip_prefix() one, but that is a minor matter - I suggested that
to make it easier to implement my "Ancestor:" and "Descendant:"
suggestion which Stolee disagreed on with reason.)

[1] https://public-inbox.org/git/20180716230019.257742-1-jonathantanmy@google.com/

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 15/18] test-reach: test commit_contains
  2018-07-23 20:35     ` Jonathan Tan
@ 2018-07-25 18:08       ` Junio C Hamano
  2018-07-25 18:30         ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Junio C Hamano @ 2018-07-25 18:08 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: dstolee, git, sbeller, stolee

Jonathan Tan <jonathantanmy@google.com> writes:

>> +	} else if (!strcmp(av[1], "commit_contains")) {
>> +		struct ref_filter filter;
>> +		struct contains_cache cache;
>> +		init_contains_cache(&cache);
>> +
>> +		if (ac > 2 && !strcmp(av[2], "--tag"))
>> +			filter.with_commit_tag_algo = 1;
>> +		else
>> +			filter.with_commit_tag_algo = 0;
>> +
>> +		printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache));
>
> Should we initialize filter (with {NULL} or some equivalent)?

Sounds like a sensible suggestion.  Wouldn't we segfault otherwise
depending on what garbage bytes are on the stack?

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 15/18] test-reach: test commit_contains
  2018-07-25 18:08       ` Junio C Hamano
@ 2018-07-25 18:30         ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-07-25 18:30 UTC (permalink / raw)
  To: Junio C Hamano, Jonathan Tan; +Cc: dstolee, git, sbeller

On 7/25/2018 2:08 PM, Junio C Hamano wrote:
> Jonathan Tan <jonathantanmy@google.com> writes:
>
>>> +	} else if (!strcmp(av[1], "commit_contains")) {
>>> +		struct ref_filter filter;
>>> +		struct contains_cache cache;
>>> +		init_contains_cache(&cache);
>>> +
>>> +		if (ac > 2 && !strcmp(av[2], "--tag"))
>>> +			filter.with_commit_tag_algo = 1;
>>> +		else
>>> +			filter.with_commit_tag_algo = 0;
>>> +
>>> +		printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache));
>> Should we initialize filter (with {NULL} or some equivalent)?
> Sounds like a sensible suggestion.  Wouldn't we segfault otherwise
> depending on what garbage bytes are on the stack?

It's a good idea to initialize the struct properly, but the only part of 
the 'filter' struct that is accessed by that method is the 
'with_commit_tag_algo' member. Everything else is read from A, X, and cache.

Thanks,
-Stolee

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 00/18] Consolidate reachability logic
  2018-07-20 22:16       ` Stefan Beller
@ 2018-08-01 20:33         ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-08-01 20:33 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Derrick Stolee, git, Jonathan Tan, Junio C Hamano

On 7/20/2018 6:16 PM, Stefan Beller wrote:
>>>> * Use single rev-parse commands in test output, and pipe the OIDs through 'sort'
> Why do we need to sort them? The order of the answers given by rev-parse
> is the same as the input given and we did not need to sort it before, i.e.
> the unit under test would not give sorted output but some deterministic(?)
> order, which we can replicate as input to rev-parse.
> Am I missing the obvious?
The output of the test program is not always deterministic (or at least, 
the order is determined by the implementation, but not as part of the 
method contract). For example: get_all_merge_bases can return the list 
of merge bases in any order.

By sorting, we can ensure the output values (and their multiplicity) 
match expected.

Thanks,
-Stolee

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear
  2018-07-23 20:41     ` Jonathan Tan
@ 2018-08-01 20:41       ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-08-01 20:41 UTC (permalink / raw)
  To: Jonathan Tan, dstolee; +Cc: git, sbeller, gitster



On 7/23/2018 4:41 PM, Jonathan Tan wrote:
>> +		if (parse_commit(list[i]) ||
>> +		    list[i]->generation < min_generation)
> Here...
>
>> +					if (parse_commit(parent->item) ||
>> +					    parent->item->date < min_commit_date ||
>> +					    parent->item->generation < min_generation)
> ...and here, would parse_commit_or_die() be better? I think that a
> function that returns a definitive answer (either the commits are
> reachable or not) should die when the commits cannot be parsed.
I'm hesitant to add _or_die() here, when the previous implementation 
only used parse_object() or parse_commit(), so would not die when 
parsing fails. The same holds true for the other methods that call 
can_all_from_reach().

Thanks,
-Stolee

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter
  2018-07-20 16:33   ` [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter Derrick Stolee
@ 2018-08-28 21:24     ` Jonathan Nieder
  2018-08-28 21:33       ` Derrick Stolee
  2018-08-28 21:36       ` [PATCH] commit-reach: correct accidental #include of C file Jonathan Nieder
  0 siblings, 2 replies; 118+ messages in thread
From: Jonathan Nieder @ 2018-08-28 21:24 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, stolee@gmail.com,
	jonathantanmy@google.com, gitster@pobox.com

Hi,

Derrick Stolee wrote:

> There are several commit walks in the codebase. Group them together into
> a new commit-reach.c file and corresponding header. After we group these
> walks into one place, we can reduce duplicate logic by calling
> equivalent methods.
>
> All methods are direct moves, except we also make the commit_contains()
> method public so its consumers in ref-filter.c can still call it. We can
> also test this method in a test-tool in a later commit.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  commit-reach.c | 121 +++++++++++++++++++++++++++++++++++++++++
>  commit-reach.h |  20 ++++++-
>  ref-filter.c   | 145 +++----------------------------------------------
>  3 files changed, 147 insertions(+), 139 deletions(-)
> 
> diff --git a/commit-reach.c b/commit-reach.c
> index a6bc4781a6..01d796f011 100644
> --- a/commit-reach.c
> +++ b/commit-reach.c
> @@ -1,8 +1,10 @@
>  #include "cache.h"
>  #include "commit.h"
> +#include "commit-graph.h"
>  #include "decorate.h"
>  #include "prio-queue.h"
>  #include "tree.h"
> +#include "ref-filter.c"

Did you mean "ref-filter.h"?

This broke the build here.  Is there some check that we can use to
prevent it happening again?  I don't think we ever intentionally
#include a .c file.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter
  2018-08-28 21:24     ` Jonathan Nieder
@ 2018-08-28 21:33       ` Derrick Stolee
  2018-08-28 21:36       ` [PATCH] commit-reach: correct accidental #include of C file Jonathan Nieder
  1 sibling, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-08-28 21:33 UTC (permalink / raw)
  To: Jonathan Nieder, Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, jonathantanmy@google.com,
	gitster@pobox.com

On 8/28/2018 5:24 PM, Jonathan Nieder wrote:
> Hi,
>
> Derrick Stolee wrote:
>
>> There are several commit walks in the codebase. Group them together into
>> a new commit-reach.c file and corresponding header. After we group these
>> walks into one place, we can reduce duplicate logic by calling
>> equivalent methods.
>>
>> All methods are direct moves, except we also make the commit_contains()
>> method public so its consumers in ref-filter.c can still call it. We can
>> also test this method in a test-tool in a later commit.
>>
>> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
>> ---
>>   commit-reach.c | 121 +++++++++++++++++++++++++++++++++++++++++
>>   commit-reach.h |  20 ++++++-
>>   ref-filter.c   | 145 +++----------------------------------------------
>>   3 files changed, 147 insertions(+), 139 deletions(-)
>>
>> diff --git a/commit-reach.c b/commit-reach.c
>> index a6bc4781a6..01d796f011 100644
>> --- a/commit-reach.c
>> +++ b/commit-reach.c
>> @@ -1,8 +1,10 @@
>>   #include "cache.h"
>>   #include "commit.h"
>> +#include "commit-graph.h"
>>   #include "decorate.h"
>>   #include "prio-queue.h"
>>   #include "tree.h"
>> +#include "ref-filter.c"
> Did you mean "ref-filter.h"?
>
> This broke the build here.  Is there some check that we can use to
> prevent it happening again?  I don't think we ever intentionally
> #include a .c file.
Woah! How did that ever work? I definitely built this locally.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* [PATCH] commit-reach: correct accidental #include of C file
  2018-08-28 21:24     ` Jonathan Nieder
  2018-08-28 21:33       ` Derrick Stolee
@ 2018-08-28 21:36       ` Jonathan Nieder
  2018-08-28 21:39         ` Derrick Stolee
  1 sibling, 1 reply; 118+ messages in thread
From: Jonathan Nieder @ 2018-08-28 21:36 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, stolee@gmail.com,
	jonathantanmy@google.com, gitster@pobox.com

Without this change, the build breaks with clang:

 libgit/ref-filter.pic.o: multiple definition of 'filter_refs'
 libgit/commit-reach.pic.o: previous definition here

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
Jonathan Nieder wrote:
> Derrick Stolee wrote:

>> --- a/commit-reach.c
>> +++ b/commit-reach.c
>> @@ -1,8 +1,10 @@
>>  #include "cache.h"
>>  #include "commit.h"
>> +#include "commit-graph.h"
>>  #include "decorate.h"
>>  #include "prio-queue.h"
>>  #include "tree.h"
>> +#include "ref-filter.c"
>
> Did you mean "ref-filter.h"?
>
> This broke the build here.  Is there some check that we can use to
> prevent it happening again?  I don't think we ever intentionally
> #include a .c file.

Here's what I'm applying locally.

Thanks,
Jonathan

 commit-reach.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/commit-reach.c b/commit-reach.c
index c996524032..86715c103c 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -4,7 +4,7 @@
 #include "decorate.h"
 #include "prio-queue.h"
 #include "tree.h"
-#include "ref-filter.c"
+#include "ref-filter.h"
 #include "revision.h"
 #include "tag.h"
 #include "commit-reach.h"
-- 
2.19.0.rc0.228.g281dcd1b4d0


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH] commit-reach: correct accidental #include of C file
  2018-08-28 21:36       ` [PATCH] commit-reach: correct accidental #include of C file Jonathan Nieder
@ 2018-08-28 21:39         ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-08-28 21:39 UTC (permalink / raw)
  To: Jonathan Nieder, Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, jonathantanmy@google.com,
	gitster@pobox.com

On 8/28/2018 5:36 PM, Jonathan Nieder wrote:
> Without this change, the build breaks with clang:
For some reason, it didn't fail with GCC for me, but this is an 
obviously correct change to make. Thanks!
>   libgit/ref-filter.pic.o: multiple definition of 'filter_refs'
>   libgit/commit-reach.pic.o: previous definition here
>
> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
> ---
> Jonathan Nieder wrote:
>> Derrick Stolee wrote:
>>> --- a/commit-reach.c
>>> +++ b/commit-reach.c
>>> @@ -1,8 +1,10 @@
>>>   #include "cache.h"
>>>   #include "commit.h"
>>> +#include "commit-graph.h"
>>>   #include "decorate.h"
>>>   #include "prio-queue.h"
>>>   #include "tree.h"
>>> +#include "ref-filter.c"
>> Did you mean "ref-filter.h"?
>>
>> This broke the build here.  Is there some check that we can use to
>> prevent it happening again?  I don't think we ever intentionally
>> #include a .c file.
> Here's what I'm applying locally.
>
> Thanks,
> Jonathan
>
>   commit-reach.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/commit-reach.c b/commit-reach.c
> index c996524032..86715c103c 100644
> --- a/commit-reach.c
> +++ b/commit-reach.c
> @@ -4,7 +4,7 @@
>   #include "decorate.h"
>   #include "prio-queue.h"
>   #include "tree.h"
> -#include "ref-filter.c"
> +#include "ref-filter.h"
>   #include "revision.h"
>   #include "tag.h"
>   #include "commit-reach.h"

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear
  2018-07-20 16:33   ` [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear Derrick Stolee
  2018-07-23 20:41     ` Jonathan Tan
@ 2018-09-12  4:14     ` Jeff King
  2018-09-12  4:29       ` Jeff King
  1 sibling, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-09-12  4:14 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, stolee@gmail.com,
	jonathantanmy@google.com, gitster@pobox.com

On Fri, Jul 20, 2018 at 04:33:28PM +0000, Derrick Stolee wrote:

> The can_all_from_reach_with_flags() algorithm is currently quadratic in
> the worst case, because it calls the reachable() method for every 'from'
> without tracking which commits have already been walked or which can
> already reach a commit in 'to'.
> 
> Rewrite the algorithm to walk each commit a constant number of times.

I got a segfault in upload-pack from 'next' today which bisected to this
patch (which became 4fbcca4eff). I think the problem is the line at the
bottom of this hunk:

>  int can_all_from_reach_with_flag(struct object_array *from,
>  				 unsigned int with_flag,
>  				 unsigned int assign_flag,
> -				 time_t min_commit_date)
> +				 time_t min_commit_date,
> +				 uint32_t min_generation)
>  {
> +	struct commit **list = NULL;
>  	int i;
> +	int result = 1;
>  
> +	ALLOC_ARRAY(list, from->nr);
>  	for (i = 0; i < from->nr; i++) {
> -		struct object *from_one = from->objects[i].item;
> +		list[i] = (struct commit *)from->objects[i].item;

Some of the objects in my array are not commits, but rather tags, so
this is a bogus cast.

You can see that the original code peeled them and threw away
non-commits:

>  
> -		if (from_one->flags & assign_flag)
> -			continue;
> -		from_one = deref_tag(the_repository, from_one, "a from object", 0);
> -		if (!from_one || from_one->type != OBJ_COMMIT) {
> -			/* no way to tell if this is reachable by
> -			 * looking at the ancestry chain alone, so
> -			 * leave a note to ourselves not to worry about
> -			 * this object anymore.
> -			 */
> -			from->objects[i].item->flags |= assign_flag;
> -			continue;
> -		}

So presumably we'd need to do something similar.

I think when we're called from can_all_from_reach(), we feed only
commits. But in this case the stack trace is:

  #0  can_all_from_reach_with_flag (from=0x55f95ff42f80 <want_obj>, with_flag=2048, assign_flag=16384, 
      min_commit_date=1513037626, min_generation=0) at commit-reach.c:567
  #1  0x000055f95fe20e92 in ok_to_give_up () at upload-pack.c:346
  #2  0x000055f95fe20efa in get_common_commits () at upload-pack.c:369
  #3  0x000055f95fe22ce9 in upload_pack (options=0x7fff7c1b81f0) at upload-pack.c:1065
  #4  0x000055f95fcdc11b in cmd_upload_pack (argc=1, argv=0x7fff7c1b8498, prefix=0x0)
      at builtin/upload-pack.c:67
  #5  0x000055f95fc39574 in run_builtin (p=0x55f95ff02248 <commands+2760>, argc=2, argv=0x7fff7c1b8498)
      at git.c:417
  #6  0x000055f95fc3987c in handle_builtin (argc=2, argv=0x7fff7c1b8498) at git.c:633
  #7  0x000055f95fc39b02 in cmd_main (argc=2, argv=0x7fff7c1b8498) at git.c:732
  #8  0x000055f95fce044b in main (argc=2, argv=0x7fff7c1b8498) at common-main.c:45

and my client was fetching some tags (though it would similarly break
with other object types). So I'd think the easy reproduction would be
just fetching a tag, but a trivial case didn't seem to trigger (it
probably needs a more substantial negotiation).

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear
  2018-09-12  4:14     ` Jeff King
@ 2018-09-12  4:29       ` Jeff King
  2018-09-12 13:08         ` Derrick Stolee
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-09-12  4:29 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, stolee@gmail.com,
	jonathantanmy@google.com, gitster@pobox.com

On Wed, Sep 12, 2018 at 12:14:25AM -0400, Jeff King wrote:

> > +	ALLOC_ARRAY(list, from->nr);
> >  	for (i = 0; i < from->nr; i++) {
> > -		struct object *from_one = from->objects[i].item;
> > +		list[i] = (struct commit *)from->objects[i].item;
> 
> Some of the objects in my array are not commits, but rather tags, so
> this is a bogus cast.
> 
> You can see that the original code peeled them and threw away
> non-commits:
> 
> >  
> > -		if (from_one->flags & assign_flag)
> > -			continue;
> > -		from_one = deref_tag(the_repository, from_one, "a from object", 0);
> > -		if (!from_one || from_one->type != OBJ_COMMIT) {
> > -			/* no way to tell if this is reachable by
> > -			 * looking at the ancestry chain alone, so
> > -			 * leave a note to ourselves not to worry about
> > -			 * this object anymore.
> > -			 */
> > -			from->objects[i].item->flags |= assign_flag;
> > -			continue;
> > -		}
> 
> So presumably we'd need to do something similar.

This patch seems to fix it for me. It's more or less a reversion of the
hunk above, though I didn't dig into whether I'm violating some other
assumption in your new code.

I think this function leaks "list" both at the location I noted here
(the early return marked in the patch below), as well as on normal exit.

---
 commit-reach.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index 622eeb313d..abe90a2f55 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -547,20 +547,31 @@ int can_all_from_reach_with_flag(struct object_array *from,
 {
 	struct commit **list = NULL;
 	int i;
+	int nr_commits;
 	int result = 1;
 
 	ALLOC_ARRAY(list, from->nr);
+	nr_commits = 0;
 	for (i = 0; i < from->nr; i++) {
-		list[i] = (struct commit *)from->objects[i].item;
+		struct object *from_one = from->objects[i].item;
 
-		if (parse_commit(list[i]) ||
-		    list[i]->generation < min_generation)
-			return 0;
+		from_one = deref_tag(the_repository, from_one,
+				     "a from object", 0);
+		if (!from_one || from_one->type != OBJ_COMMIT) {
+			from->objects[i].item->flags |= assign_flag;
+			continue;
+		}
+
+		list[nr_commits] = (struct commit *)from_one;
+		if (parse_commit(list[nr_commits]) ||
+		    list[nr_commits]->generation < min_generation)
+			return 0; /* is this a leak? */
+		nr_commits++;
 	}
 
-	QSORT(list, from->nr, compare_commits_by_gen);
+	QSORT(list, nr_commits, compare_commits_by_gen);
 
-	for (i = 0; i < from->nr; i++) {
+	for (i = 0; i < nr_commits; i++) {
 		/* DFS from list[i] */
 		struct commit_list *stack = NULL;
 
@@ -603,7 +614,7 @@ int can_all_from_reach_with_flag(struct object_array *from,
 	}
 
 cleanup:
-	for (i = 0; i < from->nr; i++) {
+	for (i = 0; i < nr_commits; i++) {
 		clear_commit_marks(list[i], RESULT);
 		clear_commit_marks(list[i], assign_flag);
 	}
-- 
2.19.0.600.ga229f7d059


^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear
  2018-09-12  4:29       ` Jeff King
@ 2018-09-12 13:08         ` Derrick Stolee
  0 siblings, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-09-12 13:08 UTC (permalink / raw)
  To: Jeff King, Derrick Stolee
  Cc: git@vger.kernel.org, sbeller@google.com, jonathantanmy@google.com,
	gitster@pobox.com

On 9/12/2018 12:29 AM, Jeff King wrote:
> On Wed, Sep 12, 2018 at 12:14:25AM -0400, Jeff King wrote:
>
>>> +	ALLOC_ARRAY(list, from->nr);
>>>   	for (i = 0; i < from->nr; i++) {
>>> -		struct object *from_one = from->objects[i].item;
>>> +		list[i] = (struct commit *)from->objects[i].item;
>> Some of the objects in my array are not commits, but rather tags, so
>> this is a bogus cast.
>>
>> You can see that the original code peeled them and threw away
>> non-commits:
>>
>>>   
>>> -		if (from_one->flags & assign_flag)
>>> -			continue;
>>> -		from_one = deref_tag(the_repository, from_one, "a from object", 0);
>>> -		if (!from_one || from_one->type != OBJ_COMMIT) {
>>> -			/* no way to tell if this is reachable by
>>> -			 * looking at the ancestry chain alone, so
>>> -			 * leave a note to ourselves not to worry about
>>> -			 * this object anymore.
>>> -			 */
>>> -			from->objects[i].item->flags |= assign_flag;
>>> -			continue;
>>> -		}
>> So presumably we'd need to do something similar.
> This patch seems to fix it for me. It's more or less a reversion of the
> hunk above, though I didn't dig into whether I'm violating some other
> assumption in your new code.
>
> I think this function leaks "list" both from the location I noted here,
> as well as from normal exit
Thanks for the report and the fix. I'll try to create a test that
demonstrates this and then push up a full patch.
> ---
>   commit-reach.c | 25 ++++++++++++++++++-------
>   1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/commit-reach.c b/commit-reach.c
> index 622eeb313d..abe90a2f55 100644
> --- a/commit-reach.c
> +++ b/commit-reach.c
> @@ -547,20 +547,31 @@ int can_all_from_reach_with_flag(struct object_array *from,
>   {
>   	struct commit **list = NULL;
>   	int i;
> +	int nr_commits;
>   	int result = 1;
>   
>   	ALLOC_ARRAY(list, from->nr);
> +	nr_commits = 0;
>   	for (i = 0; i < from->nr; i++) {
> -		list[i] = (struct commit *)from->objects[i].item;
> +		struct object *from_one = from->objects[i].item;
>   
> -		if (parse_commit(list[i]) ||
> -		    list[i]->generation < min_generation)
> -			return 0;
> +		from_one = deref_tag(the_repository, from_one,
> +				     "a from object", 0);
> +		if (!from_one || from_one->type != OBJ_COMMIT) {
> +			from->objects[i].item->flags |= assign_flag;
> +			continue;
> +		}
> +
> +		list[nr_commits] = (struct commit *)from_one;
> +		if (parse_commit(list[nr_commits]) ||
> +		    list[nr_commits]->generation < min_generation)
> +			return 0; /* is this a leak? */
> +		nr_commits++;
>   	}
>   
> -	QSORT(list, from->nr, compare_commits_by_gen);
> +	QSORT(list, nr_commits, compare_commits_by_gen);
>   
> -	for (i = 0; i < from->nr; i++) {
> +	for (i = 0; i < nr_commits; i++) {
>   		/* DFS from list[i] */
>   		struct commit_list *stack = NULL;
>   
> @@ -603,7 +614,7 @@ int can_all_from_reach_with_flag(struct object_array *from,
>   	}
>   
>   cleanup:
> -	for (i = 0; i < from->nr; i++) {
> +	for (i = 0; i < nr_commits; i++) {
>   		clear_commit_marks(list[i], RESULT);
>   		clear_commit_marks(list[i], assign_flag);
>   	}


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-06-28 12:31 ` [PATCH 15/16] commit-reach: make can_all_from_reach... linear Derrick Stolee via GitGitGadget
  2018-07-16 22:37   ` Stefan Beller
  2018-07-17  1:16   ` Jonathan Tan
@ 2018-10-01 19:16   ` René Scharfe
  2018-10-01 19:26     ` Derrick Stolee
  2 siblings, 1 reply; 118+ messages in thread
From: René Scharfe @ 2018-10-01 19:16 UTC (permalink / raw)
  To: Derrick Stolee via GitGitGadget, git; +Cc: Junio C Hamano, Derrick Stolee

Am 28.06.2018 um 14:31 schrieb Derrick Stolee via GitGitGadget:
> diff --git a/commit-reach.c b/commit-reach.c
> index c58e50fbb..ac132c8e4 100644
> --- a/commit-reach.c
> +++ b/commit-reach.c
> @@ -513,65 +513,88 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>  	return is_descendant_of(commit, list);
>  }
>  
> -int reachable(struct commit *from, int with_flag, int assign_flag,
> -	      time_t min_commit_date)
> +static int compare_commits_by_gen(const void *_a, const void *_b)
>  {
> -	struct prio_queue work = { compare_commits_by_commit_date };
> +	const struct commit *a = (const struct commit *)_a;
> +	const struct commit *b = (const struct commit *)_b;

This cast is bogus.  QSORT gets handed a struct commit **, i.e. an array
of pointers, and qsort(3) passes references to those pointers to the
compare function, and not the pointer values.

As a result it's unlikely that the array is sorted in the intended
order.  Given that, a silly question: Is sorting even necessary here?

Anyway, the patch below should fix it.

>  
> -	prio_queue_put(&work, from);
> -	while (work.nr) {
> -		struct commit_list *list;
> -		struct commit *commit = prio_queue_get(&work);
> -
> -		if (commit->object.flags & with_flag) {
> -			from->object.flags |= assign_flag;
> -			break;
> -		}
> -		if (!commit->object.parsed)
> -			parse_object(the_repository, &commit->object.oid);
> -		if (commit->object.flags & REACHABLE)
> -			continue;
> -		commit->object.flags |= REACHABLE;
> -		if (commit->date < min_commit_date)
> -			continue;
> -		for (list = commit->parents; list; list = list->next) {
> -			struct commit *parent = list->item;
> -			if (!(parent->object.flags & REACHABLE))
> -				prio_queue_put(&work, parent);
> -		}
> -	}
> -	from->object.flags |= REACHABLE;
> -	clear_commit_marks(from, REACHABLE);
> -	clear_prio_queue(&work);
> -	return (from->object.flags & assign_flag);
> +	if (a->generation < b->generation)
> +		return -1;
> +	if (a->generation > b->generation)
> +		return 1;
> +	return 0;
>  }
>  
>  int can_all_from_reach_with_flag(struct object_array *from,
>  				 int with_flag, int assign_flag,
> -				 time_t min_commit_date)
> +				 time_t min_commit_date,
> +				 uint32_t min_generation)
>  {
> +	struct commit **list = NULL;
>  	int i;
> +	int result = 1;
>  
> +	ALLOC_ARRAY(list, from->nr);
>  	for (i = 0; i < from->nr; i++) {
> -		struct object *from_one = from->objects[i].item;
> +		list[i] = (struct commit *)from->objects[i].item;
>  
> -		if (from_one->flags & assign_flag)
> -			continue;
> -		from_one = deref_tag(the_repository, from_one, "a from object", 0);
> -		if (!from_one || from_one->type != OBJ_COMMIT) {
> -			/* no way to tell if this is reachable by
> -			 * looking at the ancestry chain alone, so
> -			 * leave a note to ourselves not to worry about
> -			 * this object anymore.
> -			 */
> -			from->objects[i].item->flags |= assign_flag;
> -			continue;
> -		}
> -		if (!reachable((struct commit *)from_one, with_flag, assign_flag,
> -			       min_commit_date))
> +		parse_commit(list[i]);
> +
> +		if (list[i]->generation < min_generation)
>  			return 0;
>  	}
> -	return 1;
> +
> +	QSORT(list, from->nr, compare_commits_by_gen);

-- >8 --
Subject: [PATCH] commit-reach: fix cast in compare_commits_by_gen()

The elements of the array to be sorted are commit pointers, so the
comparison function gets handed references to these pointers, not
pointers to commit objects.  Cast to the right type and dereference
once to correctly get the commit reference.

Found using Clang's ASan and t5500.

Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
Does this patch have a performance impact?

 commit-reach.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/commit-reach.c b/commit-reach.c
index 00e5ceee6f..2f5e592d16 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -529,8 +529,8 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 
 static int compare_commits_by_gen(const void *_a, const void *_b)
 {
-	const struct commit *a = (const struct commit *)_a;
-	const struct commit *b = (const struct commit *)_b;
+	const struct commit *a = *(const struct commit * const *)_a;
+	const struct commit *b = *(const struct commit * const *)_b;
 
 	if (a->generation < b->generation)
 		return -1;
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-01 19:16   ` René Scharfe
@ 2018-10-01 19:26     ` Derrick Stolee
  2018-10-01 20:37       ` René Scharfe
  0 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-10-01 19:26 UTC (permalink / raw)
  To: René Scharfe, Derrick Stolee via GitGitGadget, git
  Cc: Junio C Hamano, Derrick Stolee

On 10/1/2018 3:16 PM, René Scharfe wrote:
> Am 28.06.2018 um 14:31 schrieb Derrick Stolee via GitGitGadget:
>> diff --git a/commit-reach.c b/commit-reach.c
>> index c58e50fbb..ac132c8e4 100644
>> --- a/commit-reach.c
>> +++ b/commit-reach.c
>> @@ -513,65 +513,88 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>>   	return is_descendant_of(commit, list);
>>   }
>>   
>> -int reachable(struct commit *from, int with_flag, int assign_flag,
>> -	      time_t min_commit_date)
>> +static int compare_commits_by_gen(const void *_a, const void *_b)
>>   {
>> -	struct prio_queue work = { compare_commits_by_commit_date };
>> +	const struct commit *a = (const struct commit *)_a;
>> +	const struct commit *b = (const struct commit *)_b;
> This cast is bogus.  QSORT gets handed a struct commit **, i.e. an array
> of pointers, and qsort(1) passes references to those pointers to the
> compare function, and not the pointer values.

Good catch! I'm disappointed that we couldn't use type-checking here, as 
it is quite difficult to discover that the types are wrong here.


> As a result it's unlikely that the array is sorted in the intended
> order.  Given that, a silly question: Is sorting even necessary here?

The reason to sort is to hopefully minimize the amount we walk by 
exploring the "lower" commits first. This is a performance-only thing, 
not a correctness issue (which is why the bug exists). Even then, it is 
just a heuristic.
> Anyway, the patch below should fix it.
>
> -- >8 --
> Subject: [PATCH] commit-reach: fix cast in compare_commits_by_gen()
>
> The elements of the array to be sorted are commit pointers, so the
> comparison function gets handed references to these pointers, not
> pointers to commit objects.  Cast to the right type and dereference
> once to correctly get the commit reference.
>
> Found using Clang's ASan and t5500.
>
> Signed-off-by: Rene Scharfe <l.s.r@web.de>
> ---
> Has this patch a performance impact?
>
>   commit-reach.c | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/commit-reach.c b/commit-reach.c
> index 00e5ceee6f..2f5e592d16 100644
> --- a/commit-reach.c
> +++ b/commit-reach.c
> @@ -529,8 +529,8 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>   
>   static int compare_commits_by_gen(const void *_a, const void *_b)
>   {
> -	const struct commit *a = (const struct commit *)_a;
> -	const struct commit *b = (const struct commit *)_b;
> +	const struct commit *a = *(const struct commit * const *)_a;
> +	const struct commit *b = *(const struct commit * const *)_b;

I would expect s/* const */**/ here, but I'm guessing your formulation 
is a bit extra careful about types.

Thanks!

Reviewed-by: Derrick Stolee <dstolee@microsoft.com>

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-01 19:26     ` Derrick Stolee
@ 2018-10-01 20:37       ` René Scharfe
  2018-10-04 22:59         ` René Scharfe
  0 siblings, 1 reply; 118+ messages in thread
From: René Scharfe @ 2018-10-01 20:37 UTC (permalink / raw)
  To: Derrick Stolee, Derrick Stolee via GitGitGadget, git
  Cc: Junio C Hamano, Derrick Stolee

Am 01.10.2018 um 21:26 schrieb Derrick Stolee:
> On 10/1/2018 3:16 PM, René Scharfe wrote:
>> Am 28.06.2018 um 14:31 schrieb Derrick Stolee via GitGitGadget:
>>> diff --git a/commit-reach.c b/commit-reach.c
>>> index c58e50fbb..ac132c8e4 100644
>>> --- a/commit-reach.c
>>> +++ b/commit-reach.c
>>> @@ -513,65 +513,88 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>>>   	return is_descendant_of(commit, list);
>>>   }
>>>   
>>> -int reachable(struct commit *from, int with_flag, int assign_flag,
>>> -	      time_t min_commit_date)
>>> +static int compare_commits_by_gen(const void *_a, const void *_b)
>>>   {
>>> -	struct prio_queue work = { compare_commits_by_commit_date };
>>> +	const struct commit *a = (const struct commit *)_a;
>>> +	const struct commit *b = (const struct commit *)_b;
>> This cast is bogus.  QSORT gets handed a struct commit **, i.e. an array
>> of pointers, and qsort(1) passes references to those pointers to the
>> compare function, and not the pointer values.
> 
> Good catch! I'm disappointed that we couldn't use type-checking here, as 
> it is quite difficult to discover that the types are wrong here.

Generics in C are hard, and type checking traditionally falls by the
wayside.  You could use macros for that, like klib [*] does, but that
has its own downsides (more object text, debugging the sort macros
themselves is harder).

[*] https://github.com/attractivechaos/klib/blob/master/ksort.h

>> diff --git a/commit-reach.c b/commit-reach.c
>> index 00e5ceee6f..2f5e592d16 100644
>> --- a/commit-reach.c
>> +++ b/commit-reach.c
>> @@ -529,8 +529,8 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>>   
>>   static int compare_commits_by_gen(const void *_a, const void *_b)
>>   {
>> -	const struct commit *a = (const struct commit *)_a;
>> -	const struct commit *b = (const struct commit *)_b;
>> +	const struct commit *a = *(const struct commit * const *)_a;
>> +	const struct commit *b = *(const struct commit * const *)_b;
> 
> I would expect s/* const */**/ here, but I'm guessing your formulation 
> is a bit extra careful about types.

Yeah, that second const is not necessary, as the dereference in the same
line makes it inconsequential, but I added it to make clear that this
function is really not supposed to write at all.

René

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-01 20:37       ` René Scharfe
@ 2018-10-04 22:59         ` René Scharfe
  2018-10-05 12:15           ` Derrick Stolee
  2018-10-05 16:51           ` Jeff King
  0 siblings, 2 replies; 118+ messages in thread
From: René Scharfe @ 2018-10-04 22:59 UTC (permalink / raw)
  To: Derrick Stolee, Derrick Stolee via GitGitGadget, git
  Cc: Junio C Hamano, Derrick Stolee

Am 01.10.2018 um 22:37 schrieb René Scharfe:
> Am 01.10.2018 um 21:26 schrieb Derrick Stolee:
>> On 10/1/2018 3:16 PM, René Scharfe wrote:
>>> Am 28.06.2018 um 14:31 schrieb Derrick Stolee via GitGitGadget:
>>>> diff --git a/commit-reach.c b/commit-reach.c
>>>> index c58e50fbb..ac132c8e4 100644
>>>> --- a/commit-reach.c
>>>> +++ b/commit-reach.c
>>>> @@ -513,65 +513,88 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>>>>   	return is_descendant_of(commit, list);
>>>>   }
>>>>   
>>>> -int reachable(struct commit *from, int with_flag, int assign_flag,
>>>> -	      time_t min_commit_date)
>>>> +static int compare_commits_by_gen(const void *_a, const void *_b)
>>>>   {
>>>> -	struct prio_queue work = { compare_commits_by_commit_date };
>>>> +	const struct commit *a = (const struct commit *)_a;
>>>> +	const struct commit *b = (const struct commit *)_b;
>>> This cast is bogus.  QSORT gets handed a struct commit **, i.e. an array
>>> of pointers, and qsort(1) passes references to those pointers to the
>>> compare function, and not the pointer values.
>>
>> Good catch! I'm disappointed that we couldn't use type-checking here, as 
>> it is quite difficult to discover that the types are wrong here.
> 
> Generics in C are hard, and type checking traditionally falls by the
> wayside.  You could use macros for that, like klib [*] does, but that
> has its own downsides (more object text, debugging the sort macros
> themselves is harder).
> 
> [*] https://github.com/attractivechaos/klib/blob/master/ksort.h

We could also do something like this to reduce the amount of manual
casting, but do we want to?  (Macro at the bottom, three semi-random
examples at the top.)
---
 bisect.c          | 11 +++--------
 commit-graph.c    |  9 ++-------
 commit-reach.c    | 12 +++++-------
 git-compat-util.h | 12 ++++++++++++
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/bisect.c b/bisect.c
index e8b17cf7e1..06be3a3c15 100644
--- a/bisect.c
+++ b/bisect.c
@@ -192,16 +192,11 @@ struct commit_dist {
 	int distance;
 };
 
-static int compare_commit_dist(const void *a_, const void *b_)
-{
-	struct commit_dist *a, *b;
-
-	a = (struct commit_dist *)a_;
-	b = (struct commit_dist *)b_;
+DEFINE_SORT(sort_by_commit_dist, struct commit_dist, a, b, {
 	if (a->distance != b->distance)
 		return b->distance - a->distance; /* desc sort */
 	return oidcmp(&a->commit->object.oid, &b->commit->object.oid);
-}
+})
 
 static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
 {
@@ -223,7 +218,7 @@ static struct commit_list *best_bisection_sorted(struct commit_list *list, int n
 		array[cnt].distance = distance;
 		cnt++;
 	}
-	QSORT(array, cnt, compare_commit_dist);
+	sort_by_commit_dist(array, cnt);
 	for (p = list, i = 0; i < cnt; i++) {
 		struct object *obj = &(array[i].commit->object);
 
diff --git a/commit-graph.c b/commit-graph.c
index 7f4519ec3b..a2202414e0 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -550,12 +550,7 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
 	}
 }
 
-static int commit_compare(const void *_a, const void *_b)
-{
-	const struct object_id *a = (const struct object_id *)_a;
-	const struct object_id *b = (const struct object_id *)_b;
-	return oidcmp(a, b);
-}
+DEFINE_SORT(sort_oids, struct object_id, a, b, return oidcmp(a, b))
 
 struct packed_commit_list {
 	struct commit **list;
@@ -780,7 +775,7 @@ void write_commit_graph(const char *obj_dir,
 
 	close_reachable(&oids);
 
-	QSORT(oids.list, oids.nr, commit_compare);
+	sort_oids(oids.list, oids.nr);
 
 	count_distinct = 1;
 	for (i = 1; i < oids.nr; i++) {
diff --git a/commit-reach.c b/commit-reach.c
index 2f5e592d16..3aef47c3dd 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -527,17 +527,15 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 	return is_descendant_of(commit, list);
 }
 
-static int compare_commits_by_gen(const void *_a, const void *_b)
-{
-	const struct commit *a = *(const struct commit * const *)_a;
-	const struct commit *b = *(const struct commit * const *)_b;
-
+DEFINE_SORT(sort_commits_by_gen, struct commit *, ap, bp, {
+	const struct commit *a = *ap;
+	const struct commit *b = *bp;
 	if (a->generation < b->generation)
 		return -1;
 	if (a->generation > b->generation)
 		return 1;
 	return 0;
-}
+})
 
 int can_all_from_reach_with_flag(struct object_array *from,
 				 unsigned int with_flag,
@@ -580,7 +578,7 @@ int can_all_from_reach_with_flag(struct object_array *from,
 		nr_commits++;
 	}
 
-	QSORT(list, nr_commits, compare_commits_by_gen);
+	sort_commits_by_gen(list, nr_commits);
 
 	for (i = 0; i < nr_commits; i++) {
 		/* DFS from list[i] */
diff --git a/git-compat-util.h b/git-compat-util.h
index 5f2e90932f..f9e78d69a2 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1066,6 +1066,18 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
 		qsort(base, nmemb, size, compar);
 }
 
+#define DEFINE_SORT(name, elemtype, one, two, code)			\
+static int name##_compare(const void *one##_v_, const void *two##_v_)	\
+{									\
+	elemtype const *one = one##_v_;					\
+	elemtype const *two = two##_v_;					\
+	code;								\
+}									\
+static void name(elemtype *array, size_t n)				\
+{									\
+	QSORT(array, n, name##_compare);				\
+}
+
 #ifndef HAVE_ISO_QSORT_S
 int git_qsort_s(void *base, size_t nmemb, size_t size,
 		int (*compar)(const void *, const void *, void *), void *ctx);
-- 
2.19.0



^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-04 22:59         ` René Scharfe
@ 2018-10-05 12:15           ` Derrick Stolee
  2018-10-05 16:51           ` Jeff King
  1 sibling, 0 replies; 118+ messages in thread
From: Derrick Stolee @ 2018-10-05 12:15 UTC (permalink / raw)
  To: René Scharfe, Derrick Stolee via GitGitGadget, git
  Cc: Junio C Hamano, Derrick Stolee

On 10/4/2018 6:59 PM, René Scharfe wrote:
> Am 01.10.2018 um 22:37 schrieb René Scharfe:
>> Am 01.10.2018 um 21:26 schrieb Derrick Stolee:
>>> Good catch! I'm disappointed that we couldn't use type-checking here, as
>>> it is quite difficult to discover that the types are wrong here.
>> Generics in C are hard, and type checking traditionally falls by the
>> wayside.  You could use macros for that, like klib [*] does, but that
>> has its own downsides (more object text, debugging the sort macros
>> themselves is harder).
>>
>> [*] https://github.com/attractivechaos/klib/blob/master/ksort.h
> We could also do something like this to reduce the amount of manual
> casting, but do we want to?  (Macro at the bottom, three semi-random
> examples at the top.)

I like the idea! It certainly can assist in some of the repeat work when 
preparing to QSORT, and make it less error-prone.

> ---
>   bisect.c          | 11 +++--------
>   commit-graph.c    |  9 ++-------
>   commit-reach.c    | 12 +++++-------
>   git-compat-util.h | 12 ++++++++++++
>   4 files changed, 22 insertions(+), 22 deletions(-)
>
> diff --git a/bisect.c b/bisect.c
> index e8b17cf7e1..06be3a3c15 100644
> --- a/bisect.c
> +++ b/bisect.c
> @@ -192,16 +192,11 @@ struct commit_dist {
>   	int distance;
>   };
>   
> -static int compare_commit_dist(const void *a_, const void *b_)
> -{
> -	struct commit_dist *a, *b;
> -
> -	a = (struct commit_dist *)a_;
> -	b = (struct commit_dist *)b_;
> +DEFINE_SORT(sort_by_commit_dist, struct commit_dist, a, b, {
>   	if (a->distance != b->distance)
>   		return b->distance - a->distance; /* desc sort */
>   	return oidcmp(&a->commit->object.oid, &b->commit->object.oid);
> -}
> +})
>   
>   static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
>   {
> @@ -223,7 +218,7 @@ static struct commit_list *best_bisection_sorted(struct commit_list *list, int n
>   		array[cnt].distance = distance;
>   		cnt++;
>   	}
> -	QSORT(array, cnt, compare_commit_dist);
> +	sort_by_commit_dist(array, cnt);
>   	for (p = list, i = 0; i < cnt; i++) {
>   		struct object *obj = &(array[i].commit->object);
>   
> diff --git a/commit-graph.c b/commit-graph.c
> index 7f4519ec3b..a2202414e0 100644
> --- a/commit-graph.c
> +++ b/commit-graph.c
> @@ -550,12 +550,7 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
>   	}
>   }
>   
> -static int commit_compare(const void *_a, const void *_b)
> -{
> -	const struct object_id *a = (const struct object_id *)_a;
> -	const struct object_id *b = (const struct object_id *)_b;
> -	return oidcmp(a, b);
> -}
> +DEFINE_SORT(sort_oids, struct object_id, a, b, return oidcmp(a, b))
>   
>   struct packed_commit_list {
>   	struct commit **list;
> @@ -780,7 +775,7 @@ void write_commit_graph(const char *obj_dir,
>   
>   	close_reachable(&oids);
>   
> -	QSORT(oids.list, oids.nr, commit_compare);
> +	sort_oids(oids.list, oids.nr);
>   
>   	count_distinct = 1;
>   	for (i = 1; i < oids.nr; i++) {
> diff --git a/commit-reach.c b/commit-reach.c
> index 2f5e592d16..3aef47c3dd 100644
> --- a/commit-reach.c
> +++ b/commit-reach.c
> @@ -527,17 +527,15 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
>   	return is_descendant_of(commit, list);
>   }
>   
> -static int compare_commits_by_gen(const void *_a, const void *_b)
> -{
> -	const struct commit *a = *(const struct commit * const *)_a;
> -	const struct commit *b = *(const struct commit * const *)_b;
> -
> +DEFINE_SORT(sort_commits_by_gen, struct commit *, ap, bp, {
> +	const struct commit *a = *ap;
> +	const struct commit *b = *bp;
>   	if (a->generation < b->generation)
>   		return -1;
>   	if (a->generation > b->generation)
>   		return 1;
>   	return 0;
> -}
> +})

Here, to make the macro version compile you need to cast ap and bp, 
which gives us a level of type-safety that wasn't there before. That can 
help us find errors at compile-time!

>   
>   int can_all_from_reach_with_flag(struct object_array *from,
>   				 unsigned int with_flag,
> @@ -580,7 +578,7 @@ int can_all_from_reach_with_flag(struct object_array *from,
>   		nr_commits++;
>   	}
>   
> -	QSORT(list, nr_commits, compare_commits_by_gen);
> +	sort_commits_by_gen(list, nr_commits);
>   
>   	for (i = 0; i < nr_commits; i++) {
>   		/* DFS from list[i] */
> diff --git a/git-compat-util.h b/git-compat-util.h
> index 5f2e90932f..f9e78d69a2 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -1066,6 +1066,18 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>   		qsort(base, nmemb, size, compar);
>   }
>   
> +#define DEFINE_SORT(name, elemtype, one, two, code)			\
> +static int name##_compare(const void *one##_v_, const void *two##_v_)	\
> +{									\
> +	elemtype const *one = one##_v_;					\
> +	elemtype const *two = two##_v_;					\
> +	code;								\
> +}									\
> +static void name(elemtype *array, size_t n)				\
> +{									\
> +	QSORT(array, n, name##_compare);				\
> +}
> +

I would use this macro.

Thanks,

-Stolee


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-04 22:59         ` René Scharfe
  2018-10-05 12:15           ` Derrick Stolee
@ 2018-10-05 16:51           ` Jeff King
  2018-10-05 18:48             ` René Scharfe
  2018-10-05 19:12             ` Ævar Arnfjörð Bjarmason
  1 sibling, 2 replies; 118+ messages in thread
From: Jeff King @ 2018-10-05 16:51 UTC (permalink / raw)
  To: René Scharfe
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

On Fri, Oct 05, 2018 at 12:59:02AM +0200, René Scharfe wrote:

> We could also do something like this to reduce the amount of manual
> casting, but do we want to?  (Macro at the bottom, three semi-random
> examples at the top.)
> [...]
> diff --git a/git-compat-util.h b/git-compat-util.h
> index 5f2e90932f..f9e78d69a2 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -1066,6 +1066,18 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>  		qsort(base, nmemb, size, compar);
>  }
>  
> +#define DEFINE_SORT(name, elemtype, one, two, code)			\
> +static int name##_compare(const void *one##_v_, const void *two##_v_)	\
> +{									\
> +	elemtype const *one = one##_v_;					\
> +	elemtype const *two = two##_v_;					\
> +	code;								\
> +}									\
> +static void name(elemtype *array, size_t n)				\
> +{									\
> +	QSORT(array, n, name##_compare);				\
> +}

Interesting. When I saw the callers of this macro, I first thought you
were just removing the casts from the comparison function, but the real
value here is the matching QSORT() wrapper which provides the type
safety.

I'm not wild about declaring functions inside macros, just because it
makes tools like ctags less useful (but I have certainly been guilty of
it myself). I'd also worry that taking "code" as a macro parameter might
not scale (what happens if the code has a comma in it?)

I think we can address that last part by switching the definition order.
Like:

  #define DEFINE_SORT(name, elemtype, one, two) \
  static int name##_compare(const void *, const void *);                \
  static void name(elemtype *array, size_t n)                           \
  {                                                                     \
	QSORT(array, n, name##_compare);                                \
  }                                                                     \
  static int name##_compare(const void *one##_v_, const void *two##_v_) \
  {                                                                     \
	elemtype const *one = one##_v_;					\
	elemtype const *two = two##_v_;					\

And then expecting the caller to do:

  DEFINE_SORT(foo, struct foo, a, b)
     /* code goes here */
  }

The unbalanced braces are nasty, though (and likely to screw up editor
formatting, highlighting, etc).

I wonder if it would be possible to just declare the comparison function
with its real types, and then teach QSORT() to do a type check. That
would require typeof() at least, but it would be OK for the type-check
to be available only to gcc/clang users, I think.

I'm not quite sure what that type-check would look like, but I was
thinking something along the lines of (inside the QSORT macro):

  do {
    /* this will yield a type mismatch if fed the wrong function */
    int (*check)(const typeof(array), const typeof(array)) = compar;
    sane_qsort(array, n, sizeof(*array), compar);
  } while (0)

I have no idea if that even comes close to compiling, though.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 16:51           ` Jeff King
@ 2018-10-05 18:48             ` René Scharfe
  2018-10-05 19:08               ` Jeff King
  2018-10-05 19:12             ` Ævar Arnfjörð Bjarmason
  1 sibling, 1 reply; 118+ messages in thread
From: René Scharfe @ 2018-10-05 18:48 UTC (permalink / raw)
  To: Jeff King
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

Am 05.10.2018 um 18:51 schrieb Jeff King:
> On Fri, Oct 05, 2018 at 12:59:02AM +0200, René Scharfe wrote:
> 
>> We could also do something like this to reduce the amount of manual
>> casting, but do we want to?  (Macro at the bottom, three semi-random
>> examples at the top.)
>> [...]
>> diff --git a/git-compat-util.h b/git-compat-util.h
>> index 5f2e90932f..f9e78d69a2 100644
>> --- a/git-compat-util.h
>> +++ b/git-compat-util.h
>> @@ -1066,6 +1066,18 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>>  		qsort(base, nmemb, size, compar);
>>  }
>>  
>> +#define DEFINE_SORT(name, elemtype, one, two, code)			\
>> +static int name##_compare(const void *one##_v_, const void *two##_v_)	\
>> +{									\
>> +	elemtype const *one = one##_v_;					\
>> +	elemtype const *two = two##_v_;					\
>> +	code;								\
>> +}									\
>> +static void name(elemtype *array, size_t n)				\
>> +{									\
>> +	QSORT(array, n, name##_compare);				\
>> +}
> 
> Interesting. When I saw the callers of this macro, I first thought you
> were just removing the casts from the comparison function, but the real
> value here is the matching QSORT() wrapper which provides the type
> safety.

Indeed.

> I'm not wild about declaring functions inside macros, just because it
> makes tools like ctags less useful (but I have certainly been guilty of
> it myself). I'd also worry that taking "code" as a macro parameter might
> not scale (what happens if the code has a comma in it?)

It works fine, as long as the comma is surrounded by parentheses, so
function calls with more than one parameter are fine without any change.

> I think we can address that last part by switching the definition order.
> Like:
> 
>   #define DEFINE_SORT(name, elemtype, one, two) \
>   static int name##_compare(const void *, const void *);                \
>   static void name(elemtype *array, size_t n)                           \
>   {                                                                     \
> 	QSORT(array, n, name##_compare);                                \
>   }                                                                     \
>   static int name##_compare(const void *one##_v_, const void *two##_v_) \
>   {                                                                     \
> 	elemtype const *one = one##_v_;					\
> 	elemtype const *two = two##_v_;					\
> 
> And then expecting the caller to do:
> 
>   DEFINE_SORT(foo, struct foo, a, b)
>      /* code goes here */
>   }
> 
> The unbalanced braces are nasty, though (and likely to screw up editor
> formatting, highlighting, etc).

Adding an extra pair of parentheses if needed is also not ideal, but has
fewer downsides, I think.

> I wonder if it would be possible to just declare the comparison function
> with its real types, and then teach QSORT() to do a type check. That
> would require typeof() at least, but it would be OK for the type-check
> to be available only to gcc/clang users, I think.
> 
> I'm not quite sure what that type-check would look like, but I was
> thinking something along the lines of (inside the QSORT macro):
> 
>   do {
>     /* this will yield a type mismatch if fed the wrong function */
>     int (*check)(const typeof(array), const typeof(array)) = compar;
>     sane_qsort(array, n, sizeof(*array), compar);
>   } while (0)
> 
> I have no idea if that even comes close to compiling, though.

If the comparison function has proper types then we need to declare a
version with void pointer parameters as well to give to qsort(3).  I
think using cast function pointers is undefined.  Perhaps like this?

---
 bisect.c          | 11 +++++------
 commit-graph.c    |  8 ++++----
 commit-reach.c    | 12 +++++++-----
 git-compat-util.h | 14 ++++++++++++++
 4 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/bisect.c b/bisect.c
index e8b17cf7e1..1fc6278c6b 100644
--- a/bisect.c
+++ b/bisect.c
@@ -192,17 +192,16 @@ struct commit_dist {
 	int distance;
 };
 
-static int compare_commit_dist(const void *a_, const void *b_)
+static int compare_commit_dist(const struct commit_dist *a,
+			       const struct commit_dist *b)
 {
-	struct commit_dist *a, *b;
-
-	a = (struct commit_dist *)a_;
-	b = (struct commit_dist *)b_;
 	if (a->distance != b->distance)
 		return b->distance - a->distance; /* desc sort */
 	return oidcmp(&a->commit->object.oid, &b->commit->object.oid);
 }
 
+DEFINE_SORT(sort_by_commit_dist, struct commit_dist *, compare_commit_dist)
+
 static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
 {
 	struct commit_list *p;
@@ -223,7 +222,7 @@ static struct commit_list *best_bisection_sorted(struct commit_list *list, int n
 		array[cnt].distance = distance;
 		cnt++;
 	}
-	QSORT(array, cnt, compare_commit_dist);
+	sort_by_commit_dist(array, cnt);
 	for (p = list, i = 0; i < cnt; i++) {
 		struct object *obj = &(array[i].commit->object);
 
diff --git a/commit-graph.c b/commit-graph.c
index 7f4519ec3b..07d302fefd 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -550,13 +550,13 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
 	}
 }
 
-static int commit_compare(const void *_a, const void *_b)
+static int commit_compare(const struct object_id *a, const struct object_id *b)
 {
-	const struct object_id *a = (const struct object_id *)_a;
-	const struct object_id *b = (const struct object_id *)_b;
 	return oidcmp(a, b);
 }
 
+DEFINE_SORT(sort_oids, struct object_id *, commit_compare)
+
 struct packed_commit_list {
 	struct commit **list;
 	int nr;
@@ -780,7 +780,7 @@ void write_commit_graph(const char *obj_dir,
 
 	close_reachable(&oids);
 
-	QSORT(oids.list, oids.nr, commit_compare);
+	sort_oids(oids.list, oids.nr);
 
 	count_distinct = 1;
 	for (i = 1; i < oids.nr; i++) {
diff --git a/commit-reach.c b/commit-reach.c
index 2f5e592d16..496c4201af 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -527,11 +527,11 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
 	return is_descendant_of(commit, list);
 }
 
-static int compare_commits_by_gen(const void *_a, const void *_b)
+static int compare_commits_by_gen(const struct commit * const *ap,
+				  const struct commit * const *bp)
 {
-	const struct commit *a = *(const struct commit * const *)_a;
-	const struct commit *b = *(const struct commit * const *)_b;
-
+	const struct commit *a = *ap;
+	const struct commit *b = *bp;
 	if (a->generation < b->generation)
 		return -1;
 	if (a->generation > b->generation)
@@ -539,6 +539,8 @@ static int compare_commits_by_gen(const void *_a, const void *_b)
 	return 0;
 }
 
+DEFINE_SORT(sort_commits_by_gen, struct commit **, compare_commits_by_gen)
+
 int can_all_from_reach_with_flag(struct object_array *from,
 				 unsigned int with_flag,
 				 unsigned int assign_flag,
@@ -580,7 +582,7 @@ int can_all_from_reach_with_flag(struct object_array *from,
 		nr_commits++;
 	}
 
-	QSORT(list, nr_commits, compare_commits_by_gen);
+	sort_commits_by_gen(list, nr_commits);
 
 	for (i = 0; i < nr_commits; i++) {
 		/* DFS from list[i] */
diff --git a/git-compat-util.h b/git-compat-util.h
index 5f2e90932f..2462173790 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1066,6 +1066,20 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
 		qsort(base, nmemb, size, compar);
 }
 
+#define DEFINE_SORT(name, type, compare)				\
+static int compare##_void(const void *one, const void *two)		\
+{									\
+	return compare(one, two);					\
+}									\
+static void name(type base, size_t nmemb)				\
+{									\
+	const type dummy = NULL;					\
+	if (nmemb > 1)							\
+		qsort(base, nmemb, sizeof(base[0]), compare##_void);	\
+	else if (0)							\
+		compare(dummy, dummy);					\
+}
+
 #ifndef HAVE_ISO_QSORT_S
 int git_qsort_s(void *base, size_t nmemb, size_t size,
 		int (*compar)(const void *, const void *, void *), void *ctx);
-- 
2.19.0

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 18:48             ` René Scharfe
@ 2018-10-05 19:08               ` Jeff King
  2018-10-05 19:36                 ` René Scharfe
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-10-05 19:08 UTC (permalink / raw)
  To: René Scharfe
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

On Fri, Oct 05, 2018 at 08:48:27PM +0200, René Scharfe wrote:

> If the comparison function has proper types then we need to declare a
> version with void pointer parameters as well to give to qsort(3).  I
> think using cast function pointers is undefined.  Perhaps like this?

I think it's undefined, too, though we have many instances already.

> +#define DEFINE_SORT(name, type, compare)				\
> +static int compare##_void(const void *one, const void *two)		\
> +{									\
> +	return compare(one, two);					\
> +}									\
> +static void name(type base, size_t nmemb)				\
> +{									\
> +	const type dummy = NULL;					\
> +	if (nmemb > 1)							\
> +		qsort(base, nmemb, sizeof(base[0]), compare##_void);	\
> +	else if (0)							\
> +		compare(dummy, dummy);					\
> +}

I do like that this removes the need to have the code block as part of
the macro.

Did you measure to see if there is any runtime impact?

As an aside, we may need to take a "scope" argument in case somebody
wants to do this in a non-static way. It would be nice if we could make
this "static inline", but I don't think even a clever compiler would be
able to omit the wrapper call.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 16:51           ` Jeff King
  2018-10-05 18:48             ` René Scharfe
@ 2018-10-05 19:12             ` Ævar Arnfjörð Bjarmason
  2018-10-05 19:28               ` Jeff King
  1 sibling, 1 reply; 118+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2018-10-05 19:12 UTC (permalink / raw)
  To: Jeff King
  Cc: René Scharfe, Derrick Stolee,
	Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee


On Fri, Oct 05 2018, Jeff King wrote:

> On Fri, Oct 05, 2018 at 12:59:02AM +0200, René Scharfe wrote:
>
>> We could also do something like this to reduce the amount of manual
>> casting, but do we want to?  (Macro at the bottom, three semi-random
>> examples at the top.)
>> [...]
>> diff --git a/git-compat-util.h b/git-compat-util.h
>> index 5f2e90932f..f9e78d69a2 100644
>> --- a/git-compat-util.h
>> +++ b/git-compat-util.h
>> @@ -1066,6 +1066,18 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>>  		qsort(base, nmemb, size, compar);
>>  }
>>
>> +#define DEFINE_SORT(name, elemtype, one, two, code)			\
>> +static int name##_compare(const void *one##_v_, const void *two##_v_)	\
>> +{									\
>> +	elemtype const *one = one##_v_;					\
>> +	elemtype const *two = two##_v_;					\
>> +	code;								\
>> +}									\
>> +static void name(elemtype *array, size_t n)				\
>> +{									\
>> +	QSORT(array, n, name##_compare);				\
>> +}
>
> Interesting. When I saw the callers of this macro, I first thought you
> were just removing the casts from the comparison function, but the real
> value here is the matching QSORT() wrapper which provides the type
> safety.
>
> I'm not wild about declaring functions inside macros, just because it
> makes tools like ctags less useful (but I have certainly been guilty of
> it myself). I'd also worry that taking "code" as a macro parameter might
> not scale (what happens if the code has a comma in it?)

There's always the option of generating the C code as part of some build
step and carrying around a big C file with various type-safe functions
that only differ in the types they operate on. It can even be committed
to source control.

That sucks in some ways for sure, but is a lot friendlier for grepping,
ctags etc.

I've just barely resisted the urge to include that thread where we were
discussing making the code C++-compiler compatible in the References
header :)

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 19:12             ` Ævar Arnfjörð Bjarmason
@ 2018-10-05 19:28               ` Jeff King
  2018-10-05 19:42                 ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-10-05 19:28 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: René Scharfe, Derrick Stolee,
	Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee

On Fri, Oct 05, 2018 at 09:12:09PM +0200, Ævar Arnfjörð Bjarmason wrote:

> > I'm not wild about declaring functions inside macros, just because it
> > makes tools like ctags less useful (but I have certainly been guilty of
> > it myself). I'd also worry that taking "code" as a macro parameter might
> > not scale (what happens if the code has a comma in it?)
> 
> There's always the option of generating the C code as part of some build
> step and carrying around a big C file with various type-safe functions
> that only differ in the types they operate on. It can even be committed
> to source control.
> 
> That sucks in some ways for sure, but is a lot friendlier for grepping,
> ctags etc.

Yeah, in a lot of ways the C preprocessor is not great for larger-scale
code generation. I was hoping we could get away without having the
bodies of these functions as part of the generated bit, though.

I think what René showed later in the thread is not too bad in that
respect.

> I've just barely resisted the urge to include that thread where we were
> discussing making the code C++-compiler compatible in the References
> header :)

Yes. The main thing I would want out of using C++ is type-safe,
efficient data structures. IIRC, early versions of C++ were implemented
via code generation, and we're basically walking down that same road.

I'm not sure where the right cutoff is, though. It's nice to pick up
the solution somebody else produced, but requiring a C++ compiler to
build Git is a pretty big step that I imagine will create a lot of new
problems. (We're just now allowing C99 -- I don't even want to think
about what kind of compiler issues we'll run into on antique systems
trying to use C++).

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 19:08               ` Jeff King
@ 2018-10-05 19:36                 ` René Scharfe
  2018-10-05 19:42                   ` Jeff King
  0 siblings, 1 reply; 118+ messages in thread
From: René Scharfe @ 2018-10-05 19:36 UTC (permalink / raw)
  To: Jeff King
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

Am 05.10.2018 um 21:08 schrieb Jeff King:
> On Fri, Oct 05, 2018 at 08:48:27PM +0200, René Scharfe wrote:
>> +#define DEFINE_SORT(name, type, compare)				\
>> +static int compare##_void(const void *one, const void *two)		\
>> +{									\
>> +	return compare(one, two);					\
>> +}									\
>> +static void name(type base, size_t nmemb)				\
>> +{									\
>> +	const type dummy = NULL;					\
>> +	if (nmemb > 1)							\
>> +		qsort(base, nmemb, sizeof(base[0]), compare##_void);	\
>> +	else if (0)							\
>> +		compare(dummy, dummy);					\
>> +}
> 
> I do like that this removes the need to have the code block as part of
> the macro.
> 
> Did you measure to see if there is any runtime impact?

No, but I wouldn't expect any -- the generated code should be the same
in most cases.

Here's an example: https://godbolt.org/z/gwXENy.

> As an aside, we may need to take a "scope" argument in case somebody
> wants to do this in a non-static way.

Sure.  (They could easily wrap the static function, but a macro
parameter is simpler still.)

> It would be nice if we could make
> this "static inline", but I don't think even a clever compiler would be
> able to omit the wrapper call.

It could, if it was to inline qsort(3).  Current compilers don't do
that AFAIK, but I wouldn't be too surprised if they started to.

The typed comparison function can be inlined into the one with the void
pointers, though.

René

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 19:36                 ` René Scharfe
@ 2018-10-05 19:42                   ` Jeff King
  2018-10-14 14:29                     ` René Scharfe
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-10-05 19:42 UTC (permalink / raw)
  To: René Scharfe
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

On Fri, Oct 05, 2018 at 09:36:28PM +0200, René Scharfe wrote:

> Am 05.10.2018 um 21:08 schrieb Jeff King:
> > On Fri, Oct 05, 2018 at 08:48:27PM +0200, René Scharfe wrote:
> >> +#define DEFINE_SORT(name, type, compare)				\
> >> +static int compare##_void(const void *one, const void *two)		\
> >> +{									\
> >> +	return compare(one, two);					\
> >> +}									\
> >> +static void name(type base, size_t nmemb)				\
> >> +{									\
> >> +	const type dummy = NULL;					\
> >> +	if (nmemb > 1)							\
> >> +		qsort(base, nmemb, sizeof(base[0]), compare##_void);	\
> >> +	else if (0)							\
> >> +		compare(dummy, dummy);					\
> >> +}
> > 
> > I do like that this removes the need to have the code block as part of
> > the macro.
> > 
> > Did you measure to see if there is any runtime impact?
> 
> No, but I wouldn't expect any -- the generated code should be the same
> in most cases.
> 
> Here's an example: https://godbolt.org/z/gwXENy.

OK, that's good enough for me.

> The typed comparison function can be inlined into the one with the void
> pointers, though.

Right, that makes sense. I suspect it depends on the comparison function
being static, but in a DEFINE_SORT() world, they generally could be.

So I like this approach.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 19:28               ` Jeff King
@ 2018-10-05 19:42                 ` Ævar Arnfjörð Bjarmason
  2018-10-05 19:44                   ` Jeff King
  0 siblings, 1 reply; 118+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2018-10-05 19:42 UTC (permalink / raw)
  To: Jeff King
  Cc: René Scharfe, Derrick Stolee,
	Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee


On Fri, Oct 05 2018, Jeff King wrote:

> On Fri, Oct 05, 2018 at 09:12:09PM +0200, Ævar Arnfjörð Bjarmason wrote:
>
>> > I'm not wild about declaring functions inside macros, just because it
>> > makes tools like ctags less useful (but I have certainly been guilty of
>> > it myself). I'd also worry that taking "code" as a macro parameter might
>> > not scale (what happens if the code has a comma in it?)
>>
>> There's always the option of generating the C code as part of some build
>> step and carrying around a big C file with various type-safe functions
>> that only differ in the types they operate on. It can even be committed
>> to source control.
>>
>> That sucks in some ways for sure, but is a lot friendlier for grepping,
>> ctags etc.
>
> Yeah, in a lot of ways the C preprocessor is not great for larger-scale
> code generation. I was hoping we could get away without having the
> bodies of these functions as part of the generated bit, though.
>
> I think what René showed later in the thread is not too bad in that
> respect.
>
>> I've just barely resisted the urge to include that thread where we were
>> discussing making the code C++-compiler compatible in the References
>> header :)
>
> Yes. The main thing I would want out of using C++ is type-safe,
> efficient data structures. IIRC, early versions of C++ were implemented
> via code generation, and we're basically walking down that same road.
>
> I'm not sure where the right cutoff is, though. It's nice to pick up
> the solution somebody else produced, but requiring a C++ compiler to
> build Git is a pretty big step[...]

No comment on whether git should use C++...

>  that I imagine will create a lot of new problems. (We're just now
> allowing C99 -- I don't even want to think about what kind of compiler
> issues we'll run into on antique systems trying to use C++).

...But just on this point: I was under the impression that this problem
was way easier with C++. I.e. the reason we're just now using C99 for
portable C projects is because Microsoft for years refused to put any
effort into updating their compiler to support newer C versions, while
keeping up-to-date with C++, and that this has only recently started
changing: https://en.wikipedia.org/wiki/C99#Implementations

Maybe there was some other popular vendor of C/C++ compilers that had
the inverse of that story, but I'm not aware of any.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 19:42                 ` Ævar Arnfjörð Bjarmason
@ 2018-10-05 19:44                   ` Jeff King
  0 siblings, 0 replies; 118+ messages in thread
From: Jeff King @ 2018-10-05 19:44 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: René Scharfe, Derrick Stolee,
	Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee

On Fri, Oct 05, 2018 at 09:42:49PM +0200, Ævar Arnfjörð Bjarmason wrote:

> >  that I imagine will create a lot of new problems. (We're just now
> > allowing C99 -- I don't even want to think about what kind of compiler
> > issues we'll run into on antique systems trying to use C++).
> 
> ...But just on this point: I was under the impression that this problem
> was way easier with C++. I.e. the reason we're just now using C99 for
> portable C projects is because Microsoft for years refused to put any
> effort into updating their compiler to support newer C versions, while
> keeping up-to-date with C++, and that this has only recently started
> changing: https://en.wikipedia.org/wiki/C99#Implementations
> 
> Maybe there was some other popular vendor of C/C++ compilers that had
> the inverse of that story, but I'm not aware of any.

I'd worry about what the C++ story is on AIX, etc.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-05 19:42                   ` Jeff King
@ 2018-10-14 14:29                     ` René Scharfe
  2018-10-15 15:31                       ` Derrick Stolee
                                         ` (2 more replies)
  0 siblings, 3 replies; 118+ messages in thread
From: René Scharfe @ 2018-10-14 14:29 UTC (permalink / raw)
  To: Jeff King
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

Am 05.10.2018 um 21:42 schrieb Jeff King:
> On Fri, Oct 05, 2018 at 09:36:28PM +0200, René Scharfe wrote:
> 
>> Am 05.10.2018 um 21:08 schrieb Jeff King:
>>> On Fri, Oct 05, 2018 at 08:48:27PM +0200, René Scharfe wrote:
>>>> +#define DEFINE_SORT(name, type, compare)				\
>>>> +static int compare##_void(const void *one, const void *two)		\
>>>> +{									\
>>>> +	return compare(one, two);					\
>>>> +}									\
>>>> +static void name(type base, size_t nmemb)				\
>>>> +{									\
>>>> +	const type dummy = NULL;					\
>>>> +	if (nmemb > 1)							\
>>>> +		qsort(base, nmemb, sizeof(base[0]), compare##_void);	\
>>>> +	else if (0)							\
>>>> +		compare(dummy, dummy);					\
>>>> +}
>>>
>>> I do like that this removes the need to have the code block as part of
>>> the macro.
>>>
>>> Did you measure to see if there is any runtime impact?
>>
>> No, but I wouldn't expect any -- the generated code should be the same
>> in most cases.
>>
>> Here's an example: https://godbolt.org/z/gwXENy.
> 
> OK, that's good enough for me.
> 
>> The typed comparison function can be inlined into the one with the void
>> pointers, though.
> 
> Right, that makes sense. I suspect it depends on the comparison function
> being static, but in a DEFINE_SORT() world, they generally could be.
> 
> So I like this approach.

It still has some repetition, converted code is a bit longer than the
current one, and I don't know how to build a Coccinelle rule that would
do that conversion.

Looked for a possibility to at least leave QSORT call-sites alone by
enhancing that macro, but didn't find any.  Found a few websites
showing off mindblowing macros, though, this one in particular:

https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms

Anyway, drove the generative approach a bit further, and came up with
the new DEFINE_SORT below.  I'm unsure about the name; perhaps it should
be called DEFINE_SORT_BY_COMPARE_FUNCTION_BODY, but that's a bit long.
It handles casts and const attributes behind the scenes and avoids
repetition, but looks a bit weird, as it is placed where a function
signature would go.

Apart from that the macro is simple and doesn't use any tricks or
added checks.  It just sets up boilerplate functions to offer type-safe
sorting.

diffcore-rename.c and refs/packed-backend.c receive special treatment in
the patch because their compare functions are used outside of sorting as
well.  I made them take typed pointers nevertheless and used them from
DEFINE_SORT; the wrapper generated by that macro is supposed to be
private.  Given that such reuse is rare, I think we don't need a way
to make it public.

What do y'all think about this direction?

---
 bisect.c                |  8 ++------
 builtin/describe.c      |  6 ++----
 builtin/fmt-merge-msg.c | 10 ++++------
 builtin/index-pack.c    | 14 ++++----------
 builtin/name-rev.c      |  5 ++---
 builtin/pack-objects.c  |  7 ++-----
 builtin/remote.c        |  6 ++----
 builtin/shortlog.c      | 15 ++++++++-------
 commit-graph.c          |  6 ++----
 delta-islands.c         |  8 +++-----
 diff.c                  |  8 +++-----
 diffcore-delta.c        |  7 ++-----
 diffcore-order.c        |  7 ++-----
 diffcore-rename.c       | 11 +++++++----
 git-compat-util.h       | 15 +++++++++++++++
 help.c                  |  7 ++-----
 line-log.c              |  7 ++-----
 midx.c                  | 12 ++++--------
 pack-check.c            |  6 ++----
 pathspec.c              |  8 ++------
 refs/packed-backend.c   | 11 ++++++++---
 sha1-array.c            |  4 ++--
 sha1-name.c             |  4 ++--
 23 files changed, 84 insertions(+), 108 deletions(-)

diff --git a/bisect.c b/bisect.c
index e8b17cf7e1..25257c2e69 100644
--- a/bisect.c
+++ b/bisect.c
@@ -192,12 +192,8 @@ struct commit_dist {
 	int distance;
 };
 
-static int compare_commit_dist(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_by_commit_dist, struct commit_dist *, a, b)
 {
-	struct commit_dist *a, *b;
-
-	a = (struct commit_dist *)a_;
-	b = (struct commit_dist *)b_;
 	if (a->distance != b->distance)
 		return b->distance - a->distance; /* desc sort */
 	return oidcmp(&a->commit->object.oid, &b->commit->object.oid);
@@ -223,7 +219,7 @@ static struct commit_list *best_bisection_sorted(struct commit_list *list, int n
 		array[cnt].distance = distance;
 		cnt++;
 	}
-	QSORT(array, cnt, compare_commit_dist);
+	sort_by_commit_dist(array, cnt);
 	for (p = list, i = 0; i < cnt; i++) {
 		struct object *obj = &(array[i].commit->object);
 
diff --git a/builtin/describe.c b/builtin/describe.c
index 22c0541da5..44eaadf0a0 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -220,10 +220,8 @@ struct possible_tag {
 	unsigned flag_within;
 };
 
-static int compare_pt(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_pt, struct possible_tag *, a, b)
 {
-	struct possible_tag *a = (struct possible_tag *)a_;
-	struct possible_tag *b = (struct possible_tag *)b_;
 	if (a->depth != b->depth)
 		return a->depth - b->depth;
 	if (a->found_order != b->found_order)
@@ -410,7 +408,7 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst)
 			    oid_to_hex(cmit_oid));
 	}
 
-	QSORT(all_matches, match_cnt, compare_pt);
+	sort_pt(all_matches, match_cnt);
 
 	if (gave_up_on) {
 		commit_list_insert_by_date(gave_up_on, &list);
diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index 59a40342b6..2c84349a1b 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -268,9 +268,9 @@ static void record_person(int which, struct string_list *people,
 	unuse_commit_buffer(commit, buffer);
 }
 
-static int cmp_string_list_util_as_integral(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_string_list_by_util_as_integral,
+	    struct string_list_item *, a, b)
 {
-	const struct string_list_item *a = a_, *b = b_;
 	return util_as_integral(b) - util_as_integral(a);
 }
 
@@ -319,10 +319,8 @@ static void add_people_info(struct strbuf *out,
 			    struct string_list *authors,
 			    struct string_list *committers)
 {
-	QSORT(authors->items, authors->nr,
-	      cmp_string_list_util_as_integral);
-	QSORT(committers->items, committers->nr,
-	      cmp_string_list_util_as_integral);
+	sort_string_list_by_util_as_integral(authors->items, authors->nr);
+	sort_string_list_by_util_as_integral(committers->items, committers->nr);
 
 	credit_people(out, authors, 'a');
 	credit_people(out, committers, 'c');
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 2004e25da2..d3e91afb50 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1066,21 +1066,15 @@ static void find_unresolved_deltas(struct base_data *base)
 	}
 }
 
-static int compare_ofs_delta_entry(const void *a, const void *b)
+DEFINE_SORT(static, sort_by_offset, struct ofs_delta_entry *, delta_a, delta_b)
 {
-	const struct ofs_delta_entry *delta_a = a;
-	const struct ofs_delta_entry *delta_b = b;
-
 	return delta_a->offset < delta_b->offset ? -1 :
 	       delta_a->offset > delta_b->offset ?  1 :
 	       0;
 }
 
-static int compare_ref_delta_entry(const void *a, const void *b)
+DEFINE_SORT(static, sort_by_oid, struct ref_delta_entry *, delta_a, delta_b)
 {
-	const struct ref_delta_entry *delta_a = a;
-	const struct ref_delta_entry *delta_b = b;
-
 	return oidcmp(&delta_a->oid, &delta_b->oid);
 }
 
@@ -1206,8 +1200,8 @@ static void resolve_deltas(void)
 		return;
 
 	/* Sort deltas by base SHA1/offset for fast searching */
-	QSORT(ofs_deltas, nr_ofs_deltas, compare_ofs_delta_entry);
-	QSORT(ref_deltas, nr_ref_deltas, compare_ref_delta_entry);
+	sort_by_offset(ofs_deltas, nr_ofs_deltas);
+	sort_by_oid(ref_deltas, nr_ref_deltas);
 
 	if (verbose || show_resolving_progress)
 		progress = start_progress(_("Resolving deltas"),
diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index f1cb45c227..e8d2da1101 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -196,9 +196,8 @@ static void add_to_tip_table(const struct object_id *oid, const char *refname,
 	tip_table.sorted = 0;
 }
 
-static int tipcmp(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_by_oid, struct tip_table_entry *, a, b)
 {
-	const struct tip_table_entry *a = a_, *b = b_;
 	return oidcmp(&a->oid, &b->oid);
 }
 
@@ -293,7 +292,7 @@ static const char *get_exact_ref_match(const struct object *o)
 		return NULL;
 
 	if (!tip_table.sorted) {
-		QSORT(tip_table.table, tip_table.nr, tipcmp);
+		sort_by_oid(tip_table.table, tip_table.nr);
 		tip_table.sorted = 1;
 	}
 
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index e6316d294d..fc37ae186f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2868,11 +2868,8 @@ static void mark_in_pack_object(struct object *object, struct packed_git *p, str
  * Compare the objects in the offset order, in order to emulate the
  * "git rev-list --objects" output that produced the pack originally.
  */
-static int ofscmp(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_in_pack_objects, struct in_pack_object *, a, b)
 {
-	struct in_pack_object *a = (struct in_pack_object *)a_;
-	struct in_pack_object *b = (struct in_pack_object *)b_;
-
 	if (a->offset < b->offset)
 		return -1;
 	else if (a->offset > b->offset)
@@ -2912,7 +2909,7 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs)
 	}
 
 	if (in_pack.nr) {
-		QSORT(in_pack.array, in_pack.nr, ofscmp);
+		sort_in_pack_objects(in_pack.array, in_pack.nr);
 		for (i = 0; i < in_pack.nr; i++) {
 			struct object *o = in_pack.array[i].object;
 			add_object_entry(&o->oid, o->type, "", 0);
diff --git a/builtin/remote.c b/builtin/remote.c
index f7edf7f2cb..0f9e70d445 100644
--- a/builtin/remote.c
+++ b/builtin/remote.c
@@ -1011,10 +1011,8 @@ static int add_push_to_show_info(struct string_list_item *push_item, void *cb_da
  * Sorting comparison for a string list that has push_info
  * structs in its util field
  */
-static int cmp_string_with_push(const void *va, const void *vb)
+DEFINE_SORT(static, sort_by_push_info, struct string_list_item *, a, b)
 {
-	const struct string_list_item *a = va;
-	const struct string_list_item *b = vb;
 	const struct push_info *a_push = a->util;
 	const struct push_info *b_push = b->util;
 	int cmp = strcmp(a->string, b->string);
@@ -1216,7 +1214,7 @@ static int show(int argc, const char **argv)
 
 		info.width = info.width2 = 0;
 		for_each_string_list(&states.push, add_push_to_show_info, &info);
-		QSORT(info.list->items, info.list->nr, cmp_string_with_push);
+		sort_by_push_info(info.list->items, info.list->nr);
 		if (info.list->nr)
 			printf_ln(Q_("  Local ref configured for 'git push'%s:",
 				     "  Local refs configured for 'git push'%s:",
diff --git a/builtin/shortlog.c b/builtin/shortlog.c
index 3898a2c9c4..ec1ace6ed5 100644
--- a/builtin/shortlog.c
+++ b/builtin/shortlog.c
@@ -29,15 +29,13 @@ static char const * const shortlog_usage[] = {
  */
 #define UTIL_TO_INT(x) ((intptr_t)(x)->util)
 
-static int compare_by_counter(const void *a1, const void *a2)
+DEFINE_SORT(static, sort_by_counter, struct string_list_item *, i1, i2)
 {
-	const struct string_list_item *i1 = a1, *i2 = a2;
 	return UTIL_TO_INT(i2) - UTIL_TO_INT(i1);
 }
 
-static int compare_by_list(const void *a1, const void *a2)
+DEFINE_SORT(static, sort_by_list, struct string_list_item *, i1, i2)
 {
-	const struct string_list_item *i1 = a1, *i2 = a2;
 	const struct string_list *l1 = i1->util, *l2 = i2->util;
 
 	if (l1->nr < l2->nr)
@@ -338,9 +336,12 @@ void shortlog_output(struct shortlog *log)
 	int i, j;
 	struct strbuf sb = STRBUF_INIT;
 
-	if (log->sort_by_number)
-		QSORT(log->list.items, log->list.nr,
-		      log->summary ? compare_by_counter : compare_by_list);
+	if (log->sort_by_number) {
+		if (log->summary)
+			sort_by_counter(log->list.items, log->list.nr);
+		else
+			sort_by_list(log->list.items, log->list.nr);
+	}
 	for (i = 0; i < log->list.nr; i++) {
 		const struct string_list_item *item = &log->list.items[i];
 		if (log->summary) {
diff --git a/commit-graph.c b/commit-graph.c
index 7f4519ec3b..fc2fbf22b4 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -550,10 +550,8 @@ static void write_graph_chunk_large_edges(struct hashfile *f,
 	}
 }
 
-static int commit_compare(const void *_a, const void *_b)
+DEFINE_SORT(static, sort_oids, struct object_id *, a, b)
 {
-	const struct object_id *a = (const struct object_id *)_a;
-	const struct object_id *b = (const struct object_id *)_b;
 	return oidcmp(a, b);
 }
 
@@ -780,7 +778,7 @@ void write_commit_graph(const char *obj_dir,
 
 	close_reachable(&oids);
 
-	QSORT(oids.list, oids.nr, commit_compare);
+	sort_oids(oids.list, oids.nr);
 
 	count_distinct = 1;
 	for (i = 1; i < oids.nr; i++) {
diff --git a/delta-islands.c b/delta-islands.c
index 8e5018e406..29232831d4 100644
--- a/delta-islands.c
+++ b/delta-islands.c
@@ -229,11 +229,9 @@ struct tree_islands_todo {
 	unsigned int depth;
 };
 
-static int tree_depth_compare(const void *a, const void *b)
+DEFINE_SORT(static, sort_by_tree_depth, struct tree_islands_todo *,
+	    todo_a, todo_b)
 {
-	const struct tree_islands_todo *todo_a = a;
-	const struct tree_islands_todo *todo_b = b;
-
 	return todo_a->depth - todo_b->depth;
 }
 
@@ -262,7 +260,7 @@ void resolve_tree_islands(int progress, struct packing_data *to_pack)
 			nr++;
 		}
 	}
-	QSORT(todo, nr, tree_depth_compare);
+	sort_by_tree_depth(todo, nr);
 
 	if (progress)
 		progress_state = start_progress(_("Propagating island marks"), nr);
diff --git a/diff.c b/diff.c
index f0c7557b40..ecb1961173 100644
--- a/diff.c
+++ b/diff.c
@@ -2924,10 +2924,8 @@ static long gather_dirstat(struct diff_options *opt, struct dirstat_dir *dir,
 	return sum_changes;
 }
 
-static int dirstat_compare(const void *_a, const void *_b)
+DEFINE_SORT(static, sort_by_name, struct dirstat_file *, a, b)
 {
-	const struct dirstat_file *a = _a;
-	const struct dirstat_file *b = _b;
 	return strcmp(a->name, b->name);
 }
 
@@ -3021,7 +3019,7 @@ static void show_dirstat(struct diff_options *options)
 		return;
 
 	/* Show all directories with more than x% of the changes */
-	QSORT(dir.files, dir.nr, dirstat_compare);
+	sort_by_name(dir.files, dir.nr);
 	gather_dirstat(options, &dir, changed, "", 0);
 }
 
@@ -3065,7 +3063,7 @@ static void show_dirstat_by_line(struct diffstat_t *data, struct diff_options *o
 		return;
 
 	/* Show all directories with more than x% of the changes */
-	QSORT(dir.files, dir.nr, dirstat_compare);
+	sort_by_name(dir.files, dir.nr);
 	gather_dirstat(options, &dir, changed, "", 0);
 }
 
diff --git a/diffcore-delta.c b/diffcore-delta.c
index c83d45a047..53ed2d5766 100644
--- a/diffcore-delta.c
+++ b/diffcore-delta.c
@@ -107,11 +107,8 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
 	}
 }
 
-static int spanhash_cmp(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_by_spanhash, struct spanhash *, a, b)
 {
-	const struct spanhash *a = a_;
-	const struct spanhash *b = b_;
-
 	/* A count of zero compares at the end.. */
 	if (!a->cnt)
 		return !b->cnt ? 0 : 1;
@@ -158,7 +155,7 @@ static struct spanhash_top *hash_chars(struct diff_filespec *one)
 		n = 0;
 		accum1 = accum2 = 0;
 	}
-	QSORT(hash->data, 1ul << hash->alloc_log2, spanhash_cmp);
+	sort_by_spanhash(hash->data, 1ul << hash->alloc_log2);
 	return hash;
 }
 
diff --git a/diffcore-order.c b/diffcore-order.c
index 19e73311f9..e8abd84242 100644
--- a/diffcore-order.c
+++ b/diffcore-order.c
@@ -78,11 +78,8 @@ static int match_order(const char *path)
 	return order_cnt;
 }
 
-static int compare_objs_order(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_by_order, struct obj_order *, a, b)
 {
-	struct obj_order const *a, *b;
-	a = (struct obj_order const *)a_;
-	b = (struct obj_order const *)b_;
 	if (a->order != b->order)
 		return a->order - b->order;
 	return a->orig_order - b->orig_order;
@@ -101,7 +98,7 @@ void order_objects(const char *orderfile, obj_path_fn_t obj_path,
 		objs[i].orig_order = i;
 		objs[i].order = match_order(obj_path(objs[i].obj));
 	}
-	QSORT(objs, nr, compare_objs_order);
+	sort_by_order(objs, nr);
 }
 
 static const char *pair_pathtwo(void *obj)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index daddd9b28a..0e256377db 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -234,10 +234,8 @@ static void record_rename_pair(int dst_index, int src_index, int score)
  * We sort the rename similarity matrix with the score, in descending
  * order (the most similar first).
  */
-static int score_compare(const void *a_, const void *b_)
+static int score_compare(const struct diff_score *a, const struct diff_score *b)
 {
-	const struct diff_score *a = a_, *b = b_;
-
 	/* sink the unused ones to the bottom */
 	if (a->dst < 0)
 		return (0 <= b->dst);
@@ -250,6 +248,11 @@ static int score_compare(const void *a_, const void *b_)
 	return b->score - a->score;
 }
 
+DEFINE_SORT(static, sort_by_score, struct diff_score *, a, b)
+{
+	return score_compare(a, b);
+}
+
 struct file_similarity {
 	struct hashmap_entry entry;
 	int index;
@@ -576,7 +579,7 @@ void diffcore_rename(struct diff_options *options)
 	stop_progress(&progress);
 
 	/* cost matrix sorted by most to least similar pair */
-	QSORT(mx, dst_cnt * NUM_CANDIDATE_PER_DST, score_compare);
+	sort_by_score(mx, dst_cnt * NUM_CANDIDATE_PER_DST);
 
 	rename_count += find_renames(mx, dst_cnt, minimum_score, 0);
 	if (detect_rename == DIFF_DETECT_COPY)
diff --git a/git-compat-util.h b/git-compat-util.h
index 5f2e90932f..491230fc57 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1066,6 +1066,21 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
 		qsort(base, nmemb, size, compar);
 }
 
+#define DECLARE_SORT(scope, name, elemtype) \
+scope void name(elemtype, size_t)
+
+#define DEFINE_SORT(scope, name, elemtype, one, two)			\
+static int name##_compare(const elemtype, const elemtype);		\
+static int name##_compare_void(const void *a, const void *b)		\
+{									\
+	return name##_compare(a, b);					\
+}									\
+scope void name(elemtype base, size_t nmemb)				\
+{									\
+	QSORT(base, nmemb, name##_compare_void);			\
+}									\
+static int name##_compare(const elemtype one, const elemtype two)
+
 #ifndef HAVE_ISO_QSORT_S
 int git_qsort_s(void *base, size_t nmemb, size_t size,
 		int (*compar)(const void *, const void *, void *), void *ctx);
diff --git a/help.c b/help.c
index 96f6d221ed..c50dd58943 100644
--- a/help.c
+++ b/help.c
@@ -90,11 +90,8 @@ static void print_command_list(const struct cmdname_help *cmds,
 	}
 }
 
-static int cmd_name_cmp(const void *elem1, const void *elem2)
+DEFINE_SORT(static, sort_help_by_name, struct cmdname_help *, e1, e2)
 {
-	const struct cmdname_help *e1 = elem1;
-	const struct cmdname_help *e2 = elem2;
-
 	return strcmp(e1->name, e2->name);
 }
 
@@ -114,7 +111,7 @@ static void print_cmd_by_category(const struct category_description *catdesc)
 		if (longest < strlen(cmds[i].name))
 			longest = strlen(cmds[i].name);
 	}
-	QSORT(cmds, nr, cmd_name_cmp);
+	sort_help_by_name(cmds, nr);
 
 	for (i = 0; catdesc[i].desc; i++) {
 		uint32_t mask = catdesc[i].category;
diff --git a/line-log.c b/line-log.c
index 72a5fed661..e240959166 100644
--- a/line-log.c
+++ b/line-log.c
@@ -72,11 +72,8 @@ void range_set_append(struct range_set *rs, long a, long b)
 	range_set_append_unsafe(rs, a, b);
 }
 
-static int range_cmp(const void *_r, const void *_s)
+DEFINE_SORT(static, sort_by_start, struct range *, r, s)
 {
-	const struct range *r = _r;
-	const struct range *s = _s;
-
 	/* this could be simply 'return r.start-s.start', but for the types */
 	if (r->start == s->start)
 		return 0;
@@ -113,7 +110,7 @@ void sort_and_merge_range_set(struct range_set *rs)
 	unsigned int i;
 	unsigned int o = 0; /* output cursor */
 
-	QSORT(rs->ranges, rs->nr, range_cmp);
+	sort_by_start(rs->ranges, rs->nr);
 
 	for (i = 0; i < rs->nr; i++) {
 		if (rs->ranges[i].start == rs->ranges[i].end)
diff --git a/midx.c b/midx.c
index 713d6f9dde..4407db7949 100644
--- a/midx.c
+++ b/midx.c
@@ -419,10 +419,8 @@ struct pack_pair {
 	char *pack_name;
 };
 
-static int pack_pair_compare(const void *_a, const void *_b)
+DEFINE_SORT(static, sort_by_pack_name, struct pack_pair *, a, b)
 {
-	struct pack_pair *a = (struct pack_pair *)_a;
-	struct pack_pair *b = (struct pack_pair *)_b;
 	return strcmp(a->pack_name, b->pack_name);
 }
 
@@ -438,7 +436,7 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
 		pairs[i].pack_name = pack_names[i];
 	}
 
-	QSORT(pairs, nr_packs, pack_pair_compare);
+	sort_by_pack_name(pairs, nr_packs);
 
 	for (i = 0; i < nr_packs; i++) {
 		pack_names[i] = pairs[i].pack_name;
@@ -455,10 +453,8 @@ struct pack_midx_entry {
 	uint64_t offset;
 };
 
-static int midx_oid_compare(const void *_a, const void *_b)
+DEFINE_SORT(static, sort_midx, struct pack_midx_entry *, a, b)
 {
-	const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
-	const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
 	int cmp = oidcmp(&a->oid, &b->oid);
 
 	if (cmp)
@@ -573,7 +569,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 			}
 		}
 
-		QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
+		sort_midx(entries_by_fanout, nr_fanout);
 
 		/*
 		 * The batch is now sorted by OID and then mtime (descending).
diff --git a/pack-check.c b/pack-check.c
index fa5f0ff8fa..d4825f80e0 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -15,10 +15,8 @@ struct idx_entry {
 	unsigned int nr;
 };
 
-static int compare_entries(const void *e1, const void *e2)
+DEFINE_SORT(static, sort_by_offset, struct idx_entry *, entry1, entry2)
 {
-	const struct idx_entry *entry1 = e1;
-	const struct idx_entry *entry2 = e2;
 	if (entry1->offset < entry2->offset)
 		return -1;
 	if (entry1->offset > entry2->offset)
@@ -102,7 +100,7 @@ static int verify_packfile(struct packed_git *p,
 		entries[i].offset = nth_packed_object_offset(p, i);
 		entries[i].nr = i;
 	}
-	QSORT(entries, nr_objects, compare_entries);
+	sort_by_offset(entries, nr_objects);
 
 	for (i = 0; i < nr_objects; i++) {
 		void *data;
diff --git a/pathspec.c b/pathspec.c
index 6f005996fd..b559c3d783 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -490,12 +490,8 @@ static void init_pathspec_item(struct pathspec_item *item, unsigned flags,
 	}
 }
 
-static int pathspec_item_cmp(const void *a_, const void *b_)
+DEFINE_SORT(static, sort_by_match, struct pathspec_item *, a, b)
 {
-	struct pathspec_item *a, *b;
-
-	a = (struct pathspec_item *)a_;
-	b = (struct pathspec_item *)b_;
 	return strcmp(a->match, b->match);
 }
 
@@ -610,7 +606,7 @@ void parse_pathspec(struct pathspec *pathspec,
 	if (pathspec->magic & PATHSPEC_MAXDEPTH) {
 		if (flags & PATHSPEC_KEEP_ORDER)
 			BUG("PATHSPEC_MAXDEPTH_VALID and PATHSPEC_KEEP_ORDER are incompatible");
-		QSORT(pathspec->items, pathspec->nr, pathspec_item_cmp);
+		sort_by_match(pathspec->items, pathspec->nr);
 	}
 }
 
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 74e2996e93..1339ffd8c3 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -271,9 +271,9 @@ struct snapshot_record {
 	size_t len;
 };
 
-static int cmp_packed_ref_records(const void *v1, const void *v2)
+static int cmp_packed_ref_records(const struct snapshot_record *e1,
+				  const struct snapshot_record *e2)
 {
-	const struct snapshot_record *e1 = v1, *e2 = v2;
 	const char *r1 = e1->start + GIT_SHA1_HEXSZ + 1;
 	const char *r2 = e2->start + GIT_SHA1_HEXSZ + 1;
 
@@ -291,6 +291,11 @@ static int cmp_packed_ref_records(const void *v1, const void *v2)
 	}
 }
 
+DEFINE_SORT(static, sort_packed_ref_records, struct snapshot_record *, a, b)
+{
+	return cmp_packed_ref_records(a, b);
+}
+
 /*
  * Compare a snapshot record at `rec` to the specified NUL-terminated
  * refname.
@@ -380,7 +385,7 @@ static void sort_snapshot(struct snapshot *snapshot)
 		goto cleanup;
 
 	/* We need to sort the memory. First we sort the records array: */
-	QSORT(records, nr, cmp_packed_ref_records);
+	sort_packed_ref_records(records, nr);
 
 	/*
 	 * Allocate a new chunk of memory, and copy the old memory to
diff --git a/sha1-array.c b/sha1-array.c
index b94e0ec0f5..2dcaafe9dc 100644
--- a/sha1-array.c
+++ b/sha1-array.c
@@ -9,14 +9,14 @@ void oid_array_append(struct oid_array *array, const struct object_id *oid)
 	array->sorted = 0;
 }
 
-static int void_hashcmp(const void *a, const void *b)
+DEFINE_SORT(static, sort_by_oid, struct object_id *, a, b)
 {
 	return oidcmp(a, b);
 }
 
 static void oid_array_sort(struct oid_array *array)
 {
-	QSORT(array->oid, array->nr, void_hashcmp);
+	sort_by_oid(array->oid, array->nr);
 	array->sorted = 1;
 }
 
diff --git a/sha1-name.c b/sha1-name.c
index faa60f69e3..39fc68d3c6 100644
--- a/sha1-name.c
+++ b/sha1-name.c
@@ -412,7 +412,7 @@ static int collect_ambiguous(const struct object_id *oid, void *data)
 	return 0;
 }
 
-static int sort_ambiguous(const void *a, const void *b)
+DEFINE_SORT(static, sort_ambiguous, struct object_id *, a, b)
 {
 	int a_type = oid_object_info(the_repository, a, NULL);
 	int b_type = oid_object_info(the_repository, b, NULL);
@@ -486,7 +486,7 @@ static int get_short_oid(const char *name, int len, struct object_id *oid,
 
 		advise(_("The candidates are:"));
 		for_each_abbrev(ds.hex_pfx, collect_ambiguous, &collect);
-		QSORT(collect.oid, collect.nr, sort_ambiguous);
+		sort_ambiguous(collect.oid, collect.nr);
 
 		if (oid_array_for_each(&collect, show_ambiguous_object, &ds))
 			BUG("show_ambiguous_object shouldn't return non-zero");
-- 
2.19.1

^ permalink raw reply related	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-14 14:29                     ` René Scharfe
@ 2018-10-15 15:31                       ` Derrick Stolee
  2018-10-15 16:26                         ` René Scharfe
  2018-10-16 23:09                       ` Junio C Hamano
  2018-10-17  8:33                       ` Jeff King
  2 siblings, 1 reply; 118+ messages in thread
From: Derrick Stolee @ 2018-10-15 15:31 UTC (permalink / raw)
  To: René Scharfe, Jeff King
  Cc: Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee

On 10/14/2018 10:29 AM, René Scharfe wrote:
> It still has some repetition, converted code is a bit longer than the
> current one, and I don't know how to build a Coccinelle rule that would
> do that conversion.
>
> Looked for a possibility to at least leave QSORT call-sites alone by
> enhancing that macro, but didn't find any.  Found a few websites
> showing off mindblowing macros, though, this one in particular:
>
> https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms
>
> Anyway, drove the generative approach a bit further, and came up with
> the new DEFINE_SORT below.  I'm unsure about the name; perhaps it should
> be called DEFINE_SORT_BY_COMPARE_FUNCTION_BODY, but that's a bit long.
> It handles casts and const attributes behind the scenes and avoids
> repetition, but looks a bit weird, as it is placed where a function
> signature would go.
>
> Apart from that the macro is simple and doesn't use any tricks or
> added checks.  It just sets up boilerplate functions to offer type-safe
> sorting.
>
> diffcore-rename.c and refs/packed-backend.c receive special treatment in
> the patch because their compare functions are used outside of sorting as
> well.  I made them take typed pointers nevertheless and used them from
> DEFINE_SORT; the wrapper generated by that macro is supposed to be
> private.  Given that such reuse is rare, I think we don't need a way
> to make it public.
>
> What do y'all think about this direction?
>
> ---
[snip]
> diff --git a/git-compat-util.h b/git-compat-util.h
> index 5f2e90932f..491230fc57 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -1066,6 +1066,21 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>   		qsort(base, nmemb, size, compar);
>   }
>   
> +#define DECLARE_SORT(scope, name, elemtype) \
> +scope void name(elemtype, size_t)
> +
> +#define DEFINE_SORT(scope, name, elemtype, one, two)			\
> +static int name##_compare(const elemtype, const elemtype);		\
> +static int name##_compare_void(const void *a, const void *b)		\
> +{									\
> +	return name##_compare(a, b);					\
> +}									\
> +scope void name(elemtype base, size_t nmemb)				\
> +{									\
> +	QSORT(base, nmemb, name##_compare_void);			\
> +}									\
> +static int name##_compare(const elemtype one, const elemtype two)
> +

Since you were worried about the "private" name of the compare function, 
maybe split this macro into two: DEFINE_COMPARE and DEFINE_SORT. Then, 
if someone wants direct access to the compare function, they could use 
the DEFINE_COMPARE to ensure the typing is correct, and use QSORT as 
normal with name##_compare_void.

As I think about this, I think this is less of a problem than would
justify this split. The commit-slab definitions generate a lot of methods using 
the "name##" convention, so perhaps we should just trust developers 
using the macros to look up the macro definition or similar examples. In 
that sense, including a conversion that consumes the compare function 
directly can be a signpost for future callers.

I would say that maybe the times where you need to do something special 
should be pulled out into their own patches, so we can call attention to 
them directly.

[snip]
> diff --git a/midx.c b/midx.c
> index 713d6f9dde..4407db7949 100644
> --- a/midx.c
> +++ b/midx.c
> @@ -419,10 +419,8 @@ struct pack_pair {
>   	char *pack_name;
>   };
>   
> -static int pack_pair_compare(const void *_a, const void *_b)
> +DEFINE_SORT(static, sort_by_pack_name, struct pack_pair *, a, b)
>   {
> -	struct pack_pair *a = (struct pack_pair *)_a;
> -	struct pack_pair *b = (struct pack_pair *)_b;
>   	return strcmp(a->pack_name, b->pack_name);
>   }
>   
> @@ -438,7 +436,7 @@ static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *p
>   		pairs[i].pack_name = pack_names[i];
>   	}
>   
> -	QSORT(pairs, nr_packs, pack_pair_compare);
> +	sort_by_pack_name(pairs, nr_packs);

I like this "sort_by_" convention..

>   
>   	for (i = 0; i < nr_packs; i++) {
>   		pack_names[i] = pairs[i].pack_name;
> @@ -455,10 +453,8 @@ struct pack_midx_entry {
>   	uint64_t offset;
>   };
>   
> -static int midx_oid_compare(const void *_a, const void *_b)
> +DEFINE_SORT(static, sort_midx, struct pack_midx_entry *, a, b)
>   {
> -	const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
> -	const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
>   	int cmp = oidcmp(&a->oid, &b->oid);
>   
>   	if (cmp)
> @@ -573,7 +569,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
>   			}
>   		}
>   
> -		QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
> +		sort_midx(entries_by_fanout, nr_fanout);

...but it isn't followed here. Perhaps "sort_by_oid"?

Thanks,
-Stolee

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-15 15:31                       ` Derrick Stolee
@ 2018-10-15 16:26                         ` René Scharfe
  0 siblings, 0 replies; 118+ messages in thread
From: René Scharfe @ 2018-10-15 16:26 UTC (permalink / raw)
  To: Derrick Stolee, Jeff King
  Cc: Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee

Am 15.10.2018 um 17:31 schrieb Derrick Stolee:
> On 10/14/2018 10:29 AM, René Scharfe wrote:
>> diff --git a/git-compat-util.h b/git-compat-util.h
>> index 5f2e90932f..491230fc57 100644
>> --- a/git-compat-util.h
>> +++ b/git-compat-util.h
>> @@ -1066,6 +1066,21 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>>   		qsort(base, nmemb, size, compar);
>>   }
>>   
>> +#define DECLARE_SORT(scope, name, elemtype) \
>> +scope void name(elemtype, size_t)
>> +
>> +#define DEFINE_SORT(scope, name, elemtype, one, two)			\
>> +static int name##_compare(const elemtype, const elemtype);		\
>> +static int name##_compare_void(const void *a, const void *b)		\
>> +{									\
>> +	return name##_compare(a, b);					\
>> +}									\
>> +scope void name(elemtype base, size_t nmemb)				\
>> +{									\
>> +	QSORT(base, nmemb, name##_compare_void);			\
>> +}									\
>> +static int name##_compare(const elemtype one, const elemtype two)
>> +
> 
> Since you were worried about the "private" name of the compare function, 
> maybe split this macro into two: DEFINE_COMPARE and DEFINE_SORT. Then, 
> if someone wants direct access to the compare function, they could use 
> the DEFINE_COMPARE to ensure the typing is correct, and use QSORT as 
> normal with name##_compare_void.

The pointers are converted to const void * somewhere along the way from
qsort() to compare function.  Splitting the macro would require type
check tricks to make sure the types of the compare function matches the
array to be sorted.  Letting a single macro bake it all into a layer
cake of generated functions is a lot simpler.

> As I think about this, I think this is less of a problem than is worth 
> this split. The commit-slab definitions generate a lot of methods using 
> the "name##" convention, so perhaps we should just trust developers 
> using the macros to look up the macro definition or similar examples. In 
> that sense, including a conversion that consumes the compare function 
> directly can be a signpost for future callers.

Using the generated compare function name directly is a bit awkward; e.g.
in the two example cases it would be sort_by_score_compare() and
sort_packed_ref_records_compare().  Defining the real compare function
the usual way (with a proper name) and having the DEFINE_SORT block call
it is a bit more repetitive, but clean and understandable IMHO.

We also could just leave complicated cases alone..

> I would say that maybe the times where you need to do something special 
> should be pulled out into their own patches, so we can call attention to 
> them directly.

Right; this patch was just a sketch.

> I like this "sort_by_" convention..
> 
>>   
>>   	for (i = 0; i < nr_packs; i++) {
>>   		pack_names[i] = pairs[i].pack_name;
>> @@ -455,10 +453,8 @@ struct pack_midx_entry {
>>   	uint64_t offset;
>>   };
>>   
>> -static int midx_oid_compare(const void *_a, const void *_b)
>> +DEFINE_SORT(static, sort_midx, struct pack_midx_entry *, a, b)
>>   {
>> -	const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
>> -	const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
>>   	int cmp = oidcmp(&a->oid, &b->oid);
>>   
>>   	if (cmp)
>> @@ -573,7 +569,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
>>   			}
>>   		}
>>   
>> -		QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
>> +		sort_midx(entries_by_fanout, nr_fanout);
> 
> ...but it isn't followed here. Perhaps "sort_by_oid"?

That function sorts by oid, pack_mtime, and pack_int_id, but including
all these fields in the name is a bit unwieldy.  Being unspecific by
calling it sort_midx() was the lazy way out.  Mentioning only oid is a
bit misleading.  Perhaps sort_by_oid_etc()?

René


^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-14 14:29                     ` René Scharfe
  2018-10-15 15:31                       ` Derrick Stolee
@ 2018-10-16 23:09                       ` Junio C Hamano
  2018-10-17  8:33                       ` Jeff King
  2 siblings, 0 replies; 118+ messages in thread
From: Junio C Hamano @ 2018-10-16 23:09 UTC (permalink / raw)
  To: René Scharfe
  Cc: Jeff King, Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Derrick Stolee

René Scharfe <l.s.r@web.de> writes:

> Apart from that the macro is simple and doesn't use any tricks or
> added checks.  It just sets up boilerplate functions to offer type-safe
> sorting.
> ...
> diff --git a/git-compat-util.h b/git-compat-util.h
> index 5f2e90932f..491230fc57 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -1066,6 +1066,21 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
>  		qsort(base, nmemb, size, compar);
>  }
>  
> +#define DECLARE_SORT(scope, name, elemtype) \
> +scope void name(elemtype, size_t)
> +
> +#define DEFINE_SORT(scope, name, elemtype, one, two)			\
> +static int name##_compare(const elemtype, const elemtype);		\
> +static int name##_compare_void(const void *a, const void *b)		\
> +{									\
> +	return name##_compare(a, b);					\
> +}									\
> +scope void name(elemtype base, size_t nmemb)				\
> +{									\
> +	QSORT(base, nmemb, name##_compare_void);			\
> +}									\
> +static int name##_compare(const elemtype one, const elemtype two)

... and here comes the body of the comparison function that takes
two "things" we are about, i.e. elements of the array being sorted.

Quite cleanly done and the result looks pleasant, at least to me.

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-14 14:29                     ` René Scharfe
  2018-10-15 15:31                       ` Derrick Stolee
  2018-10-16 23:09                       ` Junio C Hamano
@ 2018-10-17  8:33                       ` Jeff King
  2020-11-18  2:16                         ` Jonathan Nieder
  2 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2018-10-17  8:33 UTC (permalink / raw)
  To: René Scharfe
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

On Sun, Oct 14, 2018 at 04:29:06PM +0200, René Scharfe wrote:

> Anyway, drove the generative approach a bit further, and came up with
> the new DEFINE_SORT below.  I'm unsure about the name; perhaps it should
> be called DEFINE_SORT_BY_COMPARE_FUNCTION_BODY, but that's a bit long.
> It handles casts and const attributes behind the scenes and avoids
> repetition, but looks a bit weird, as it is placed where a function
> signature would go.
> 
> Apart from that the macro is simple and doesn't use any tricks or
> added checks.  It just sets up boilerplate functions to offer type-safe
> sorting.
> 
> diffcore-rename.c and refs/packed-backend.c receive special treatment in
> the patch because their compare functions are used outside of sorting as
> well.  I made them take typed pointers nevertheless and used them from
> DEFINE_SORT; the wrapper generated by that macro is supposed to be
> private.  Given that such reuse is rare, I think we don't need a way
> to make it public.
> 
> What do y'all think about this direction?

I think it's the best we're likely to do, and is an improvement on the
status quo.

The patch looks overall sane to me. I think DEFINE_SORT() is a fine
name.

I think given a macro parameter "foo" you could generate sort_by_foo()
and compare_foo(), which would eliminate the extra layer in those
two cases you mentioned. But I'm also fine with the approach you've
shown here.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2018-10-17  8:33                       ` Jeff King
@ 2020-11-18  2:16                         ` Jonathan Nieder
  2020-11-18  6:54                           ` Jeff King
  0 siblings, 1 reply; 118+ messages in thread
From: Jonathan Nieder @ 2020-11-18  2:16 UTC (permalink / raw)
  To: Jeff King
  Cc: René Scharfe, Derrick Stolee,
	Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee

Hi,

Jeff King wrote:
> On Sun, Oct 14, 2018 at 04:29:06PM +0200, René Scharfe wrote:

>> Anyway, drove the generative approach a bit further, and came up with
>> the new DEFINE_SORT below.  I'm unsure about the name; perhaps it should
>> be called DEFINE_SORT_BY_COMPARE_FUNCTION_BODY, but that's a bit long.
>> It handles casts and const attributes behind the scenes and avoids
>> repetition, but looks a bit weird, as it is placed where a function
>> signature would go.
>>
>> Apart from that the macro is simple and doesn't use any tricks or
>> added checks.  It just sets up boilerplate functions to offer type-safe
>> sorting.
>>
>> diffcore-rename.c and refs/packed-backend.c receive special treatment in
>> the patch because their compare functions are used outside of sorting as
>> well.  I made them take typed pointers nevertheless and used them from
>> DEFINE_SORT; the wrapper generated by that macro is supposed to be
>> private.  Given that such reuse is rare, I think we don't need a way
>> to make it public.
>>
>> What do y'all think about this direction?
>
> I think it's the best we're likely to do, and is an improvement on the
> status quo.
>
> The patch looks overall sane to me. I think DEFINE_SORT() is a fine
> name.

Since this came up in [1], I took a glance at this.

I also think it looks reasonable, though it's possible to do better if
we're willing to (1) cast between pointers to function with different
signatures, which is portable in practice but I don't believe the C
standard speaks to and (2) conditionally make use of gcc extensions,
for typechecking.

For example, CCAN's asort[2] does typechecking on the arrays passed in
and the callback cookie parameter to qsort_r, with no extra
boilerplate or run-time overhead involved[3].

(The core of that macro is ccan's typesafe_cb_cast[4]:

  /* CC0 (Public domain) - see LICENSE file for details */

  #if HAVE_TYPEOF && HAVE_BUILTIN_CHOOSE_EXPR && HAVE_BUILTIN_TYPES_COMPATIBLE_P
  /**
   * typesafe_cb_cast - only cast an expression if it matches a given type
   * @desttype: the type to cast to
   * @oktype: the type we allow
   * @expr: the expression to cast
   *
   * This macro is used to create functions which allow multiple types.
   * The result of this macro is used somewhere that a @desttype type is
   * expected: if @expr is exactly of type @oktype, then it will be
   * cast to @desttype type, otherwise left alone.
   *
   * This macro can be used in static initializers.
   *
   * This is merely useful for warnings: if the compiler does not
   * support the primitives required for typesafe_cb_cast(), it becomes an
   * unconditional cast, and the @oktype argument is not used.  In
   * particular, this means that @oktype can be a type which uses the
   * "typeof": it will not be evaluated if typeof is not supported.
   *
   * Example:
   *      // We can take either an unsigned long or a void *.
   *      void _set_some_value(void *val);
   *      #define set_some_value(e)                       \
   *              _set_some_value(typesafe_cb_cast(void *, unsigned long, (e)))
   */
  #define typesafe_cb_cast(desttype, oktype, expr)                        \
          __builtin_choose_expr(                                          \
                  __builtin_types_compatible_p(__typeof__(0?(expr):(expr)), \
                                               oktype),                   \
                  (desttype)(expr), (expr))
  #else
  #define typesafe_cb_cast(desttype, oktype, expr) ((desttype)(expr))
  #endif
)

Thanks,
Jonathan

[1] https://lore.kernel.org/git/20201117223011.GA642234@coredump.intra.peff.net/
[2] https://git.ozlabs.org/?p=ccan;a=blob;f=ccan/asort/asort.h;hb=HEAD
[3] https://ccodearchive.net/info/asort.html
[4] https://git.ozlabs.org/?p=ccan;a=blob;f=ccan/typesafe_cb/typesafe_cb.h;hb=HEAD

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2020-11-18  2:16                         ` Jonathan Nieder
@ 2020-11-18  6:54                           ` Jeff King
  2020-11-18 17:47                             ` René Scharfe
  0 siblings, 1 reply; 118+ messages in thread
From: Jeff King @ 2020-11-18  6:54 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: René Scharfe, Derrick Stolee,
	Derrick Stolee via GitGitGadget, git, Junio C Hamano,
	Derrick Stolee

On Tue, Nov 17, 2020 at 06:16:05PM -0800, Jonathan Nieder wrote:

> Since this came up in [1], I took a glance at this.
> 
> I also think it looks reasonable, though it's possible to do better if
> we're willing to (1) cast between pointers to function with different
> signatures, which is portable in practice but I don't believe the C
> standard speaks to and (2) conditionally make use of gcc extensions,
> for typechecking.

The C standard definitely is not OK with calling a function through a
wrong declaration or cast. I won't find chapter and verse, but here's a
practical example:

-- >8 --
#include <stdio.h>
#include <stdint.h>

void foo(uint32_t a, uint32_t b)
{
	printf("got a = %u\n", a);
	printf("got b = %u\n", b);
}

typedef void (*almost_foo)(uint64_t, uint64_t);

int main(void)
{
	almost_foo bar = (almost_foo)foo;

	printf("real call:\n");
	foo(1, 2);
	printf("via cast:\n");
	bar(3, 4);
	return 0;
}
-- >8 --

The caller thinks it's passing uint64_t integers, but the function
thinks it's getting uint32_t integers. The output will depend on your
calling conventions. If I compile it on my 64-bit Linux machine, it
produces what you'd expect:

  $ gcc foo.c
  $ ./a.out
  real call:
  got a = 1
  got b = 2
  via cast:
  got a = 3
  got b = 4

That's because we're using the System V AMD64 ABI convention, which
passes the first six parameters via registers. And even after that, each
parameter on the stack uses 8 bytes (even if it's smaller), so the two
representations are equivalent.

But if I compile it in 32-bit mode, it doesn't work:

  $ gcc -m32 foo.c
  $ ./a.out
  real call:
  got a = 1
  got b = 2
  via cast:
  got a = 3
  got b = 0

That's because it's using the cdecl convention, which puts everything on
the stack, and which uses a minimum of 4 bytes per parameter. So each
64-bit value results in two 32-bit pushes onto the stack (of 0, and 3).

Now in practice you're probably fine as long as the number and sizes of
the parameters are the same between the function definition and what the
caller casts to. And so if we're talking about casting individual
parameters between a void pointer and another pointer type, that would
usually be fine (in practice; the standard only says that a void pointer
can hold a pointer to any object type, so it _could_ be larger than some
other pointers.  I don't know of any modern systems where this is true,
though).

Which is all a roundabout way of saying that yes, I think this kind of
cast is probably OK in practice.

I _think_ the ccan type-checking macro you pointed to would catch this
sufficiently on systems with typeof() that it would also protect systems
with different calling conventions. But I admit it's pretty dense.

So I dunno. The nice thing is that this puts the ugliness all inside of
QSORT(), which becomes magically type-safe. But it involves importing a
lot of tricky bits under the hood.

The downside of René's patch is that it hides the declaration of the
comparison function (and the typesafe wrapper) inside a macro. But the
resulting code is (IMHO) pretty easy to comprehend.

-Peff

^ permalink raw reply	[flat|nested] 118+ messages in thread

* Re: [PATCH 15/16] commit-reach: make can_all_from_reach... linear
  2020-11-18  6:54                           ` Jeff King
@ 2020-11-18 17:47                             ` René Scharfe
  0 siblings, 0 replies; 118+ messages in thread
From: René Scharfe @ 2020-11-18 17:47 UTC (permalink / raw)
  To: Jeff King, Jonathan Nieder
  Cc: Derrick Stolee, Derrick Stolee via GitGitGadget, git,
	Junio C Hamano, Derrick Stolee

Am 18.11.20 um 07:54 schrieb Jeff King:
> On Tue, Nov 17, 2020 at 06:16:05PM -0800, Jonathan Nieder wrote:
>
>> Since this came up in [1], I took a glance at this.
>>
>> I also think it looks reasonable, though it's possible to do better if
>> we're willing to (1) cast between pointers to function with different
>> signatures, which is portable in practice but I don't believe the C
>> standard speaks to and (2) conditionally make use of gcc extensions,
>> for typechecking.
>
> The C standard definitely is not OK with calling a function through a
> wrong declaration or cast. I won't find chapter and verse

http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1256.pdf is a draft for
C99, and says under 6.3 Conversions, 6.3.2.3 Pointers, paragraph 8:

   If a converted pointer is used to call a function whose type is not
   compatible with the pointed-to type, the behavior is undefined.

> Now in practice you're probably fine as long as the number and sizes of
> the parameters are the same between the function definition and what the
> caller casts to. And so if we're talking about casting individual
> parameters between a void parameter and another pointer, that would
> usually be fine (in practice; the standard only says that void can store
> the type of anything, so it _could_ be larger than some other pointers.
> I don't know of any modern systems where this is true, though).
>
> Which is all a roundabout way of saying that yes, I think this kind of
> cast is probably OK in practice.
>
> I _think_ the ccan type-checking macro you pointed to would catch this
> sufficiently on systems with typeof() that it would also protect systems
> with different calling conventions. But I admit it's pretty dense.
>
> So I dunno. The nice thing is that this puts the ugliness all inside of
> QSORT(), which becomes magically type-safe. But it involves importing a
> lot of tricky bits under the hood.

A generic and type-safe QSORT would be nice, but if it calls a function
via a converted pointer then it's technically relying on undefined
behavior unless I misunderstand the standard.  I prefer occasional
mistakes (that are caught by ASan or UBSan or when the sort order actually
matters) to guaranteed undefined behavior that happens to work, until it
doesn't.

> The downside of René's patch is that it hides the declaration of the
> comparison function (and the typesafe wrapper) inside a macro. But the
> resulting code is (IMHO) pretty easy to comprehend.

I tried some more variants back then, before I dropped the ball
eventually when RL distracted me.  I think my favorite one was a
DEFINE_SORT macro that took the name of a typed comparison function --
the code looks more like normal C.

Handling arrays of pointers was a bit tricky, and I had to introduce
DEFINE_PTR_SORT and DEFINE_CONST_PTR_SORT for them, but they allowed
using the same comparison functions -- they consistently took element
pointers.  Just one rule to follow, and the compiler would yell when
a mismatched macro was used.

And at that point it occurred to me that comparison functions would
ideally take two elements, not pointers.  All the pointer mangling
could be done in generated code.  GCC and Clang inline it
appropriately, so this convenience would be free -- but other
compilers don't, and that would make sorting more expensive on those
platforms.  Dead end.

The last one I looked at was a dumbed-down version, but I think this
requires some weird comparison function signatures in some cases
(patch below, basically untested).

I can understand now why monomorphization approaches like
https://github.com/attractivechaos/klib/blob/master/ksort.h seem
attractive (no pun intended)..

René

---
 git-compat-util.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/git-compat-util.h b/git-compat-util.h
index adfea06897..8d871a8e33 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1124,6 +1124,23 @@ int git_qsort_s(void *base, size_t nmemb, size_t size,
 		BUG("qsort_s() failed");			\
 } while (0)

+#define DECLARE_COMPARE(scope, compar)					\
+scope int compar##__void(const void *, const void *)
+
+#define DEFINE_COMPARE(scope, compar)					\
+scope int compar##__void(const void *a_void_ptr,			\
+			 const void *b_void_ptr)			\
+{									\
+	return compar(a_void_ptr, b_void_ptr);				\
+}									\
+DECLARE_COMPARE(scope, compar)
+
+#define GET_COMPARE(base, compar)					\
+(0 && compar((base), (base)) ? NULL : compar##__void)
+
+#define SORT_ARRAY(base, n, compar)					\
+QSORT((base), (n), GET_COMPARE((base), compar))
+
 #ifndef REG_STARTEND
 #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
 #endif
--
2.29.2

^ permalink raw reply related	[flat|nested] 118+ messages in thread

end of thread, other threads:[~2020-11-18 17:54 UTC | newest]

Thread overview: 118+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-16 13:00 [PATCH 00/16] Consolidate reachability logic Derrick Stolee via GitGitGadget
2018-06-19 20:25 ` [PATCH 04/16] upload-pack: make reachable() more generic Derrick Stolee via GitGitGadget
2018-06-19 20:35 ` [PATCH 05/16] upload-pack: refactor ok_to_give_up() Derrick Stolee via GitGitGadget
2018-06-25 17:16 ` [PATCH 01/16] commit-reach: move walk methods from commit.c Derrick Stolee via GitGitGadget
2018-07-16 18:57   ` Stefan Beller
2018-07-16 21:31   ` Jonathan Tan
2018-06-25 17:35 ` [PATCH 02/16] commit-reach: move ref_newer from remote.c Derrick Stolee via GitGitGadget
2018-07-16 19:10   ` Stefan Beller
2018-06-25 18:01 ` [PATCH 03/16] commit-reach: move commit_contains from ref-filter Derrick Stolee via GitGitGadget
2018-07-16 19:14   ` Stefan Beller
2018-06-28 12:31 ` [PATCH 15/16] commit-reach: make can_all_from_reach... linear Derrick Stolee via GitGitGadget
2018-07-16 22:37   ` Stefan Beller
2018-07-17  1:16   ` Jonathan Tan
2018-10-01 19:16   ` René Scharfe
2018-10-01 19:26     ` Derrick Stolee
2018-10-01 20:37       ` René Scharfe
2018-10-04 22:59         ` René Scharfe
2018-10-05 12:15           ` Derrick Stolee
2018-10-05 16:51           ` Jeff King
2018-10-05 18:48             ` René Scharfe
2018-10-05 19:08               ` Jeff King
2018-10-05 19:36                 ` René Scharfe
2018-10-05 19:42                   ` Jeff King
2018-10-14 14:29                     ` René Scharfe
2018-10-15 15:31                       ` Derrick Stolee
2018-10-15 16:26                         ` René Scharfe
2018-10-16 23:09                       ` Junio C Hamano
2018-10-17  8:33                       ` Jeff King
2020-11-18  2:16                         ` Jonathan Nieder
2020-11-18  6:54                           ` Jeff King
2020-11-18 17:47                             ` René Scharfe
2018-10-05 19:12             ` Ævar Arnfjörð Bjarmason
2018-10-05 19:28               ` Jeff King
2018-10-05 19:42                 ` Ævar Arnfjörð Bjarmason
2018-10-05 19:44                   ` Jeff King
2018-07-12 20:47 ` [PATCH 06/16] upload-pack: generalize commit date cutoff Derrick Stolee via GitGitGadget
2018-07-16 19:38   ` Stefan Beller
2018-07-18 16:04     ` Derrick Stolee
2018-07-12 20:52 ` [PATCH 07/16] commit-reach: move can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
2018-07-16 22:37   ` Jonathan Tan
2018-07-13 14:06 ` [PATCH 08/16] test-reach: create new test tool for ref_newer Derrick Stolee via GitGitGadget
2018-07-16 23:00   ` Jonathan Tan
2018-07-18 16:14     ` Derrick Stolee
2018-07-13 14:28 ` [PATCH 09/16] test-reach: test in_merge_bases Derrick Stolee via GitGitGadget
2018-07-13 14:38 ` [PATCH 10/16] test-reach: test is_descendant_of Derrick Stolee via GitGitGadget
2018-07-13 14:51 ` [PATCH 11/16] test-reach: test get_merge_bases_many Derrick Stolee via GitGitGadget
2018-07-16 21:24   ` Stefan Beller
2018-07-16 23:08   ` Jonathan Tan
2018-07-13 16:51 ` [PATCH 12/16] test-reach: test reduce_heads Derrick Stolee via GitGitGadget
2018-07-16 21:30   ` Stefan Beller
2018-07-16 21:59     ` Eric Sunshine
2018-07-13 17:22 ` [PATCH 13/16] test-reach: test can_all_from_reach_with_flags Derrick Stolee via GitGitGadget
2018-07-16 21:54   ` Stefan Beller
2018-07-18 16:54     ` Derrick Stolee
2018-07-17  0:10   ` Jonathan Tan
2018-07-13 18:37 ` [PATCH 14/16] commit-reach: replace ref_newer logic Derrick Stolee via GitGitGadget
2018-07-16 22:16   ` Stefan Beller
2018-07-13 19:25 ` [PATCH 16/16] commit-reach: use can_all_from_reach Derrick Stolee via GitGitGadget
2018-07-16 22:47   ` Stefan Beller
2018-07-16 13:54 ` [PATCH 00/16] Consolidate reachability logic Ramsay Jones
2018-07-16 16:18   ` Jeff King
2018-07-16 18:40     ` Eric Sunshine
2018-07-16 18:56       ` Jeff King
2018-07-16 18:59         ` Eric Sunshine
2018-07-18 12:32           ` Johannes Schindelin
2018-07-18 12:23     ` Johannes Schindelin
2018-07-18 19:21       ` Jeff King
2018-07-19 16:34         ` Johannes Schindelin
2018-07-16 17:26   ` Stefan Beller
2018-07-16 18:44     ` Eric Sunshine
2018-07-16 18:47       ` Derrick Stolee
2018-07-18 12:28         ` Johannes Schindelin
2018-07-18 15:01           ` Duy Nguyen
2018-07-18 17:01             ` Junio C Hamano
2018-07-18 17:11               ` Derrick Stolee
2018-07-19 16:37                 ` Johannes Schindelin
2018-07-19 16:32               ` Johannes Schindelin
2018-07-20 16:33 ` [PATCH v2 00/18] " Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 01/18] commit-reach: move walk methods from commit.c Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 02/18] commit.h: remove method declarations Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 03/18] commit-reach: move ref_newer from remote.c Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 04/18] commit-reach: move commit_contains from ref-filter Derrick Stolee
2018-08-28 21:24     ` Jonathan Nieder
2018-08-28 21:33       ` Derrick Stolee
2018-08-28 21:36       ` [PATCH] commit-reach: correct accidental #include of C file Jonathan Nieder
2018-08-28 21:39         ` Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 05/18] upload-pack: make reachable() more generic Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 06/18] upload-pack: refactor ok_to_give_up() Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 07/18] upload-pack: generalize commit date cutoff Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 08/18] commit-reach: move can_all_from_reach_with_flags Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 09/18] test-reach: create new test tool for ref_newer Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 10/18] test-reach: test in_merge_bases Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 11/18] test-reach: test is_descendant_of Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 12/18] test-reach: test get_merge_bases_many Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 13/18] test-reach: test reduce_heads Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 14/18] test-reach: test can_all_from_reach_with_flags Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 15/18] test-reach: test commit_contains Derrick Stolee
2018-07-23 20:35     ` Jonathan Tan
2018-07-25 18:08       ` Junio C Hamano
2018-07-25 18:30         ` Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 16/18] commit-reach: replace ref_newer logic Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 17/18] commit-reach: make can_all_from_reach... linear Derrick Stolee
2018-07-23 20:41     ` Jonathan Tan
2018-08-01 20:41       ` Derrick Stolee
2018-09-12  4:14     ` Jeff King
2018-09-12  4:29       ` Jeff King
2018-09-12 13:08         ` Derrick Stolee
2018-07-20 16:33   ` [PATCH v2 18/18] commit-reach: use can_all_from_reach Derrick Stolee
2018-07-20 17:10   ` [PATCH v2 00/18] Consolidate reachability logic Stefan Beller
2018-07-20 17:15     ` Derrick Stolee
2018-07-20 22:16       ` Stefan Beller
2018-08-01 20:33         ` Derrick Stolee
2018-07-20 17:18   ` Derrick Stolee
2018-07-20 18:09     ` Eric Sunshine
2018-07-20 19:14       ` Derrick Stolee
2018-07-20 17:41   ` Duy Nguyen
2018-07-20 19:09     ` Derrick Stolee
2018-07-20 22:45   ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).