git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Stephan Beyer <s-beyer@gmx.net>
To: git@vger.kernel.org
Cc: Stephan Beyer <s-beyer@gmx.net>,
	Christian Couder <christian.couder@gmail.com>,
	Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 19/21] bisect: use a bottom-up traversal to find relevant weights
Date: Sun, 10 Apr 2016 15:19:12 +0200	[thread overview]
Message-ID: <1460294354-7031-20-git-send-email-s-beyer@gmx.net> (raw)
In-Reply-To: <1460294354-7031-1-git-send-email-s-beyer@gmx.net>

The idea is to reverse the DAG and perform a traversal
starting on all sources of the reversed DAG.

We walk from the bottom commits, incrementing the weight while
walking on a part of the graph that is single strand of pearls,
or doing the "count the reachable ones the hard way" using
compute_weight() when we hit a merge commit.

A traversal ends when the computed weight is falling or halfway.
This way, commits with too high weight to be relevant are never
visited (and their weights are never computed).

Signed-off-by: Stephan Beyer <s-beyer@gmx.net>
---

Notes:
    I rephrased the commit message.
    
    I renamed the functions such that they don't talk about "BFS"
    because that is irrelevant. Also use a DFS now because it is
    less code (and a little more efficient).
    
    I plugged some leaks.

 bisect.c | 250 +++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 162 insertions(+), 88 deletions(-)

diff --git a/bisect.c b/bisect.c
index c6bad43..9487ba9 100644
--- a/bisect.c
+++ b/bisect.c
@@ -30,6 +30,9 @@ static unsigned marker;
 struct node_data {
 	int weight;
 	unsigned marked;
+	unsigned parents;
+	unsigned visited : 1;
+	struct commit_list *children;
 };
 
 #define DEBUG_BISECT 0
@@ -110,16 +113,6 @@ static int count_interesting_parents(struct commit *commit)
 	return count;
 }
 
-static inline int halfway(struct commit *commit)
-{
-	/*
-	 * Don't short-cut something we are not going to return!
-	 */
-	if (commit->object.flags & TREESAME)
-		return 0;
-	return !distance_direction(commit);
-}
-
 #if !DEBUG_BISECT
 #define show_list(a,b,c) do { ; } while (0)
 #else
@@ -340,90 +333,168 @@ static void compute_all_weights(struct commit_list *list,
 	show_list("bisection 2 counted all", counted, list);
 }
 
-/* At the moment this is basically the same as compute_all_weights()
- * but with a halfway shortcut */
+static struct commit_list *build_reversed_dag(struct commit_list *list,
+					      struct node_data *nodes)
+{
+	struct commit_list *sources = NULL;
+	struct commit_list *p;
+	int n = 0;
+	for (p = list; p; p = p->next)
+		p->item->util = &nodes[n++];
+	for (p = list; p; p = p->next) {
+		struct commit_list *parent;
+		struct commit *commit = p->item;
+		for (parent = commit->parents; parent; parent = parent->next) {
+			if (!(parent->item->object.flags & UNINTERESTING)) {
+				struct commit_list **next = &node_data(parent->item)->children;
+				commit_list_insert(commit, next);
+				node_data(commit)->parents++;
+			}
+		}
+	}
+
+	/* find all sources */
+	for (p = list; p; p = p->next) {
+		if (node_data(p->item)->parents == 0)
+			commit_list_insert(p->item, &sources);
+	}
+
+	return sources;
+}
+
+static inline void commit_list_insert_unique(struct commit *item,
+				      struct commit_list **list)
+{
+	if (!*list || item < (*list)->item) /* empty list or item will be first */
+		commit_list_insert(item, list);
+	else if (item != (*list)->item) { /* item will not be first or not inserted */
+		struct commit_list *p = *list;
+		for (; p->next && p->next->item < item; p = p->next);
+		if (!p->next || item != p->next->item) /* not already inserted */
+			commit_list_insert(item, &p->next);
+	}
+}
+
+/* do a traversal on the reversed DAG (starting from commits in queue),
+ * but stop at merge commits */
+static void traversal_up_to_merges(struct commit_list *queue,
+				   struct commit_list **merges)
+{
+	assert(queue);
+	while (queue) {
+		struct commit *top = queue->item;
+		struct commit_list *p;
+
+		pop_commit(&queue);
+
+		if (distance_direction(top) > 0) {
+			node_data(top)->visited = 1;
+
+			/* queue children */
+			for (p = node_data(top)->children; p; p = p->next) {
+				if (node_data(p->item)->parents > 1) /* child is a merge */
+					commit_list_insert_unique(p->item, merges);
+				else {
+					node_data(p->item)->weight = node_data(top)->weight;
+					if (!(p->item->object.flags & TREESAME))
+						node_data(p->item)->weight++;
+					commit_list_insert(p->item, &queue);
+				}
+			}
+		}
+	}
+}
+
+static inline int all_parents_are_visited(struct commit *merge)
+{
+	struct commit_list *p;
+	for (p = merge->parents; p; p = p->next) {
+		if (p->item->util && !node_data(p->item)->visited)
+			return 0;
+	}
+	return 1;
+}
+
+static struct commit *extract_merge_to_queue(struct commit_list **merges)
+{
+	assert(merges);
+
+	struct commit_list *p, *q;
+	struct commit *found;
+
+	/* find a merge that is ready, i.e. all parents have been computed */
+	for (q = NULL, p = *merges; p && !all_parents_are_visited(p->item);
+	     q = p, p = p->next);
+	if (!p)
+		return NULL;
+
+	/* remove that commit from the list and return it */
+	if (q) {
+		assert(q->next == p);
+		q->next = p->next;
+	} else /* found first element of list */
+		*merges = p->next;
+	found = p->item;
+	free(p);
+
+	return found;
+}
+
+static inline int find_new_queue_from_merges(struct commit_list **queue,
+				      struct commit_list **merges)
+{
+	if (*merges) {
+		struct commit *merge = extract_merge_to_queue(merges);
+		*queue = NULL;
+		if (merge) {
+			commit_list_insert(merge, queue);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static inline void compute_merge_weights(struct commit_list *merges)
+{
+	struct commit_list *p;
+	for (p = merges; p; p = p->next)
+		compute_weight(p->item);
+}
+
+static void bottom_up_traversal(struct commit_list *queue)
+{
+	struct commit_list *merges = NULL;
+	traversal_up_to_merges(queue, &merges);
+	while (find_new_queue_from_merges(&queue, &merges)) {
+		compute_merge_weights(queue);
+		traversal_up_to_merges(queue, &merges);
+	}
+
+	/* cleanup */
+	free_commit_list(merges);
+}
+
+/* The idea is to reverse the DAG and perform a modified breadth-first search
+ * on it, starting on all sources of the reversed DAG.
+ * Before each visit of a commit, its weight is induced.
+ * This only works for non-merge commits, so the traversal stops prematurely on
+ * merge commits (that are collected in a list).
+ * Merge commits from that collection are considered for further visits
+ * as soon as all parents have been visited.
+ * Their weights are computed using compute_weight().
+ * Each traversal ends when the computed weight is falling or halfway.
+ */
 static void compute_relevant_weights(struct commit_list *list,
 				     struct node_data *weights)
 {
-	int n, counted;
 	struct commit_list *p;
+	struct commit_list *sources = build_reversed_dag(list, weights);
 
-	counted = 0;
+	for (p = sources; p; p = p->next)
+		node_data(p->item)->weight = 1;
+	bottom_up_traversal(sources);
 
-	for (n = 0, p = list; p; p = p->next) {
-		struct commit *commit = p->item;
-		unsigned flags = commit->object.flags;
-
-		commit->util = &weights[n++];
-		switch (count_interesting_parents(commit)) {
-		case 0:
-			if (!(flags & TREESAME)) {
-				node_data(commit)->weight = 1;
-				counted++;
-				show_list("bisection 2 count one",
-					  counted, list);
-			}
-			break;
-		case 1:
-			node_data(commit)->weight = -1;
-			break;
-		default:
-			node_data(commit)->weight = -2;
-			break;
-		}
-	}
-
-	show_list("bisection 2 initialize", counted, list);
-
-	for (p = list; p; p = p->next) {
-		if (!(p->item->object.flags & UNINTERESTING)
-		 && (node_data(p->item)->weight == -2)) {
-			compute_weight(p->item);
-
-			/* Does it happen to be at exactly half-way? */
-			if (halfway(p->item))
-				return;
-			counted++;
-		}
-	}
-
-	show_list("bisection 2 compute_weight", counted, list);
-
-	while (counted < total) {
-		for (p = list; p; p = p->next) {
-			struct commit_list *q;
-			unsigned flags = p->item->object.flags;
-
-			if (0 <= node_data(p->item)->weight)
-				continue;
-			for (q = p->item->parents; q; q = q->next) {
-				if (q->item->object.flags & UNINTERESTING)
-					continue;
-				if (0 <= node_data(q->item)->weight)
-					break;
-			}
-			if (!q)
-				continue;
-
-			/*
-			 * weight for p is unknown but q is known.
-			 * add one for p itself if p is to be counted,
-			 * otherwise inherit it from q directly.
-			 */
-			node_data(p->item)->weight = node_data(q->item)->weight;
-			if (!(flags & TREESAME)) {
-				node_data(p->item)->weight++;
-				counted++;
-				show_list("bisection 2 count one",
-					  counted, list);
-			}
-
-			/* Does it happen to be at exactly half-way? */
-			if (halfway(p->item))
-				return;
-		}
-	}
-	show_list("bisection 2 counted all", counted, list);
+	show_list("bisection 3 result", total, list);
 }
 
 struct commit_list *find_bisection(struct commit_list *list,
@@ -470,8 +541,11 @@ struct commit_list *find_bisection(struct commit_list *list,
 		compute_all_weights(list, weights);
 		best = best_bisection_sorted(list);
 	} else {
+		int i;
 		compute_relevant_weights(list, weights);
 		best = best_bisection(list);
+		for (i = 0; i < on_list; i++) /* cleanup */
+			free_commit_list(weights[i].children);
 	}
 	assert(best);
 	*reaches = node_data(best->item)->weight;
-- 
2.8.1.137.g522756c

  parent reply	other threads:[~2016-04-10 13:21 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-10 13:18 [PATCH v2 00/21] git bisect improvements Stephan Beyer
2016-04-10 13:18 ` [PATCH v2 01/21] bisect: write about `bisect next` in documentation Stephan Beyer
2016-04-10 13:18 ` [PATCH v2 02/21] bisect: allow 'bisect run' if no good commit is known Stephan Beyer
2016-04-10 13:18 ` [PATCH v2 03/21] t/test-lib-functions.sh: generalize test_cmp_rev Stephan Beyer
2016-04-11  0:07   ` Eric Sunshine
2016-04-15 20:00   ` Junio C Hamano
2016-04-24 19:51     ` Stephan Beyer
2016-04-25 18:08       ` Junio C Hamano
2016-04-10 13:18 ` [PATCH v2 04/21] t: use test_cmp_rev() where appropriate Stephan Beyer
2016-04-11  0:07   ` Eric Sunshine
2016-04-15 20:48   ` Junio C Hamano
2016-04-10 13:18 ` [PATCH v2 05/21] t6030: generalize test to not rely on current implementation Stephan Beyer
2016-04-10 13:47   ` Torsten Bögershausen
2016-04-10 19:16     ` Junio C Hamano
2016-04-10 19:37       ` Stephan Beyer
2016-04-11  0:23     ` Eric Sunshine
2016-04-15 21:07   ` Junio C Hamano
2016-04-10 13:18 ` [PATCH v2 06/21] bisect: add test for the bisect algorithm Stephan Beyer
2016-04-15 21:13   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 07/21] bisect: plug the biggest memory leak Stephan Beyer
2016-04-15 21:18   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 08/21] bisect: make bisect compile if DEBUG_BISECT is set Stephan Beyer
2016-04-15 21:22   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 09/21] bisect: make algorithm behavior independent of DEBUG_BISECT Stephan Beyer
2016-04-15 21:25   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 10/21] bisect: get rid of recursion in count_distance() Stephan Beyer
2016-04-15 21:31   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 11/21] bisect: use struct node_data array instead of int array Stephan Beyer
2016-04-12 23:02   ` Christian Couder
2016-04-15 21:47   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 12/21] bisect: replace clear_distance() by unique markers Stephan Beyer
2016-04-12 23:20   ` Christian Couder
2016-04-15 22:07   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 13/21] bisect: use commit instead of commit list as arguments when appropriate Stephan Beyer
2016-04-15 22:08   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 14/21] bisect: extract get_distance() function from code duplication Stephan Beyer
2016-04-15 22:08   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 15/21] bisect: introduce distance_direction() Stephan Beyer
2016-04-15 22:10   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 16/21] bisect: make total number of commits global Stephan Beyer
2016-04-13 13:23   ` Christian Couder
2016-04-15 22:11   ` Junio C Hamano
2016-04-16  0:44   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 17/21] bisect: rename count_distance() to compute_weight() Stephan Beyer
2016-04-13 13:32   ` Christian Couder
2016-04-15 22:12   ` Junio C Hamano
2016-04-10 13:19 ` [PATCH v2 18/21] bisect: prepare for different algorithms based on find_all Stephan Beyer
2016-04-15 22:36   ` Junio C Hamano
2016-04-10 13:19 ` Stephan Beyer [this message]
2016-04-13 14:11   ` [PATCH v2 19/21] bisect: use a bottom-up traversal to find relevant weights Christian Couder
2016-04-15 22:47   ` Junio C Hamano
2016-04-15 22:49   ` Junio C Hamano
2016-04-26 18:27   ` Junio C Hamano
2016-04-10 13:24 ` [PATCH v2 20/21] bisect: compute best bisection in compute_relevant_weights() Stephan Beyer
2016-04-10 13:24   ` [PATCH v2 21/21] bisect: get back halfway shortcut Stephan Beyer
2016-04-15 22:53     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1460294354-7031-20-git-send-email-s-beyer@gmx.net \
    --to=s-beyer@gmx.net \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).