git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
From: Derrick Stolee <stolee@gmail.com>
To: git@vger.kernel.org
Cc: peff@peff.net, avarab@gmail.com, e@80x24.org,
	Derrick Stolee <dstolee@microsoft.com>
Subject: [PATCH] pack-bitmap: look for an uninteresting bitmap
Date: Fri, 24 May 2019 11:31:12 +0000
Message-ID: <20190524113112.30185-1-dstolee@microsoft.com> (raw)
In-Reply-To: <20190523113031.GA17448@sigill.intra.peff.net>

If we try to do a range query such as C..D with topology as

 A_0 - ... - A_10000 - B - C_1 - ... - C_1000 - C
                         \
                           D_1 - ... - D_1000 - D

and none of the commits in {A_i, B, C_j, C} have a computed
bitmap, then we will very likely walk many many trees before
computing one for the "have" bitmap.

Instead, perform a commit walk to the boundary of C...D and
look for computed bitmaps in { B, C_j, C }. If any are found,
then it is worth starting from there and building a bitmap.
If not, revert to the old method of walking trees.

NOTE: this is only a proof-of-concept, as it fails a test in
t5310-pack-bitmaps.sh (clearly marked as a failure now).

Reported-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---

On 5/23/2019 7:30 AM, Jeff King wrote:> +	/*
> +	 * First traverse the relevant commits as we would for a normal
> +	 * traversal.
> +	 */
> +	while (commits.nr) {
> +		struct commit *commit = prio_queue_get(&commits);
> +		struct bitmap **dst_bitmap;

I was looking at this code again, and noticed this while() condition.

Shouldn't this use queue_has_nonstale() like in paint_down_to_common()?

Looking at the body of the loop, I don't see a way for the loop to stop
without it walking the entire history of C _and_ D.

Based on that, I wrote the patch below as an experiment. The
has_uninteresting_bitmap_in_frontier() shamelessly steals code from
paint_down_to_common(). Note the failing test, but perhaps there is
something salvageable from this.

Thanks,
-Stolee


 pack-bitmap.c           | 92 ++++++++++++++++++++++++++++++++++++++++-
 t/t5310-pack-bitmaps.sh |  2 +-
 2 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/pack-bitmap.c b/pack-bitmap.c
index 6069b2fe55..1f4683663e 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -12,6 +12,7 @@
 #include "packfile.h"
 #include "repository.h"
 #include "object-store.h"
+#include "prio-queue.h"
 
 /*
  * An entry on the bitmap index, representing the bitmap for a given
@@ -679,6 +680,81 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git,
 	return 0;
 }
 
+#define PARENT1         (1u<<16)
+#define PARENT2         (1u<<17)
+#define STALE           (1u<<18)
+
+static const int all_flags = { PARENT1 | PARENT2 | STALE };
+
+static int queue_has_nonstale(struct prio_queue *queue)
+{
+	int i;
+	for (i = 0; i < queue->nr; i++) {
+		struct commit *commit = queue->array[i].data;
+		if (!(commit->object.flags & STALE))
+			return 1;
+	}
+	return 0;
+}
+
+static int has_uninteresting_bitmap_in_frontier(struct repository *r,
+						struct commit_list *list,
+						struct bitmap_index *bitmap_git)
+{
+	int res = 0;
+	struct commit_list *iter = list;
+	struct prio_queue queue = { compare_commits_by_commit_date };
+
+	while (iter) {
+		prio_queue_put(&queue, iter->item);
+		iter = iter->next;
+	}
+
+	while (queue_has_nonstale(&queue)) {
+		struct commit *commit = prio_queue_get(&queue);
+		struct commit_list *parents;
+		int flags;
+
+		flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
+		if (flags == (PARENT1 | PARENT2)) {
+			/* Mark parents of a found merge stale */
+			flags |= STALE;
+		}
+
+		if (flags & PARENT1) {
+			khiter_t pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid);
+
+			if (pos < kh_end(bitmap_git->bitmaps)) {
+				res = 1;
+				goto cleanup;
+			}
+		}
+
+		parents = commit->parents;
+		while (parents) {
+			struct commit *p = parents->item;
+			parents = parents->next;
+			if ((p->object.flags & flags) == flags)
+				continue;
+			if (repo_parse_commit(r, p))
+				goto cleanup;
+			p->object.flags |= flags;
+			prio_queue_put(&queue, p);
+		}
+	}
+
+cleanup:
+	clear_prio_queue(&queue);
+
+	iter = list;
+	while (iter) {
+		clear_commit_marks(iter->item, all_flags);
+		iter = iter->next;
+	}
+
+	return res;
+}
+
 struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
 {
 	unsigned int i;
@@ -689,6 +765,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
 	struct bitmap *wants_bitmap = NULL;
 	struct bitmap *haves_bitmap = NULL;
 
+	struct commit_list *commits = NULL;
+
 	struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git));
 	/* try to open a bitmapped pack, but don't parse it yet
 	 * because we may not need to use it */
@@ -704,16 +782,22 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
 		while (object->type == OBJ_TAG) {
 			struct tag *tag = (struct tag *) object;
 
-			if (object->flags & UNINTERESTING)
+			if (object->flags & UNINTERESTING) {
+				object->flags |= PARENT1;
 				object_list_insert(object, &haves);
-			else
+			} else {
+				object->flags |= PARENT2;
 				object_list_insert(object, &wants);
+			}
 
 			if (!tag->tagged)
 				die("bad tag");
 			object = parse_object_or_die(&tag->tagged->oid, NULL);
 		}
 
+		if (object->type == OBJ_COMMIT)
+			commit_list_insert((struct commit *)object, &commits);
+
 		if (object->flags & UNINTERESTING)
 			object_list_insert(object, &haves);
 		else
@@ -740,6 +824,10 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
 	if (load_pack_bitmap(bitmap_git) < 0)
 		goto cleanup;
 
+	if (!has_uninteresting_bitmap_in_frontier(the_repository, commits, bitmap_git))
+		goto cleanup;
+
+	/* this is the real no-turning-back point! */
 	object_array_clear(&revs->pending);
 
 	if (haves) {
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index a26c8ba9a2..615608fbbf 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -422,7 +422,7 @@ test_expect_success 'fetch without bitmaps ignores delta against old base' '
 '
 
 # And do the same for the bitmap case, where we do expect to find the delta.
-test_expect_success 'fetch with bitmaps can reuse old base' '
+test_expect_failure 'fetch with bitmaps can reuse old base' '
 	test_config pack.usebitmaps true &&
 	test_when_finished "rm -rf client.git" &&
 	git init --bare client.git &&
-- 
2.22.0.rc0


  parent reply index

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-14  4:31 [PATCH 0/3] some prune optimizations Jeff King
2019-02-14  4:35 ` [PATCH 1/3] prune: lazily perform reachability traversal Jeff King
2019-02-14 10:54   ` Eric Sunshine
2019-02-14 11:07     ` Jeff King
2019-02-14  4:37 ` [PATCH 2/3] prune: use bitmaps for " Jeff King
2019-03-09  2:49   ` bitmaps by default? [was: prune: use bitmaps for reachability traversal] Eric Wong
2019-03-10 23:39     ` Jeff King
2019-03-12  3:13       ` [PATCH] repack: enable bitmaps by default on bare repos Eric Wong
2019-03-12  9:07         ` Ævar Arnfjörð Bjarmason
2019-03-12 10:49         ` Jeff King
2019-03-12 12:05           ` Jeff King
2019-03-13  1:51           ` Eric Wong
2019-03-13 14:54             ` Jeff King
2019-03-14  9:12               ` [PATCH v3] " Eric Wong
2019-03-14 16:02                 ` Jeff King
2019-03-15  6:21                   ` [PATCH 0/2] enable bitmap hash-cache by default Jeff King
2019-03-15  6:22                     ` [PATCH 1/2] t5310: correctly remove bitmaps for jgit test Jeff King
2019-03-15 13:25                       ` SZEDER Gábor
2019-03-15 18:36                         ` Jeff King
2019-03-15  6:25                     ` [PATCH 2/2] pack-objects: default to writing bitmap hash-cache Jeff King
2019-04-09 15:10                 ` [PATCH v3] repack: enable bitmaps by default on bare repos Ævar Arnfjörð Bjarmason
2019-04-10 22:57                   ` Jeff King
2019-04-25  7:16                     ` Junio C Hamano
2019-05-04  1:37                       ` Jeff King
2019-05-04  6:52                         ` Ævar Arnfjörð Bjarmason
2019-05-04 13:23                           ` SZEDER Gábor
2019-05-08 20:17                             ` Ævar Arnfjörð Bjarmason
2019-05-09  4:24                               ` Junio C Hamano
2019-05-07  7:45                           ` Jeff King
2019-05-07  8:12                             ` Ævar Arnfjörð Bjarmason
2019-05-08  7:11                               ` Jeff King
2019-05-08 14:20                                 ` Derrick Stolee
2019-05-08 16:13                                 ` Ævar Arnfjörð Bjarmason
2019-05-08 22:25                                   ` Jeff King
2019-05-23 11:30                     ` Jeff King
2019-05-23 12:53                       ` Derrick Stolee
2019-05-24  7:24                         ` Jeff King
2019-05-24 10:33                           ` Derrick Stolee
2019-05-23 19:26                       ` Ævar Arnfjörð Bjarmason
2019-05-24  7:27                         ` Jeff King
2019-05-24  7:55                           ` Ævar Arnfjörð Bjarmason
2019-05-24  8:26                             ` Jeff King
2019-05-24  9:01                               ` Ævar Arnfjörð Bjarmason
2019-05-24  9:29                                 ` SZEDER Gábor
2019-05-24 11:17                                   ` Ævar Arnfjörð Bjarmason
2019-05-24 11:41                                     ` SZEDER Gábor
2019-05-24 11:58                                       ` Ævar Arnfjörð Bjarmason
2019-05-24 12:34                                         ` SZEDER Gábor
2019-05-24 13:41                                           ` Ævar Arnfjörð Bjarmason
2019-05-24 11:31                       ` Derrick Stolee [this message]
2019-04-15 15:00   ` [PATCH 2/3] prune: use bitmaps for reachability traversal Derrick Stolee
2019-04-18 19:49     ` Jeff King
2019-04-18 20:08       ` [PATCH] t5304: add a test for pruning with bitmaps Jeff King
2019-04-20  1:01         ` Derrick Stolee
2019-04-20  3:24           ` Jeff King
2019-04-20 21:01             ` Derrick Stolee
2019-02-14  4:38 ` [PATCH 3/3] prune: check SEEN flag for reachability Jeff King

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190524113112.30185-1-dstolee@microsoft.com \
    --to=stolee@gmail.com \
    --cc=avarab@gmail.com \
    --cc=dstolee@microsoft.com \
    --cc=e@80x24.org \
    --cc=git@vger.kernel.org \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox