git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: peff@peff.net, dstolee@microsoft.com, gitster@pobox.com,
	jonathantanmy@google.com
Subject: [PATCH 02/22] pack-bitmap-write.c: gracefully fail to write non-closed bitmaps
Date: Fri, 9 Apr 2021 14:10:44 -0400	[thread overview]
Message-ID: <d199954ef2246165f32a4f703d1e0ebf43847152.1617991824.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1617991824.git.me@ttaylorr.com>

The set of objects covered by a bitmap must be closed under
reachability, since it must be the case that there is a valid bit
position assigned for every possible reachable object (otherwise the
bitmaps would be incomplete).

Pack bitmaps are never written from 'git repack' unless repacking
all-into-one, and so we never write non-closed bitmaps.

But multi-pack bitmaps change this, since it isn't known whether the
set of objects in the MIDX is closed under reachability until walking
them. Plumb through a bit that is set when a reachable object isn't
found.

As soon as a reachable object isn't found in the set of objects to
include in the bitmap, bitmap_writer_build() knows that the set is not
closed, and so it now fails gracefully.

(The new conditional in builtin/pack-objects.c:bitmap_writer_build()
guards against other failure modes, but is never triggered here, because
of the all-into-one detail above. This return value will be important to
check from the multi-pack index caller.)

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 builtin/pack-objects.c |  3 +-
 pack-bitmap-write.c    | 76 +++++++++++++++++++++++++++++-------------
 pack-bitmap.h          |  2 +-
 3 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index a1e33d7507..5205dde2e1 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1116,7 +1116,8 @@ static void write_pack_file(void)
 
 				bitmap_writer_show_progress(progress);
 				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
-				bitmap_writer_build(&to_pack);
+				if (bitmap_writer_build(&to_pack) < 0)
+					die(_("failed to write bitmap index"));
 				bitmap_writer_finish(written_list, nr_written,
 						     tmpname.buf, write_bitmap_options);
 				write_bitmap_index = 0;
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 88d9e696a5..e829c46649 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -125,15 +125,20 @@ static inline void push_bitmapped_commit(struct commit *commit)
 	writer.selected_nr++;
 }
 
-static uint32_t find_object_pos(const struct object_id *oid)
+static uint32_t find_object_pos(const struct object_id *oid, int *found)
 {
 	struct object_entry *entry = packlist_find(writer.to_pack, oid);
 
 	if (!entry) {
-		die("Failed to write bitmap index. Packfile doesn't have full closure "
+		if (found)
+			*found = 0;
+		warning("Failed to write bitmap index. Packfile doesn't have full closure "
 			"(object %s is missing)", oid_to_hex(oid));
+		return 0;
 	}
 
+	if (found)
+		*found = 1;
 	return oe_in_pack_pos(writer.to_pack, entry);
 }
 
@@ -331,9 +336,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb)
 	bb->commits_nr = bb->commits_alloc = 0;
 }
 
-static void fill_bitmap_tree(struct bitmap *bitmap,
-			     struct tree *tree)
+static int fill_bitmap_tree(struct bitmap *bitmap,
+			    struct tree *tree)
 {
+	int found;
 	uint32_t pos;
 	struct tree_desc desc;
 	struct name_entry entry;
@@ -342,9 +348,11 @@ static void fill_bitmap_tree(struct bitmap *bitmap,
 	 * If our bit is already set, then there is nothing to do. Both this
 	 * tree and all of its children will be set.
 	 */
-	pos = find_object_pos(&tree->object.oid);
+	pos = find_object_pos(&tree->object.oid, &found);
+	if (!found)
+		return -1;
 	if (bitmap_get(bitmap, pos))
-		return;
+		return 0;
 	bitmap_set(bitmap, pos);
 
 	if (parse_tree(tree) < 0)
@@ -355,11 +363,15 @@ static void fill_bitmap_tree(struct bitmap *bitmap,
 	while (tree_entry(&desc, &entry)) {
 		switch (object_type(entry.mode)) {
 		case OBJ_TREE:
-			fill_bitmap_tree(bitmap,
-					 lookup_tree(the_repository, &entry.oid));
+			if (fill_bitmap_tree(bitmap,
+					     lookup_tree(the_repository, &entry.oid)) < 0)
+				return -1;
 			break;
 		case OBJ_BLOB:
-			bitmap_set(bitmap, find_object_pos(&entry.oid));
+			pos = find_object_pos(&entry.oid, &found);
+			if (!found)
+				return -1;
+			bitmap_set(bitmap, pos);
 			break;
 		default:
 			/* Gitlink, etc; not reachable */
@@ -368,15 +380,18 @@ static void fill_bitmap_tree(struct bitmap *bitmap,
 	}
 
 	free_tree_buffer(tree);
+	return 0;
 }
 
-static void fill_bitmap_commit(struct bb_commit *ent,
-			       struct commit *commit,
-			       struct prio_queue *queue,
-			       struct prio_queue *tree_queue,
-			       struct bitmap_index *old_bitmap,
-			       const uint32_t *mapping)
+static int fill_bitmap_commit(struct bb_commit *ent,
+			      struct commit *commit,
+			      struct prio_queue *queue,
+			      struct prio_queue *tree_queue,
+			      struct bitmap_index *old_bitmap,
+			      const uint32_t *mapping)
 {
+	int found;
+	uint32_t pos;
 	if (!ent->bitmap)
 		ent->bitmap = bitmap_new();
 
@@ -401,11 +416,16 @@ static void fill_bitmap_commit(struct bb_commit *ent,
 		 * Mark ourselves and queue our tree. The commit
 		 * walk ensures we cover all parents.
 		 */
-		bitmap_set(ent->bitmap, find_object_pos(&c->object.oid));
+		pos = find_object_pos(&c->object.oid, &found);
+		if (!found)
+			return -1;
+		bitmap_set(ent->bitmap, pos);
 		prio_queue_put(tree_queue, get_commit_tree(c));
 
 		for (p = c->parents; p; p = p->next) {
-			int pos = find_object_pos(&p->item->object.oid);
+			pos = find_object_pos(&p->item->object.oid, &found);
+			if (!found)
+				return -1;
 			if (!bitmap_get(ent->bitmap, pos)) {
 				bitmap_set(ent->bitmap, pos);
 				prio_queue_put(queue, p->item);
@@ -413,8 +433,12 @@ static void fill_bitmap_commit(struct bb_commit *ent,
 		}
 	}
 
-	while (tree_queue->nr)
-		fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue));
+	while (tree_queue->nr) {
+		if (fill_bitmap_tree(ent->bitmap,
+				     prio_queue_get(tree_queue)) < 0)
+			return -1;
+	}
+	return 0;
 }
 
 static void store_selected(struct bb_commit *ent, struct commit *commit)
@@ -432,7 +456,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit)
 	kh_value(writer.bitmaps, hash_pos) = stored;
 }
 
-void bitmap_writer_build(struct packing_data *to_pack)
+int bitmap_writer_build(struct packing_data *to_pack)
 {
 	struct bitmap_builder bb;
 	size_t i;
@@ -441,6 +465,7 @@ void bitmap_writer_build(struct packing_data *to_pack)
 	struct prio_queue tree_queue = { NULL };
 	struct bitmap_index *old_bitmap;
 	uint32_t *mapping;
+	int closed = 1; /* until proven otherwise */
 
 	writer.bitmaps = kh_init_oid_map();
 	writer.to_pack = to_pack;
@@ -463,8 +488,11 @@ void bitmap_writer_build(struct packing_data *to_pack)
 		struct commit *child;
 		int reused = 0;
 
-		fill_bitmap_commit(ent, commit, &queue, &tree_queue,
-				   old_bitmap, mapping);
+		if (fill_bitmap_commit(ent, commit, &queue, &tree_queue,
+				       old_bitmap, mapping) < 0) {
+			closed = 0;
+			break;
+		}
 
 		if (ent->selected) {
 			store_selected(ent, commit);
@@ -499,7 +527,9 @@ void bitmap_writer_build(struct packing_data *to_pack)
 
 	stop_progress(&writer.progress);
 
-	compute_xor_offsets();
+	if (closed)
+		compute_xor_offsets();
+	return closed;
 }
 
 /**
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 78f2b3ff79..988ed3a30d 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -86,7 +86,7 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
 				      struct commit *commit);
 void bitmap_writer_select_commits(struct commit **indexed_commits,
 		unsigned int indexed_commits_nr, int max_bitmaps);
-void bitmap_writer_build(struct packing_data *to_pack);
+int bitmap_writer_build(struct packing_data *to_pack);
 void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
 			  const char *filename,
-- 
2.31.1.163.ga65ce7f831


  parent reply	other threads:[~2021-04-09 18:10 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-09 18:10 [PATCH 00/22] multi-pack reachability bitmaps Taylor Blau
2021-04-09 18:10 ` [PATCH 01/22] pack-bitmap.c: harden 'test_bitmap_walk()' to check type bitmaps Taylor Blau
2021-04-09 18:10 ` Taylor Blau [this message]
2021-04-16  2:46   ` [PATCH 02/22] pack-bitmap-write.c: gracefully fail to write non-closed bitmaps Jonathan Tan
2021-04-09 18:10 ` [PATCH 03/22] pack-bitmap-write.c: free existing bitmaps Taylor Blau
2021-04-09 18:10 ` [PATCH 04/22] Documentation: build 'technical/bitmap-format' by default Taylor Blau
2021-04-09 18:11 ` [PATCH 05/22] Documentation: describe MIDX-based bitmaps Taylor Blau
2021-04-09 18:11 ` [PATCH 06/22] midx: make a number of functions non-static Taylor Blau
2021-04-09 18:11 ` [PATCH 07/22] midx: clear auxiliary .rev after replacing the MIDX Taylor Blau
2021-04-09 18:11 ` [PATCH 08/22] midx: respect 'core.multiPackIndex' when writing Taylor Blau
2021-04-09 18:11 ` [PATCH 09/22] pack-bitmap.c: introduce 'bitmap_num_objects()' Taylor Blau
2021-04-09 18:11 ` [PATCH 10/22] pack-bitmap.c: introduce 'nth_bitmap_object_oid()' Taylor Blau
2021-04-09 18:11 ` [PATCH 11/22] pack-bitmap.c: introduce 'bitmap_is_preferred_refname()' Taylor Blau
2021-04-09 18:11 ` [PATCH 12/22] pack-bitmap: read multi-pack bitmaps Taylor Blau
2021-04-16  2:39   ` Jonathan Tan
2021-04-16  3:13     ` Taylor Blau
2021-04-09 18:11 ` [PATCH 13/22] pack-bitmap: write " Taylor Blau
2021-05-04  5:02   ` Jonathan Tan
2021-05-06 20:18     ` Taylor Blau
2021-05-06 22:00       ` Jonathan Tan
2021-04-09 18:11 ` [PATCH 14/22] t5310: move some tests to lib-bitmap.sh Taylor Blau
2021-04-09 18:11 ` [PATCH 15/22] t/helper/test-read-midx.c: add --checksum mode Taylor Blau
2021-04-09 18:12 ` [PATCH 16/22] t5326: test multi-pack bitmap behavior Taylor Blau
2021-05-04 17:51   ` Jonathan Tan
2021-04-09 18:12 ` [PATCH 17/22] t5310: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP Taylor Blau
2021-04-09 18:12 ` [PATCH 18/22] t5319: don't write MIDX bitmaps in t5319 Taylor Blau
2021-04-09 18:12 ` [PATCH 19/22] t7700: update to work with MIDX bitmap test knob Taylor Blau
2021-04-09 18:12 ` [PATCH 20/22] midx: respect 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' Taylor Blau
2021-04-09 18:12 ` [PATCH 21/22] p5310: extract full and partial bitmap tests Taylor Blau
2021-04-09 18:12 ` [PATCH 22/22] p5326: perf tests for MIDX bitmaps Taylor Blau
2021-05-04 18:00   ` Jonathan Tan
2021-05-05  0:55     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d199954ef2246165f32a4f703d1e0ebf43847152.1617991824.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    --cc=peff@peff.net \
    --subject='Re: [PATCH 02/22] pack-bitmap-write.c: gracefully fail to write non-closed bitmaps' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.version-control.git
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.version-control.git
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git