git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Elijah Newren <newren@gmail.com>,
	Junio C Hamano <gitster@pobox.com>
Subject: [PATCH 07/24] pack-bitmap-write: support storing pseudo-merge commits
Date: Wed, 20 Mar 2024 18:05:19 -0400	[thread overview]
Message-ID: <7acdee2b5f2eddfb143afa2982f40bb0136ccdd1.1710972293.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1710972293.git.me@ttaylorr.com>

Prepare to write pseudo-merge bitmaps by annotating individual bitmapped
commits (which are represented by the `bitmapped_commit` structure) with
an extra bit indicating whether or not they are a pseudo-merge.

In subsequent commits, pseudo-merge bitmaps will be generated by
allocating a fake commit node with parents covering the full set of
commits represented by the pseudo-merge bitmap. These commits will be
added to the set of "selected" commits as usual, but will be written
specially instead of being included with the rest of the selected
commits.

Mechanically speaking, there are two parts of this change:

  - The bitmapped_commit struct gets a new bit indicating whether it is
    a pseudo-merge, or an ordinary commit selected for bitmaps.

  - A handful of changes to only write out the non-pseudo-merge commits
    when enumerating through the selected array (see the new
    `bitmap_writer_selected_nr()` function). Pseudo-merge commits appear
    after all non-pseudo-merge commits, so it is safe to enumerate
    through the selected array like so:

        for (i = 0; i < bitmap_writer_selected_nr(); i++)
          if (writer.selected[i].pseudo_merge)
            BUG("unexpected pseudo-merge");

    without encountering the BUG().

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 100 +++++++++++++++++++++++++++++---------------
 pack-bitmap.h       |   1 +
 2 files changed, 67 insertions(+), 34 deletions(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index ad768959633..b1e8a0ad66d 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -24,7 +24,7 @@ struct bitmapped_commit {
 	struct ewah_bitmap *write_as;
 	int flags;
 	int xor_offset;
-	uint32_t commit_pos;
+	unsigned pseudo_merge : 1;
 };
 
 struct bitmap_writer {
@@ -39,6 +39,8 @@ struct bitmap_writer {
 	struct bitmapped_commit *selected;
 	unsigned int selected_nr, selected_alloc;
 
+	uint32_t pseudo_merges_nr;
+
 	struct progress *progress;
 	int show_progress;
 	unsigned char pack_checksum[GIT_MAX_RAWSZ];
@@ -46,6 +48,11 @@ struct bitmap_writer {
 
 static struct bitmap_writer writer;
 
+static inline int bitmap_writer_selected_nr(void)
+{
+	return writer.selected_nr - writer.pseudo_merges_nr;
+}
+
 void bitmap_writer_init(struct repository *r)
 {
 	writer.bitmaps = kh_init_oid_map();
@@ -120,25 +127,30 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
  * Compute the actual bitmaps
  */
 
-static inline void push_bitmapped_commit(struct commit *commit)
+static void bitmap_writer_push_bitmapped_commit(struct commit *commit,
+						unsigned pseudo_merge)
 {
-	int hash_ret;
-	khiter_t hash_pos;
-
 	if (writer.selected_nr >= writer.selected_alloc) {
 		writer.selected_alloc = (writer.selected_alloc + 32) * 2;
 		REALLOC_ARRAY(writer.selected, writer.selected_alloc);
 	}
 
-	hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret);
-	if (!hash_ret)
-		die(_("duplicate entry when writing bitmap index: %s"),
-		    oid_to_hex(&commit->object.oid));
-	kh_value(writer.bitmaps, hash_pos) = NULL;
+	if (!pseudo_merge) {
+		int hash_ret;
+		khiter_t hash_pos = kh_put_oid_map(writer.bitmaps,
+						   commit->object.oid,
+						   &hash_ret);
+
+		if (!hash_ret)
+			die(_("duplicate entry when writing bitmap index: %s"),
+			    oid_to_hex(&commit->object.oid));
+		kh_value(writer.bitmaps, hash_pos) = NULL;
+	}
 
 	writer.selected[writer.selected_nr].commit = commit;
 	writer.selected[writer.selected_nr].bitmap = NULL;
 	writer.selected[writer.selected_nr].flags = 0;
+	writer.selected[writer.selected_nr].pseudo_merge = pseudo_merge;
 
 	writer.selected_nr++;
 }
@@ -168,16 +180,20 @@ static void compute_xor_offsets(void)
 
 	while (next < writer.selected_nr) {
 		struct bitmapped_commit *stored = &writer.selected[next];
-
 		int best_offset = 0;
 		struct ewah_bitmap *best_bitmap = stored->bitmap;
 		struct ewah_bitmap *test_xor;
 
+		if (stored->pseudo_merge)
+			goto next;
+
 		for (i = 1; i <= MAX_XOR_OFFSET_SEARCH; ++i) {
 			int curr = next - i;
 
 			if (curr < 0)
 				break;
+			if (writer.selected[curr].pseudo_merge)
+				continue;
 
 			test_xor = ewah_pool_new();
 			ewah_xor(writer.selected[curr].bitmap, stored->bitmap, test_xor);
@@ -193,6 +209,7 @@ static void compute_xor_offsets(void)
 			}
 		}
 
+next:
 		stored->xor_offset = best_offset;
 		stored->write_as = best_bitmap;
 
@@ -205,7 +222,8 @@ struct bb_commit {
 	struct bitmap *commit_mask;
 	struct bitmap *bitmap;
 	unsigned selected:1,
-		 maximal:1;
+		 maximal:1,
+		 pseudo_merge:1;
 	unsigned idx; /* within selected array */
 };
 
@@ -243,17 +261,18 @@ static void bitmap_builder_init(struct bitmap_builder *bb,
 	revs.first_parent_only = 1;
 
 	for (i = 0; i < writer->selected_nr; i++) {
-		struct commit *c = writer->selected[i].commit;
-		struct bb_commit *ent = bb_data_at(&bb->data, c);
+		struct bitmapped_commit *bc = &writer->selected[i];
+		struct bb_commit *ent = bb_data_at(&bb->data, bc->commit);
 
 		ent->selected = 1;
 		ent->maximal = 1;
+		ent->pseudo_merge = bc->pseudo_merge;
 		ent->idx = i;
 
 		ent->commit_mask = bitmap_new();
 		bitmap_set(ent->commit_mask, i);
 
-		add_pending_object(&revs, &c->object, "");
+		add_pending_object(&revs, &bc->commit->object, "");
 	}
 
 	if (prepare_revision_walk(&revs))
@@ -430,8 +449,13 @@ static int fill_bitmap_commit(struct bb_commit *ent,
 		struct commit *c = prio_queue_get(queue);
 
 		if (old_bitmap && mapping) {
-			struct ewah_bitmap *old = bitmap_for_commit(old_bitmap, c);
+			struct ewah_bitmap *old;
 			struct bitmap *remapped = bitmap_new();
+
+			if (commit->object.flags & BITMAP_PSEUDO_MERGE)
+				old = NULL;
+			else
+				old = bitmap_for_commit(old_bitmap, c);
 			/*
 			 * If this commit has an old bitmap, then translate that
 			 * bitmap and add its bits to this one. No need to walk
@@ -450,12 +474,14 @@ static int fill_bitmap_commit(struct bb_commit *ent,
 		 * Mark ourselves and queue our tree. The commit
 		 * walk ensures we cover all parents.
 		 */
-		pos = find_object_pos(&c->object.oid, &found);
-		if (!found)
-			return -1;
-		bitmap_set(ent->bitmap, pos);
-		prio_queue_put(tree_queue,
-			       repo_get_commit_tree(the_repository, c));
+		if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) {
+			pos = find_object_pos(&c->object.oid, &found);
+			if (!found)
+				return -1;
+			bitmap_set(ent->bitmap, pos);
+			prio_queue_put(tree_queue,
+				       repo_get_commit_tree(the_repository, c));
+		}
 
 		for (p = c->parents; p; p = p->next) {
 			pos = find_object_pos(&p->item->object.oid, &found);
@@ -483,6 +509,9 @@ static void store_selected(struct bb_commit *ent, struct commit *commit)
 
 	stored->bitmap = bitmap_to_ewah(ent->bitmap);
 
+	if (ent->pseudo_merge)
+		return;
+
 	hash_pos = kh_get_oid_map(writer.bitmaps, commit->object.oid);
 	if (hash_pos == kh_end(writer.bitmaps))
 		die(_("attempted to store non-selected commit: '%s'"),
@@ -612,7 +641,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 
 	if (indexed_commits_nr < 100) {
 		for (i = 0; i < indexed_commits_nr; ++i)
-			push_bitmapped_commit(indexed_commits[i]);
+			bitmap_writer_push_bitmapped_commit(indexed_commits[i], 0);
 		return;
 	}
 
@@ -645,7 +674,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 			}
 		}
 
-		push_bitmapped_commit(chosen);
+		bitmap_writer_push_bitmapped_commit(chosen, 0);
 
 		i += next + 1;
 		display_progress(writer.progress, i);
@@ -683,8 +712,11 @@ static void write_selected_commits_v1(struct hashfile *f,
 {
 	int i;
 
-	for (i = 0; i < writer.selected_nr; ++i) {
+	for (i = 0; i < bitmap_writer_selected_nr(); ++i) {
 		struct bitmapped_commit *stored = &writer.selected[i];
+		if (stored->pseudo_merge)
+			BUG("unexpected pseudo-merge among selected: %s",
+			    oid_to_hex(&stored->commit->object.oid));
 
 		if (offsets)
 			offsets[i] = hashfile_total(f);
@@ -718,10 +750,10 @@ static void write_lookup_table(struct hashfile *f,
 	uint32_t i;
 	uint32_t *table, *table_inv;
 
-	ALLOC_ARRAY(table, writer.selected_nr);
-	ALLOC_ARRAY(table_inv, writer.selected_nr);
+	ALLOC_ARRAY(table, bitmap_writer_selected_nr());
+	ALLOC_ARRAY(table_inv, bitmap_writer_selected_nr());
 
-	for (i = 0; i < writer.selected_nr; i++)
+	for (i = 0; i < bitmap_writer_selected_nr(); i++)
 		table[i] = i;
 
 	/*
@@ -729,16 +761,16 @@ static void write_lookup_table(struct hashfile *f,
 	 * bitmap corresponds to j'th bitmapped commit (among the selected
 	 * commits) in lex order of OIDs.
 	 */
-	QSORT_S(table, writer.selected_nr, table_cmp, commit_positions);
+	QSORT_S(table, bitmap_writer_selected_nr(), table_cmp, commit_positions);
 
 	/* table_inv helps us discover that relationship (i'th bitmap
 	 * to j'th commit by j = table_inv[i])
 	 */
-	for (i = 0; i < writer.selected_nr; i++)
+	for (i = 0; i < bitmap_writer_selected_nr(); i++)
 		table_inv[table[i]] = i;
 
 	trace2_region_enter("pack-bitmap-write", "writing_lookup_table", the_repository);
-	for (i = 0; i < writer.selected_nr; i++) {
+	for (i = 0; i < bitmap_writer_selected_nr(); i++) {
 		struct bitmapped_commit *selected = &writer.selected[table[i]];
 		uint32_t xor_offset = selected->xor_offset;
 		uint32_t xor_row;
@@ -809,7 +841,7 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 	memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE));
 	header.version = htons(default_version);
 	header.options = htons(flags | options);
-	header.entry_count = htonl(writer.selected_nr);
+	header.entry_count = htonl(bitmap_writer_selected_nr());
 	hashcpy(header.checksum, writer.pack_checksum);
 
 	hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz);
@@ -821,9 +853,9 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 	if (options & BITMAP_OPT_LOOKUP_TABLE)
 		CALLOC_ARRAY(offsets, index_nr);
 
-	ALLOC_ARRAY(commit_positions, writer.selected_nr);
+	ALLOC_ARRAY(commit_positions, bitmap_writer_selected_nr());
 
-	for (i = 0; i < writer.selected_nr; i++) {
+	for (i = 0; i < bitmap_writer_selected_nr(); i++) {
 		struct bitmapped_commit *stored = &writer.selected[i];
 		int commit_pos = oid_pos(&stored->commit->object.oid, index, index_nr, oid_access);
 
diff --git a/pack-bitmap.h b/pack-bitmap.h
index dae2d68a338..ca9acd2f735 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -21,6 +21,7 @@ struct bitmap_disk_header {
 	unsigned char checksum[GIT_MAX_RAWSZ];
 };
 
+#define BITMAP_PSEUDO_MERGE (1u<<21)
 #define NEEDS_BITMAP (1u<<22)
 
 /*
-- 
2.44.0.303.g1dc5e5b124c



  parent reply	other threads:[~2024-03-20 22:06 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-20 22:04 [PATCH 00/24] pack-bitmap: pseudo-merge reachability bitmaps Taylor Blau
2024-03-20 22:05 ` [PATCH 01/24] Documentation/technical: describe pseudo-merge bitmaps format Taylor Blau
2024-03-21 21:24   ` Junio C Hamano
2024-03-21 22:13     ` Taylor Blau
2024-03-21 22:22       ` Junio C Hamano
2024-03-20 22:05 ` [PATCH 02/24] config: repo_config_get_expiry() Taylor Blau
2024-04-10 17:54   ` Jeff King
2024-04-29 19:39     ` Taylor Blau
2024-03-20 22:05 ` [PATCH 03/24] ewah: implement `ewah_bitmap_is_subset()` Taylor Blau
2024-04-10 18:05   ` Jeff King
2024-04-29 19:47     ` Taylor Blau
2024-03-20 22:05 ` [PATCH 04/24] pack-bitmap: drop unused `max_bitmaps` parameter Taylor Blau
2024-04-10 18:06   ` Jeff King
2024-03-20 22:05 ` [PATCH 05/24] pack-bitmap: move some initialization to `bitmap_writer_init()` Taylor Blau
2024-04-10 18:10   ` Jeff King
2024-03-20 22:05 ` [PATCH 06/24] pseudo-merge.ch: initial commit Taylor Blau
2024-03-20 22:05 ` Taylor Blau [this message]
2024-03-20 22:05 ` [PATCH 08/24] pack-bitmap: implement `bitmap_writer_has_bitmapped_object_id()` Taylor Blau
2024-03-20 22:05 ` [PATCH 09/24] pack-bitmap: make `bitmap_writer_push_bitmapped_commit()` public Taylor Blau
2024-03-20 22:05 ` [PATCH 10/24] pseudo-merge: implement support for selecting pseudo-merge commits Taylor Blau
2024-03-20 22:05 ` [PATCH 11/24] pack-bitmap-write.c: select " Taylor Blau
2024-03-20 22:05 ` [PATCH 12/24] pack-bitmap-write.c: write pseudo-merge table Taylor Blau
2024-03-20 22:05 ` [PATCH 13/24] pack-bitmap: extract `read_bitmap()` function Taylor Blau
2024-03-20 22:05 ` [PATCH 14/24] pseudo-merge: scaffolding for reads Taylor Blau
2024-03-20 22:05 ` [PATCH 15/24] pack-bitmap.c: read pseudo-merge extension Taylor Blau
2024-03-20 22:05 ` [PATCH 16/24] pseudo-merge: implement support for reading pseudo-merge commits Taylor Blau
2024-03-20 22:05 ` [PATCH 17/24] ewah: implement `ewah_bitmap_popcount()` Taylor Blau
2024-03-20 22:05 ` [PATCH 18/24] pack-bitmap: implement test helpers for pseudo-merge Taylor Blau
2024-03-20 22:05 ` [PATCH 19/24] t/test-lib-functions.sh: support `--date` in `test_commit_bulk()` Taylor Blau
2024-03-20 22:05 ` [PATCH 20/24] pack-bitmap.c: use pseudo-merges during traversal Taylor Blau
2024-03-20 22:06 ` [PATCH 21/24] pack-bitmap: extra trace2 information Taylor Blau
2024-03-20 22:06 ` [PATCH 22/24] ewah: `bitmap_equals_ewah()` Taylor Blau
2024-03-20 22:06 ` [PATCH 23/24] pseudo-merge: implement support for finding existing merges Taylor Blau
2024-03-20 22:06 ` [PATCH 24/24] t/perf: implement performace tests for pseudo-merge bitmaps Taylor Blau
2024-03-21 19:50 ` [PATCH 00/24] pack-bitmap: pseudo-merge reachability bitmaps Junio C Hamano
2024-04-29 20:42 ` [PATCH v2 00/23] " Taylor Blau
2024-04-29 20:42   ` [PATCH v2 01/23] Documentation/technical: describe pseudo-merge bitmaps format Taylor Blau
2024-05-06 11:52     ` Patrick Steinhardt
2024-05-06 16:37       ` Taylor Blau
2024-05-10 11:46         ` Patrick Steinhardt
2024-05-13 19:47           ` Taylor Blau
2024-05-14  6:33             ` Patrick Steinhardt
2024-04-29 20:43   ` [PATCH v2 02/23] ewah: implement `ewah_bitmap_is_subset()` Taylor Blau
2024-04-29 20:43   ` [PATCH v2 03/23] pack-bitmap: drop unused `max_bitmaps` parameter Taylor Blau
2024-04-29 20:43   ` [PATCH v2 04/23] pack-bitmap: move some initialization to `bitmap_writer_init()` Taylor Blau
2024-05-06 11:52     ` Patrick Steinhardt
2024-05-06 18:24       ` Taylor Blau
2024-04-29 20:43   ` [PATCH v2 05/23] pseudo-merge.ch: initial commit Taylor Blau
2024-04-29 20:43   ` [PATCH v2 06/23] pack-bitmap-write: support storing pseudo-merge commits Taylor Blau
2024-05-06 11:52     ` Patrick Steinhardt
2024-05-06 18:48       ` Taylor Blau
2024-05-10 11:47         ` Patrick Steinhardt
2024-05-13 18:42     ` Jeff King
2024-05-13 20:19       ` Taylor Blau
2024-04-29 20:43   ` [PATCH v2 07/23] pack-bitmap: implement `bitmap_writer_has_bitmapped_object_id()` Taylor Blau
2024-04-29 20:43   ` [PATCH v2 08/23] pack-bitmap: make `bitmap_writer_push_bitmapped_commit()` public Taylor Blau
2024-05-13 18:50     ` Jeff King
2024-05-14  0:54       ` Taylor Blau
2024-04-29 20:43   ` [PATCH v2 09/23] pseudo-merge: implement support for selecting pseudo-merge commits Taylor Blau
2024-05-06 11:53     ` Patrick Steinhardt
2024-05-06 19:58       ` Taylor Blau
2024-05-13 19:03     ` Jeff King
2024-05-14  0:58       ` Taylor Blau
2024-05-16  8:07         ` Jeff King
2024-05-16 22:43           ` Junio C Hamano
2024-04-29 20:43   ` [PATCH v2 10/23] pack-bitmap-write.c: select " Taylor Blau
2024-05-06 11:53     ` Patrick Steinhardt
2024-05-06 20:05       ` Taylor Blau
2024-05-10 11:47         ` Patrick Steinhardt
2024-04-29 20:43   ` [PATCH v2 11/23] pack-bitmap-write.c: write pseudo-merge table Taylor Blau
2024-04-29 20:43   ` [PATCH v2 12/23] pack-bitmap: extract `read_bitmap()` function Taylor Blau
2024-04-29 20:43   ` [PATCH v2 13/23] pseudo-merge: scaffolding for reads Taylor Blau
2024-04-29 20:43   ` [PATCH v2 14/23] pack-bitmap.c: read pseudo-merge extension Taylor Blau
2024-04-29 20:44   ` [PATCH v2 15/23] pseudo-merge: implement support for reading pseudo-merge commits Taylor Blau
2024-04-29 20:44   ` [PATCH v2 16/23] ewah: implement `ewah_bitmap_popcount()` Taylor Blau
2024-04-29 20:44   ` [PATCH v2 17/23] pack-bitmap: implement test helpers for pseudo-merge Taylor Blau
2024-04-29 20:44   ` [PATCH v2 18/23] t/test-lib-functions.sh: support `--date` in `test_commit_bulk()` Taylor Blau
2024-04-29 20:44   ` [PATCH v2 19/23] pack-bitmap.c: use pseudo-merges during traversal Taylor Blau
2024-04-29 20:44   ` [PATCH v2 20/23] pack-bitmap: extra trace2 information Taylor Blau
2024-04-29 20:44   ` [PATCH v2 21/23] ewah: `bitmap_equals_ewah()` Taylor Blau
2024-04-29 20:44   ` [PATCH v2 22/23] pseudo-merge: implement support for finding existing merges Taylor Blau
2024-04-29 20:44   ` [PATCH v2 23/23] t/perf: implement performace tests for pseudo-merge bitmaps Taylor Blau
2024-04-30 20:03   ` [PATCH v2 00/23] pack-bitmap: pseudo-merge reachability bitmaps Junio C Hamano
2024-05-01 14:40     ` Taylor Blau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7acdee2b5f2eddfb143afa2982f40bb0136ccdd1.1710972293.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).