git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Junio C Hamano <gitster@pobox.com>
Subject: [PATCH 03/11] midx: move `midx_repack` (and related functions) to midx-write.c
Date: Mon, 25 Mar 2024 13:24:25 -0400	[thread overview]
Message-ID: <487a0ccda8c781a4e7cfdd14d32b0466a867ddff.1711387439.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1711387439.git.me@ttaylorr.com>

Move `midx_repack()`, the main function which implements the sub-command
'git multi-pack-index repack' into midx-write.c.

This patch does not introduce any behavioral changes and is best viewed
with `--color-moved`.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx-write.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++
 midx.c       | 196 -------------------------------------------------
 2 files changed, 202 insertions(+), 196 deletions(-)

diff --git a/midx-write.c b/midx-write.c
index 4aab273243..6dd58be7e0 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -1,5 +1,11 @@
 #include "git-compat-util.h"
+#include "config.h"
+#include "hex.h"
+#include "packfile.h"
 #include "midx.h"
+#include "run-command.h"
+#include "pack-bitmap.h"
+#include "revision.h"
 
 extern int write_midx_internal(const char *object_dir,
 			       struct string_list *packs_to_include,
@@ -10,3 +16,199 @@ extern int write_midx_internal(const char *object_dir,
 
 extern struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
 							const char *object_dir);
+
+struct repack_info {
+	timestamp_t mtime;
+	uint32_t referenced_objects;
+	uint32_t pack_int_id;
+};
+
+static int compare_by_mtime(const void *a_, const void *b_)
+{
+	const struct repack_info *a, *b;
+
+	a = (const struct repack_info *)a_;
+	b = (const struct repack_info *)b_;
+
+	if (a->mtime < b->mtime)
+		return -1;
+	if (a->mtime > b->mtime)
+		return 1;
+	return 0;
+}
+
+static int fill_included_packs_all(struct repository *r,
+				   struct multi_pack_index *m,
+				   unsigned char *include_pack)
+{
+	uint32_t i, count = 0;
+	int pack_kept_objects = 0;
+
+	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
+
+	for (i = 0; i < m->num_packs; i++) {
+		if (prepare_midx_pack(r, m, i))
+			continue;
+		if (!pack_kept_objects && m->packs[i]->pack_keep)
+			continue;
+		if (m->packs[i]->is_cruft)
+			continue;
+
+		include_pack[i] = 1;
+		count++;
+	}
+
+	return count < 2;
+}
+
+static int fill_included_packs_batch(struct repository *r,
+				     struct multi_pack_index *m,
+				     unsigned char *include_pack,
+				     size_t batch_size)
+{
+	uint32_t i, packs_to_repack;
+	size_t total_size;
+	struct repack_info *pack_info;
+	int pack_kept_objects = 0;
+
+	CALLOC_ARRAY(pack_info, m->num_packs);
+
+	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
+
+	for (i = 0; i < m->num_packs; i++) {
+		pack_info[i].pack_int_id = i;
+
+		if (prepare_midx_pack(r, m, i))
+			continue;
+
+		pack_info[i].mtime = m->packs[i]->mtime;
+	}
+
+	for (i = 0; i < m->num_objects; i++) {
+		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
+		pack_info[pack_int_id].referenced_objects++;
+	}
+
+	QSORT(pack_info, m->num_packs, compare_by_mtime);
+
+	total_size = 0;
+	packs_to_repack = 0;
+	for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
+		int pack_int_id = pack_info[i].pack_int_id;
+		struct packed_git *p = m->packs[pack_int_id];
+		size_t expected_size;
+
+		if (!p)
+			continue;
+		if (!pack_kept_objects && p->pack_keep)
+			continue;
+		if (p->is_cruft)
+			continue;
+		if (open_pack_index(p) || !p->num_objects)
+			continue;
+
+		expected_size = st_mult(p->pack_size,
+					pack_info[i].referenced_objects);
+		expected_size /= p->num_objects;
+
+		if (expected_size >= batch_size)
+			continue;
+
+		packs_to_repack++;
+		total_size += expected_size;
+		include_pack[pack_int_id] = 1;
+	}
+
+	free(pack_info);
+
+	if (packs_to_repack < 2)
+		return 1;
+
+	return 0;
+}
+
+int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
+{
+	int result = 0;
+	uint32_t i;
+	unsigned char *include_pack;
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	FILE *cmd_in;
+	struct strbuf base_name = STRBUF_INIT;
+	struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
+
+	/*
+	 * When updating the default for these configuration
+	 * variables in builtin/repack.c, these must be adjusted
+	 * to match.
+	 */
+	int delta_base_offset = 1;
+	int use_delta_islands = 0;
+
+	if (!m)
+		return 0;
+
+	CALLOC_ARRAY(include_pack, m->num_packs);
+
+	if (batch_size) {
+		if (fill_included_packs_batch(r, m, include_pack, batch_size))
+			goto cleanup;
+	} else if (fill_included_packs_all(r, m, include_pack))
+		goto cleanup;
+
+	repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
+	repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
+
+	strvec_push(&cmd.args, "pack-objects");
+
+	strbuf_addstr(&base_name, object_dir);
+	strbuf_addstr(&base_name, "/pack/pack");
+	strvec_push(&cmd.args, base_name.buf);
+
+	if (delta_base_offset)
+		strvec_push(&cmd.args, "--delta-base-offset");
+	if (use_delta_islands)
+		strvec_push(&cmd.args, "--delta-islands");
+
+	if (flags & MIDX_PROGRESS)
+		strvec_push(&cmd.args, "--progress");
+	else
+		strvec_push(&cmd.args, "-q");
+
+	strbuf_release(&base_name);
+
+	cmd.git_cmd = 1;
+	cmd.in = cmd.out = -1;
+
+	if (start_command(&cmd)) {
+		error(_("could not start pack-objects"));
+		result = 1;
+		goto cleanup;
+	}
+
+	cmd_in = xfdopen(cmd.in, "w");
+
+	for (i = 0; i < m->num_objects; i++) {
+		struct object_id oid;
+		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
+
+		if (!include_pack[pack_int_id])
+			continue;
+
+		nth_midxed_object_oid(&oid, m, i);
+		fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
+	}
+	fclose(cmd_in);
+
+	if (finish_command(&cmd)) {
+		error(_("could not finish pack-objects"));
+		result = 1;
+		goto cleanup;
+	}
+
+	result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
+
+cleanup:
+	free(include_pack);
+	return result;
+}
diff --git a/midx.c b/midx.c
index 5f22f01716..3bd8c58642 100644
--- a/midx.c
+++ b/midx.c
@@ -2055,199 +2055,3 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
 
 	return result;
 }
-
-struct repack_info {
-	timestamp_t mtime;
-	uint32_t referenced_objects;
-	uint32_t pack_int_id;
-};
-
-static int compare_by_mtime(const void *a_, const void *b_)
-{
-	const struct repack_info *a, *b;
-
-	a = (const struct repack_info *)a_;
-	b = (const struct repack_info *)b_;
-
-	if (a->mtime < b->mtime)
-		return -1;
-	if (a->mtime > b->mtime)
-		return 1;
-	return 0;
-}
-
-static int fill_included_packs_all(struct repository *r,
-				   struct multi_pack_index *m,
-				   unsigned char *include_pack)
-{
-	uint32_t i, count = 0;
-	int pack_kept_objects = 0;
-
-	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
-
-	for (i = 0; i < m->num_packs; i++) {
-		if (prepare_midx_pack(r, m, i))
-			continue;
-		if (!pack_kept_objects && m->packs[i]->pack_keep)
-			continue;
-		if (m->packs[i]->is_cruft)
-			continue;
-
-		include_pack[i] = 1;
-		count++;
-	}
-
-	return count < 2;
-}
-
-static int fill_included_packs_batch(struct repository *r,
-				     struct multi_pack_index *m,
-				     unsigned char *include_pack,
-				     size_t batch_size)
-{
-	uint32_t i, packs_to_repack;
-	size_t total_size;
-	struct repack_info *pack_info;
-	int pack_kept_objects = 0;
-
-	CALLOC_ARRAY(pack_info, m->num_packs);
-
-	repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
-
-	for (i = 0; i < m->num_packs; i++) {
-		pack_info[i].pack_int_id = i;
-
-		if (prepare_midx_pack(r, m, i))
-			continue;
-
-		pack_info[i].mtime = m->packs[i]->mtime;
-	}
-
-	for (i = 0; i < m->num_objects; i++) {
-		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
-		pack_info[pack_int_id].referenced_objects++;
-	}
-
-	QSORT(pack_info, m->num_packs, compare_by_mtime);
-
-	total_size = 0;
-	packs_to_repack = 0;
-	for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
-		int pack_int_id = pack_info[i].pack_int_id;
-		struct packed_git *p = m->packs[pack_int_id];
-		size_t expected_size;
-
-		if (!p)
-			continue;
-		if (!pack_kept_objects && p->pack_keep)
-			continue;
-		if (p->is_cruft)
-			continue;
-		if (open_pack_index(p) || !p->num_objects)
-			continue;
-
-		expected_size = st_mult(p->pack_size,
-					pack_info[i].referenced_objects);
-		expected_size /= p->num_objects;
-
-		if (expected_size >= batch_size)
-			continue;
-
-		packs_to_repack++;
-		total_size += expected_size;
-		include_pack[pack_int_id] = 1;
-	}
-
-	free(pack_info);
-
-	if (packs_to_repack < 2)
-		return 1;
-
-	return 0;
-}
-
-int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
-{
-	int result = 0;
-	uint32_t i;
-	unsigned char *include_pack;
-	struct child_process cmd = CHILD_PROCESS_INIT;
-	FILE *cmd_in;
-	struct strbuf base_name = STRBUF_INIT;
-	struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
-
-	/*
-	 * When updating the default for these configuration
-	 * variables in builtin/repack.c, these must be adjusted
-	 * to match.
-	 */
-	int delta_base_offset = 1;
-	int use_delta_islands = 0;
-
-	if (!m)
-		return 0;
-
-	CALLOC_ARRAY(include_pack, m->num_packs);
-
-	if (batch_size) {
-		if (fill_included_packs_batch(r, m, include_pack, batch_size))
-			goto cleanup;
-	} else if (fill_included_packs_all(r, m, include_pack))
-		goto cleanup;
-
-	repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
-	repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
-
-	strvec_push(&cmd.args, "pack-objects");
-
-	strbuf_addstr(&base_name, object_dir);
-	strbuf_addstr(&base_name, "/pack/pack");
-	strvec_push(&cmd.args, base_name.buf);
-
-	if (delta_base_offset)
-		strvec_push(&cmd.args, "--delta-base-offset");
-	if (use_delta_islands)
-		strvec_push(&cmd.args, "--delta-islands");
-
-	if (flags & MIDX_PROGRESS)
-		strvec_push(&cmd.args, "--progress");
-	else
-		strvec_push(&cmd.args, "-q");
-
-	strbuf_release(&base_name);
-
-	cmd.git_cmd = 1;
-	cmd.in = cmd.out = -1;
-
-	if (start_command(&cmd)) {
-		error(_("could not start pack-objects"));
-		result = 1;
-		goto cleanup;
-	}
-
-	cmd_in = xfdopen(cmd.in, "w");
-
-	for (i = 0; i < m->num_objects; i++) {
-		struct object_id oid;
-		uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
-
-		if (!include_pack[pack_int_id])
-			continue;
-
-		nth_midxed_object_oid(&oid, m, i);
-		fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
-	}
-	fclose(cmd_in);
-
-	if (finish_command(&cmd)) {
-		error(_("could not finish pack-objects"));
-		result = 1;
-		goto cleanup;
-	}
-
-	result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
-
-cleanup:
-	free(include_pack);
-	return result;
-}
-- 
2.44.0.290.g736be63234b



  parent reply	other threads:[~2024-03-25 18:04 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-25 17:24 [PATCH 00/11] midx: split MIDX writing routines into midx-write.c, cleanup Taylor Blau
2024-03-25 17:24 ` [PATCH 01/11] midx-write: initial commit Taylor Blau
2024-03-25 20:30   ` Junio C Hamano
2024-03-25 22:09     ` Taylor Blau
2024-03-25 17:24 ` [PATCH 02/11] midx: extern a pair of shared functions Taylor Blau
2024-03-25 17:24 ` Taylor Blau [this message]
2024-03-25 17:24 ` [PATCH 04/11] midx: move `expire_midx_packs` to midx-write.c Taylor Blau
2024-03-25 17:24 ` [PATCH 05/11] midx: move `write_midx_file_only` " Taylor Blau
2024-03-25 17:24 ` [PATCH 06/11] midx: move `write_midx_file` " Taylor Blau
2024-03-25 17:24 ` [PATCH 07/11] midx: move `write_midx_internal` (and related functions) " Taylor Blau
2024-03-25 17:24 ` [PATCH 08/11] midx-write.c: avoid directly managed temporary strbuf Taylor Blau
2024-03-25 20:33   ` Junio C Hamano
2024-03-25 22:11     ` Taylor Blau
2024-03-25 17:24 ` [PATCH 09/11] midx-write.c: factor out common want_included_pack() routine Taylor Blau
2024-03-25 20:36   ` Junio C Hamano
2024-03-27  8:29   ` Jeff King
2024-03-25 17:24 ` [PATCH 10/11] midx-write.c: check count of packs to repack after grouping Taylor Blau
2024-03-25 20:41   ` Junio C Hamano
2024-03-25 22:11     ` Taylor Blau
2024-03-25 17:24 ` [PATCH 11/11] midx-write.c: use `--stdin-packs` when repacking Taylor Blau
2024-03-27  8:37   ` Jeff King
2024-03-27  8:39 ` [PATCH 00/11] midx: split MIDX writing routines into midx-write.c, cleanup Jeff King
2024-04-01 21:16 ` [PATCH v2 0/4] " Taylor Blau
2024-04-01 21:16   ` [PATCH v2 1/4] midx-write: move writing-related functions from midx.c Taylor Blau
2024-04-01 21:16   ` [PATCH v2 2/4] midx-write.c: factor out common want_included_pack() routine Taylor Blau
2024-04-02 11:47     ` Patrick Steinhardt
2024-04-01 21:16   ` [PATCH v2 3/4] midx-write.c: check count of packs to repack after grouping Taylor Blau
2024-04-01 21:16   ` [PATCH v2 4/4] midx-write.c: use `--stdin-packs` when repacking Taylor Blau
2024-04-01 21:45     ` Junio C Hamano
2024-04-02 11:47     ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=487a0ccda8c781a4e7cfdd14d32b0466a867ddff.1711387439.git.me@ttaylorr.com \
    --to=me@ttaylorr.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).