From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>, Junio C Hamano <gitster@pobox.com>
Subject: [PATCH 03/11] midx: move `midx_repack` (and related functions) to midx-write.c
Date: Mon, 25 Mar 2024 13:24:25 -0400 [thread overview]
Message-ID: <487a0ccda8c781a4e7cfdd14d32b0466a867ddff.1711387439.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1711387439.git.me@ttaylorr.com>
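Move `midx_repack()`, the main function which implements the sub-command
'git multi-pack-index repack', into midx-write.c, together with the
helpers it relies on: `struct repack_info`, `compare_by_mtime()`,
`fill_included_packs_all()`, and `fill_included_packs_batch()`.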
This patch does not introduce any behavioral changes and is best viewed
with `--color-moved`.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
midx-write.c | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++
midx.c | 196 -------------------------------------------------
2 files changed, 202 insertions(+), 196 deletions(-)
diff --git a/midx-write.c b/midx-write.c
index 4aab273243..6dd58be7e0 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -1,5 +1,11 @@
#include "git-compat-util.h"
+#include "config.h"
+#include "hex.h"
+#include "packfile.h"
#include "midx.h"
+#include "run-command.h"
+#include "pack-bitmap.h"
+#include "revision.h"
extern int write_midx_internal(const char *object_dir,
struct string_list *packs_to_include,
@@ -10,3 +16,199 @@ extern int write_midx_internal(const char *object_dir,
extern struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
const char *object_dir);
+
+struct repack_info {
+ timestamp_t mtime;
+ uint32_t referenced_objects;
+ uint32_t pack_int_id;
+};
+
+static int compare_by_mtime(const void *a_, const void *b_)
+{
+ const struct repack_info *a, *b;
+
+ a = (const struct repack_info *)a_;
+ b = (const struct repack_info *)b_;
+
+ if (a->mtime < b->mtime)
+ return -1;
+ if (a->mtime > b->mtime)
+ return 1;
+ return 0;
+}
+
+static int fill_included_packs_all(struct repository *r,
+ struct multi_pack_index *m,
+ unsigned char *include_pack)
+{
+ uint32_t i, count = 0;
+ int pack_kept_objects = 0;
+
+ repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
+
+ for (i = 0; i < m->num_packs; i++) {
+ if (prepare_midx_pack(r, m, i))
+ continue;
+ if (!pack_kept_objects && m->packs[i]->pack_keep)
+ continue;
+ if (m->packs[i]->is_cruft)
+ continue;
+
+ include_pack[i] = 1;
+ count++;
+ }
+
+ return count < 2;
+}
+
+static int fill_included_packs_batch(struct repository *r,
+ struct multi_pack_index *m,
+ unsigned char *include_pack,
+ size_t batch_size)
+{
+ uint32_t i, packs_to_repack;
+ size_t total_size;
+ struct repack_info *pack_info;
+ int pack_kept_objects = 0;
+
+ CALLOC_ARRAY(pack_info, m->num_packs);
+
+ repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
+
+ for (i = 0; i < m->num_packs; i++) {
+ pack_info[i].pack_int_id = i;
+
+ if (prepare_midx_pack(r, m, i))
+ continue;
+
+ pack_info[i].mtime = m->packs[i]->mtime;
+ }
+
+ for (i = 0; i < m->num_objects; i++) {
+ uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
+ pack_info[pack_int_id].referenced_objects++;
+ }
+
+ QSORT(pack_info, m->num_packs, compare_by_mtime);
+
+ total_size = 0;
+ packs_to_repack = 0;
+ for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
+ int pack_int_id = pack_info[i].pack_int_id;
+ struct packed_git *p = m->packs[pack_int_id];
+ size_t expected_size;
+
+ if (!p)
+ continue;
+ if (!pack_kept_objects && p->pack_keep)
+ continue;
+ if (p->is_cruft)
+ continue;
+ if (open_pack_index(p) || !p->num_objects)
+ continue;
+
+ expected_size = st_mult(p->pack_size,
+ pack_info[i].referenced_objects);
+ expected_size /= p->num_objects;
+
+ if (expected_size >= batch_size)
+ continue;
+
+ packs_to_repack++;
+ total_size += expected_size;
+ include_pack[pack_int_id] = 1;
+ }
+
+ free(pack_info);
+
+ if (packs_to_repack < 2)
+ return 1;
+
+ return 0;
+}
+
+int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
+{
+ int result = 0;
+ uint32_t i;
+ unsigned char *include_pack;
+ struct child_process cmd = CHILD_PROCESS_INIT;
+ FILE *cmd_in;
+ struct strbuf base_name = STRBUF_INIT;
+ struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
+
+ /*
+ * When updating the default for these configuration
+ * variables in builtin/repack.c, these must be adjusted
+ * to match.
+ */
+ int delta_base_offset = 1;
+ int use_delta_islands = 0;
+
+ if (!m)
+ return 0;
+
+ CALLOC_ARRAY(include_pack, m->num_packs);
+
+ if (batch_size) {
+ if (fill_included_packs_batch(r, m, include_pack, batch_size))
+ goto cleanup;
+ } else if (fill_included_packs_all(r, m, include_pack))
+ goto cleanup;
+
+ repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
+ repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
+
+ strvec_push(&cmd.args, "pack-objects");
+
+ strbuf_addstr(&base_name, object_dir);
+ strbuf_addstr(&base_name, "/pack/pack");
+ strvec_push(&cmd.args, base_name.buf);
+
+ if (delta_base_offset)
+ strvec_push(&cmd.args, "--delta-base-offset");
+ if (use_delta_islands)
+ strvec_push(&cmd.args, "--delta-islands");
+
+ if (flags & MIDX_PROGRESS)
+ strvec_push(&cmd.args, "--progress");
+ else
+ strvec_push(&cmd.args, "-q");
+
+ strbuf_release(&base_name);
+
+ cmd.git_cmd = 1;
+ cmd.in = cmd.out = -1;
+
+ if (start_command(&cmd)) {
+ error(_("could not start pack-objects"));
+ result = 1;
+ goto cleanup;
+ }
+
+ cmd_in = xfdopen(cmd.in, "w");
+
+ for (i = 0; i < m->num_objects; i++) {
+ struct object_id oid;
+ uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
+
+ if (!include_pack[pack_int_id])
+ continue;
+
+ nth_midxed_object_oid(&oid, m, i);
+ fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
+ }
+ fclose(cmd_in);
+
+ if (finish_command(&cmd)) {
+ error(_("could not finish pack-objects"));
+ result = 1;
+ goto cleanup;
+ }
+
+ result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
+
+cleanup:
+ free(include_pack);
+ return result;
+}
diff --git a/midx.c b/midx.c
index 5f22f01716..3bd8c58642 100644
--- a/midx.c
+++ b/midx.c
@@ -2055,199 +2055,3 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
return result;
}
-
-struct repack_info {
- timestamp_t mtime;
- uint32_t referenced_objects;
- uint32_t pack_int_id;
-};
-
-static int compare_by_mtime(const void *a_, const void *b_)
-{
- const struct repack_info *a, *b;
-
- a = (const struct repack_info *)a_;
- b = (const struct repack_info *)b_;
-
- if (a->mtime < b->mtime)
- return -1;
- if (a->mtime > b->mtime)
- return 1;
- return 0;
-}
-
-static int fill_included_packs_all(struct repository *r,
- struct multi_pack_index *m,
- unsigned char *include_pack)
-{
- uint32_t i, count = 0;
- int pack_kept_objects = 0;
-
- repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
-
- for (i = 0; i < m->num_packs; i++) {
- if (prepare_midx_pack(r, m, i))
- continue;
- if (!pack_kept_objects && m->packs[i]->pack_keep)
- continue;
- if (m->packs[i]->is_cruft)
- continue;
-
- include_pack[i] = 1;
- count++;
- }
-
- return count < 2;
-}
-
-static int fill_included_packs_batch(struct repository *r,
- struct multi_pack_index *m,
- unsigned char *include_pack,
- size_t batch_size)
-{
- uint32_t i, packs_to_repack;
- size_t total_size;
- struct repack_info *pack_info;
- int pack_kept_objects = 0;
-
- CALLOC_ARRAY(pack_info, m->num_packs);
-
- repo_config_get_bool(r, "repack.packkeptobjects", &pack_kept_objects);
-
- for (i = 0; i < m->num_packs; i++) {
- pack_info[i].pack_int_id = i;
-
- if (prepare_midx_pack(r, m, i))
- continue;
-
- pack_info[i].mtime = m->packs[i]->mtime;
- }
-
- for (i = 0; i < m->num_objects; i++) {
- uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
- pack_info[pack_int_id].referenced_objects++;
- }
-
- QSORT(pack_info, m->num_packs, compare_by_mtime);
-
- total_size = 0;
- packs_to_repack = 0;
- for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
- int pack_int_id = pack_info[i].pack_int_id;
- struct packed_git *p = m->packs[pack_int_id];
- size_t expected_size;
-
- if (!p)
- continue;
- if (!pack_kept_objects && p->pack_keep)
- continue;
- if (p->is_cruft)
- continue;
- if (open_pack_index(p) || !p->num_objects)
- continue;
-
- expected_size = st_mult(p->pack_size,
- pack_info[i].referenced_objects);
- expected_size /= p->num_objects;
-
- if (expected_size >= batch_size)
- continue;
-
- packs_to_repack++;
- total_size += expected_size;
- include_pack[pack_int_id] = 1;
- }
-
- free(pack_info);
-
- if (packs_to_repack < 2)
- return 1;
-
- return 0;
-}
-
-int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, unsigned flags)
-{
- int result = 0;
- uint32_t i;
- unsigned char *include_pack;
- struct child_process cmd = CHILD_PROCESS_INIT;
- FILE *cmd_in;
- struct strbuf base_name = STRBUF_INIT;
- struct multi_pack_index *m = lookup_multi_pack_index(r, object_dir);
-
- /*
- * When updating the default for these configuration
- * variables in builtin/repack.c, these must be adjusted
- * to match.
- */
- int delta_base_offset = 1;
- int use_delta_islands = 0;
-
- if (!m)
- return 0;
-
- CALLOC_ARRAY(include_pack, m->num_packs);
-
- if (batch_size) {
- if (fill_included_packs_batch(r, m, include_pack, batch_size))
- goto cleanup;
- } else if (fill_included_packs_all(r, m, include_pack))
- goto cleanup;
-
- repo_config_get_bool(r, "repack.usedeltabaseoffset", &delta_base_offset);
- repo_config_get_bool(r, "repack.usedeltaislands", &use_delta_islands);
-
- strvec_push(&cmd.args, "pack-objects");
-
- strbuf_addstr(&base_name, object_dir);
- strbuf_addstr(&base_name, "/pack/pack");
- strvec_push(&cmd.args, base_name.buf);
-
- if (delta_base_offset)
- strvec_push(&cmd.args, "--delta-base-offset");
- if (use_delta_islands)
- strvec_push(&cmd.args, "--delta-islands");
-
- if (flags & MIDX_PROGRESS)
- strvec_push(&cmd.args, "--progress");
- else
- strvec_push(&cmd.args, "-q");
-
- strbuf_release(&base_name);
-
- cmd.git_cmd = 1;
- cmd.in = cmd.out = -1;
-
- if (start_command(&cmd)) {
- error(_("could not start pack-objects"));
- result = 1;
- goto cleanup;
- }
-
- cmd_in = xfdopen(cmd.in, "w");
-
- for (i = 0; i < m->num_objects; i++) {
- struct object_id oid;
- uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
-
- if (!include_pack[pack_int_id])
- continue;
-
- nth_midxed_object_oid(&oid, m, i);
- fprintf(cmd_in, "%s\n", oid_to_hex(&oid));
- }
- fclose(cmd_in);
-
- if (finish_command(&cmd)) {
- error(_("could not finish pack-objects"));
- result = 1;
- goto cleanup;
- }
-
- result = write_midx_internal(object_dir, NULL, NULL, NULL, NULL, flags);
-
-cleanup:
- free(include_pack);
- return result;
-}
--
2.44.0.290.g736be63234b
next prev parent reply other threads:[~2024-03-25 18:04 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-25 17:24 [PATCH 00/11] midx: split MIDX writing routines into midx-write.c, cleanup Taylor Blau
2024-03-25 17:24 ` [PATCH 01/11] midx-write: initial commit Taylor Blau
2024-03-25 20:30 ` Junio C Hamano
2024-03-25 22:09 ` Taylor Blau
2024-03-25 17:24 ` [PATCH 02/11] midx: extern a pair of shared functions Taylor Blau
2024-03-25 17:24 ` Taylor Blau [this message]
2024-03-25 17:24 ` [PATCH 04/11] midx: move `expire_midx_packs` to midx-write.c Taylor Blau
2024-03-25 17:24 ` [PATCH 05/11] midx: move `write_midx_file_only` " Taylor Blau
2024-03-25 17:24 ` [PATCH 06/11] midx: move `write_midx_file` " Taylor Blau
2024-03-25 17:24 ` [PATCH 07/11] midx: move `write_midx_internal` (and related functions) " Taylor Blau
2024-03-25 17:24 ` [PATCH 08/11] midx-write.c: avoid directly managed temporary strbuf Taylor Blau
2024-03-25 20:33 ` Junio C Hamano
2024-03-25 22:11 ` Taylor Blau
2024-03-25 17:24 ` [PATCH 09/11] midx-write.c: factor out common want_included_pack() routine Taylor Blau
2024-03-25 20:36 ` Junio C Hamano
2024-03-27 8:29 ` Jeff King
2024-03-25 17:24 ` [PATCH 10/11] midx-write.c: check count of packs to repack after grouping Taylor Blau
2024-03-25 20:41 ` Junio C Hamano
2024-03-25 22:11 ` Taylor Blau
2024-03-25 17:24 ` [PATCH 11/11] midx-write.c: use `--stdin-packs` when repacking Taylor Blau
2024-03-27 8:37 ` Jeff King
2024-03-27 8:39 ` [PATCH 00/11] midx: split MIDX writing routines into midx-write.c, cleanup Jeff King
2024-04-01 21:16 ` [PATCH v2 0/4] " Taylor Blau
2024-04-01 21:16 ` [PATCH v2 1/4] midx-write: move writing-related functions from midx.c Taylor Blau
2024-04-01 21:16 ` [PATCH v2 2/4] midx-write.c: factor out common want_included_pack() routine Taylor Blau
2024-04-02 11:47 ` Patrick Steinhardt
2024-04-01 21:16 ` [PATCH v2 3/4] midx-write.c: check count of packs to repack after grouping Taylor Blau
2024-04-01 21:16 ` [PATCH v2 4/4] midx-write.c: use `--stdin-packs` when repacking Taylor Blau
2024-04-01 21:45 ` Junio C Hamano
2024-04-02 11:47 ` Patrick Steinhardt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=487a0ccda8c781a4e7cfdd14d32b0466a867ddff.1711387439.git.me@ttaylorr.com \
--to=me@ttaylorr.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).