git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>, gitster@pobox.com, git@jeffhostetler.com, peartben@gmail.com, christian.couder@gmail.com
Subject: [PATCH 09/18] gc: do not repack promisor packfiles
Date: Fri, 29 Sep 2017 13:11:45 -0700
Message-ID: <75be6a3b191753d14c5e10caf24ecf6926a7f544.1506714999.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1506714999.git.jonathantanmy@google.com>

Teach gc to stop traversal at promisor objects, and to leave promisor
packfiles alone. This has the effect of only repacking non-promisor
packfiles, and preserves the distinction between promisor packfiles and
non-promisor packfiles.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 builtin/gc.c             |  3 +++
 builtin/pack-objects.c   | 10 ++++++++++
 builtin/prune.c          |  7 +++++++
 builtin/repack.c         |  7 +++++--
 t/t0410-partial-clone.sh | 52 +++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index c22787ac7..7a9632bba 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -458,6 +458,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
 			argv_array_push(&prune, prune_expire);
 			if (quiet)
 				argv_array_push(&prune, "--no-progress");
+			if (repository_format_partial_clone)
+				argv_array_push(&prune,
+						"--exclude-promisor-objects");
 			if (run_command_v_opt(prune.argv, RUN_GIT_CMD))
 				return error(FAILED_RUN, prune.argv[0]);
 		}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index a57b4f058..958822bf4 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -73,6 +73,8 @@ static int use_bitmap_index = -1;
 static int write_bitmap_index;
 static uint16_t write_bitmap_options;
 
+static int exclude_promisor_objects;
+
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
 static unsigned long cache_max_small_delta_size = 1000;
@@ -2952,6 +2954,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 			 N_("use a bitmap index if available to speed up counting objects")),
 		OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,
 			 N_("write a bitmap index together with the pack index")),
+		OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
+			 N_("do not pack objects in promisor packfiles")),
 		OPT_END(),
 	};
 
@@ -2997,6 +3001,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		argv_array_push(&rp, "--unpacked");
 	}
 
+	if (exclude_promisor_objects) {
+		use_internal_rev_list = 1;
+		fetch_if_missing = 0;
+		argv_array_push(&rp, "--exclude-promisor-objects");
+	}
+
 	if (!reuse_object)
 		reuse_delta = 0;
 	if (pack_compression_level == -1)
diff --git a/builtin/prune.c b/builtin/prune.c
index cddabf26a..be34645dc 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -101,12 +101,15 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
 {
 	struct rev_info revs;
 	struct progress *progress = NULL;
+	int exclude_promisor_objects = 0;
 	const struct option options[] = {
 		OPT__DRY_RUN(&show_only, N_("do not remove, show only")),
 		OPT__VERBOSE(&verbose, N_("report pruned objects")),
 		OPT_BOOL(0, "progress", &show_progress, N_("show progress")),
 		OPT_EXPIRY_DATE(0, "expire", &expire,
 				N_("expire objects older than <time>")),
+		OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
+			 N_("limit traversal to objects outside promisor packfiles")),
 		OPT_END()
 	};
 	char *s;
@@ -139,6 +142,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
 		show_progress = isatty(2);
 	if (show_progress)
 		progress = start_delayed_progress(_("Checking connectivity"), 0);
+	if (exclude_promisor_objects) {
+		fetch_if_missing = 0;
+		revs.exclude_promisor_objects = 1;
+	}
 
 	mark_reachable_objects(&revs, 1, expire, progress);
 	stop_progress(&progress);
diff --git a/builtin/repack.c b/builtin/repack.c
index f17a68a17..f43317bb5 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -83,7 +83,8 @@ static void remove_pack_on_signal(int signo)
 
 /*
  * Adds all packs hex strings to the fname list, which do not
- * have a corresponding .keep file.
+ * have a corresponding .keep or .promisor file. These packs are not to
+ * be kept if we are going to pack everything into one file.
  */
 static void get_non_kept_pack_filenames(struct string_list *fname_list)
 {
@@ -101,7 +102,8 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list)
 
 		fname = xmemdupz(e->d_name, len);
 
-		if (!file_exists(mkpath("%s/%s.keep", packdir, fname)))
+		if (!file_exists(mkpath("%s/%s.keep", packdir, fname)) &&
+		    !file_exists(mkpath("%s/%s.promisor", packdir, fname)))
 			string_list_append_nodup(fname_list, fname);
 		else
 			free(fname);
@@ -232,6 +234,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 	argv_array_push(&cmd.args, "--all");
 	argv_array_push(&cmd.args, "--reflog");
 	argv_array_push(&cmd.args, "--indexed-objects");
+	argv_array_push(&cmd.args, "--exclude-promisor-objects");
 	if (window)
 		argv_array_pushf(&cmd.args, "--window=%s", window);
 	if (window_memory)
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
index 3ca6af5cd..071736c7b 100755
--- a/t/t0410-partial-clone.sh
+++ b/t/t0410-partial-clone.sh
@@ -11,13 +11,15 @@ delete_object () {
 pack_as_from_promisor () {
 	HASH=$(git -C repo pack-objects .git/objects/pack/pack) &&
 	>repo/.git/objects/pack/pack-$HASH.promisor
+	echo $HASH
 }
 
 promise_and_delete () {
 	HASH=$(git -C repo rev-parse "$1") &&
 	git -C repo tag -a -m message my_annotated_tag "$HASH" &&
 	git -C repo rev-parse my_annotated_tag | pack_as_from_promisor &&
-	git -C repo tag -d my_annotated_tag &&
+	# tag -d prints a message to stdout, so redirect it
+	git -C repo tag -d my_annotated_tag >/dev/null &&
 	delete_object repo "$HASH"
 }
 
@@ -261,6 +263,54 @@ test_expect_success 'rev-list accepts missing and promised objects on command li
 	git -C repo rev-list --exclude-promisor-objects --objects "$COMMIT" "$TREE" "$BLOB"
 '
 
+test_expect_success 'gc does not repack promisor objects' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	test_commit -C repo my_commit &&
+
+	TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) &&
+	HASH=$(printf "$TREE_HASH\n" | pack_as_from_promisor) &&
+
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config extensions.partialclone "arbitrary string" &&
+	git -C repo gc &&
+
+	# Ensure that the promisor packfile still exists, and remove it
+	test -e repo/.git/objects/pack/pack-$HASH.pack &&
+	rm repo/.git/objects/pack/pack-$HASH.* &&
+
+	# Ensure that the single other pack contains the commit, but not the tree
+	ls repo/.git/objects/pack/pack-*.pack >packlist &&
+	test_line_count = 1 packlist &&
+	git verify-pack repo/.git/objects/pack/pack-*.pack -v >out &&
+	grep "$(git -C repo rev-parse HEAD)" out &&
+	! grep "$TREE_HASH" out
+'
+
+test_expect_success 'gc stops traversal when a missing but promised object is reached' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	test_commit -C repo my_commit &&
+
+	TREE_HASH=$(git -C repo rev-parse HEAD^{tree}) &&
+	HASH=$(promise_and_delete $TREE_HASH) &&
+
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config extensions.partialclone "arbitrary string" &&
+	git -C repo gc &&
+
+	# Ensure that the promisor packfile still exists, and remove it
+	test -e repo/.git/objects/pack/pack-$HASH.pack &&
+	rm repo/.git/objects/pack/pack-$HASH.* &&
+
+	# Ensure that the single other pack contains the commit, but not the tree
+	ls repo/.git/objects/pack/pack-*.pack >packlist &&
+	test_line_count = 1 packlist &&
+	git verify-pack repo/.git/objects/pack/pack-*.pack -v >out &&
+	grep "$(git -C repo rev-parse HEAD)" out &&
+	! grep "$TREE_HASH" out
+'
+
 LIB_HTTPD_PORT=12345  # default port, 410, cannot be used as non-root
 . "$TEST_DIRECTORY"/lib-httpd.sh
 start_httpd
-- 
2.14.2.822.g60be5d43e6-goog


  parent reply index

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-29 20:11 [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches) Jonathan Tan
2017-09-29 20:11 ` [PATCH 01/18] fsck: introduce partialclone extension Jonathan Tan
2017-09-29 20:11 ` [PATCH 02/18] fsck: support refs pointing to promisor objects Jonathan Tan
2017-09-29 20:11 ` [PATCH 03/18] fsck: support referenced " Jonathan Tan
2017-09-29 20:11 ` [PATCH 04/18] fsck: support promisor objects as CLI argument Jonathan Tan
2017-09-29 20:11 ` [PATCH 05/18] index-pack: refactor writing of .keep files Jonathan Tan
2017-09-29 20:11 ` [PATCH 06/18] introduce fetch-object: fetch one promisor object Jonathan Tan
2017-09-29 20:11 ` [PATCH 07/18] sha1_file: support lazily fetching missing objects Jonathan Tan
2017-10-12 14:42   ` Christian Couder
2017-10-12 15:45     ` Christian Couder
2017-09-29 20:11 ` [PATCH 08/18] rev-list: support termination at promisor objects Jonathan Tan
2017-09-29 20:11 ` Jonathan Tan [this message]
2017-09-29 20:11 ` [PATCH 10/18] pack-objects: rename want_.* to ignore_.* Jonathan Tan
2017-09-29 20:11 ` [PATCH 11/18] pack-objects: support --blob-max-bytes Jonathan Tan
2017-09-29 20:11 ` [PATCH 12/18] fetch-pack: support excluding large blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 13/18] fetch: refactor calculation of remote list Jonathan Tan
2017-09-29 20:11 ` [PATCH 14/18] fetch: support excluding large blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 15/18] clone: " Jonathan Tan
2017-09-29 20:11 ` [PATCH 16/18] clone: configure blobmaxbytes in created repos Jonathan Tan
2017-09-29 20:11 ` [PATCH 17/18] unpack-trees: batch fetching of missing blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 18/18] fetch-pack: restore save_commit_buffer after use Jonathan Tan
2017-09-29 21:08 ` [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches) Johannes Schindelin
2017-10-02  4:23 ` Junio C Hamano
2017-10-03  6:15 ` Christian Couder
2017-10-03  8:50   ` Junio C Hamano
2017-10-03 14:39     ` Jeff Hostetler
2017-10-03 23:42       ` Jonathan Tan
2017-10-04 13:30         ` Jeff Hostetler

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=75be6a3b191753d14c5e10caf24ecf6926a7f544.1506714999.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=christian.couder@gmail.com \
    --cc=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peartben@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox