git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Christian Couder <christian.couder@gmail.com>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>, Jeff King <peff@peff.net>,
	Ben Peart <Ben.Peart@microsoft.com>,
	Jonathan Tan <jonathantanmy@google.com>,
	Nguyen Thai Ngoc Duy <pclouds@gmail.com>,
	Mike Hommey <mh@glandium.org>,
	Lars Schneider <larsxschneider@gmail.com>,
	Eric Wong <e@80x24.org>,
	Christian Couder <chriscool@tuxfamily.org>,
	Jeff Hostetler <jeffhost@microsoft.com>
Subject: [PATCH 04/40] fsck: introduce promisor objects
Date: Wed,  3 Jan 2018 17:33:27 +0100	[thread overview]
Message-ID: <20180103163403.11303-5-chriscool@tuxfamily.org> (raw)
In-Reply-To: <20180103163403.11303-1-chriscool@tuxfamily.org>

From: Jonathan Tan <jonathantanmy@google.com>

Currently, Git does not support repos with very large numbers of objects
or repos that wish to minimize manipulation of certain blobs (for
example, because they are very large) very well, even if the user
operates mostly on part of the repo, because Git is designed on the
assumption that every referenced object is available somewhere in the
repo storage. In such an arrangement, the full set of objects is usually
available in remote storage, ready to be lazily downloaded.

Teach fsck about the new state of affairs. In this commit, teach fsck
that missing promisor objects referenced from the reflog are not an
error case; in future commits, fsck will be taught about other cases.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fsck.c           |  2 +-
 cache.h                  |  3 +-
 packfile.c               | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 packfile.h               | 13 ++++++++
 t/t0410-partial-clone.sh | 81 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 172 insertions(+), 5 deletions(-)
 create mode 100755 t/t0410-partial-clone.sh

diff --git a/builtin/fsck.c b/builtin/fsck.c
index 04846d46f9..793d289367 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -398,7 +398,7 @@ static void fsck_handle_reflog_oid(const char *refname, struct object_id *oid,
 					xstrfmt("%s@{%"PRItime"}", refname, timestamp));
 			obj->flags |= USED;
 			mark_object_reachable(obj);
-		} else {
+		} else if (!is_promisor_object(oid)) {
 			error("%s: invalid reflog entry %s", refname, oid_to_hex(oid));
 			errors_found |= ERROR_REACHABLE;
 		}
diff --git a/cache.h b/cache.h
index 7b27abac35..078607ee91 100644
--- a/cache.h
+++ b/cache.h
@@ -1649,7 +1649,8 @@ extern struct packed_git {
 	unsigned pack_local:1,
 		 pack_keep:1,
 		 freshened:1,
-		 do_not_close:1;
+		 do_not_close:1,
+		 pack_promisor:1;
 	unsigned char sha1[20];
 	struct revindex_entry *revindex;
 	/* something like ".git/objects/pack/xxxxx.pack" */
diff --git a/packfile.c b/packfile.c
index 4a5fe7ab18..aee6c3d674 100644
--- a/packfile.c
+++ b/packfile.c
@@ -8,6 +8,12 @@
 #include "list.h"
 #include "streaming.h"
 #include "sha1-lookup.h"
+#include "commit.h"
+#include "object.h"
+#include "tag.h"
+#include "tree-walk.h"
+#include "tree.h"
+#include "external-odb.h"
 
 char *odb_pack_name(struct strbuf *buf,
 		    const unsigned char *sha1,
@@ -643,10 +649,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 		return NULL;
 
 	/*
-	 * ".pack" is long enough to hold any suffix we're adding (and
+	 * ".promisor" is long enough to hold any suffix we're adding (and
 	 * the use xsnprintf double-checks that)
 	 */
-	alloc = st_add3(path_len, strlen(".pack"), 1);
+	alloc = st_add3(path_len, strlen(".promisor"), 1);
 	p = alloc_packed_git(alloc);
 	memcpy(p->pack_name, path, path_len);
 
@@ -654,6 +660,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
 	if (!access(p->pack_name, F_OK))
 		p->pack_keep = 1;
 
+	xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");
+	if (!access(p->pack_name, F_OK))
+		p->pack_promisor = 1;
+
 	xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
 	if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
 		free(p);
@@ -781,7 +791,8 @@ static void prepare_packed_git_one(char *objdir, int local)
 		if (ends_with(de->d_name, ".idx") ||
 		    ends_with(de->d_name, ".pack") ||
 		    ends_with(de->d_name, ".bitmap") ||
-		    ends_with(de->d_name, ".keep"))
+		    ends_with(de->d_name, ".keep") ||
+		    ends_with(de->d_name, ".promisor"))
 			string_list_append(&garbage, path.buf);
 		else
 			report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
@@ -1889,6 +1900,9 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
 	for (p = packed_git; p; p = p->next) {
 		if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
 			continue;
+		if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+		    !p->pack_promisor)
+			continue;
 		if (open_pack_index(p)) {
 			pack_errors = 1;
 			continue;
@@ -1899,3 +1913,61 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
 	}
 	return r ? r : pack_errors;
 }
+
+static int add_promisor_object(const struct object_id *oid,
+			       struct packed_git *pack,
+			       uint32_t pos,
+			       void *set_)
+{
+	struct oidset *set = set_;
+	struct object *obj = parse_object(oid);
+	if (!obj)
+		return 1;
+
+	oidset_insert(set, oid);
+
+	/*
+	 * If this is a tree, commit, or tag, the objects it refers
+	 * to are also promisor objects. (Blobs refer to no objects.)
+	 */
+	if (obj->type == OBJ_TREE) {
+		struct tree *tree = (struct tree *)obj;
+		struct tree_desc desc;
+		struct name_entry entry;
+		if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
+			/*
+			 * Error messages are given when packs are
+			 * verified, so do not print any here.
+			 */
+			return 0;
+		while (tree_entry_gently(&desc, &entry))
+			oidset_insert(set, entry.oid);
+	} else if (obj->type == OBJ_COMMIT) {
+		struct commit *commit = (struct commit *) obj;
+		struct commit_list *parents = commit->parents;
+
+		oidset_insert(set, &commit->tree->object.oid);
+		for (; parents; parents = parents->next)
+			oidset_insert(set, &parents->item->object.oid);
+	} else if (obj->type == OBJ_TAG) {
+		struct tag *tag = (struct tag *) obj;
+		oidset_insert(set, &tag->tagged->oid);
+	}
+	return 0;
+}
+
+int is_promisor_object(const struct object_id *oid)
+{
+	static struct oidset promisor_objects;
+	static int promisor_objects_prepared;
+
+	if (!promisor_objects_prepared) {
+		if (has_external_odb()) {
+			for_each_packed_object(add_promisor_object,
+					       &promisor_objects,
+					       FOR_EACH_OBJECT_PROMISOR_ONLY);
+		}
+		promisor_objects_prepared = 1;
+	}
+	return oidset_contains(&promisor_objects, oid);
+}
diff --git a/packfile.h b/packfile.h
index 0cdeb54dcd..a7fca598d6 100644
--- a/packfile.h
+++ b/packfile.h
@@ -1,6 +1,8 @@
 #ifndef PACKFILE_H
 #define PACKFILE_H
 
+#include "oidset.h"
+
 /*
  * Generate the filename to be used for a pack file with checksum "sha1" and
  * extension "ext". The result is written into the strbuf "buf", overwriting
@@ -124,6 +126,11 @@ extern int has_sha1_pack(const unsigned char *sha1);
 
 extern int has_pack_index(const unsigned char *sha1);
 
+/*
+ * Only iterate over packs obtained from the promisor remote.
+ */
+#define FOR_EACH_OBJECT_PROMISOR_ONLY 2
+
 /*
  * Iterate over packed objects in both the local
  * repository and any alternates repositories (unless the
@@ -135,4 +142,10 @@ typedef int each_packed_object_fn(const struct object_id *oid,
 				  void *data);
 extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags);
 
+/*
+ * Return 1 if an object in a promisor packfile is or refers to the given
+ * object, 0 otherwise.
+ */
+extern int is_promisor_object(const struct object_id *oid);
+
 #endif
diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh
new file mode 100755
index 0000000000..9257b8c885
--- /dev/null
+++ b/t/t0410-partial-clone.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+test_description='partial clone'
+
+. ./test-lib.sh
+
+delete_object () {
+	rm $1/.git/objects/$(echo $2 | sed -e 's|^..|&/|')
+}
+
+pack_as_from_promisor () {
+	HASH=$(git -C repo pack-objects .git/objects/pack/pack) &&
+	>repo/.git/objects/pack/pack-$HASH.promisor
+}
+
+test_expect_success 'missing reflog object, but promised by a commit, passes fsck' '
+	test_create_repo repo &&
+	test_commit -C repo my_commit &&
+
+	A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
+	C=$(git -C repo commit-tree -m c -p $A HEAD^{tree}) &&
+
+	# Reference $A only from reflog, and delete it
+	git -C repo branch my_branch "$A" &&
+	git -C repo branch -f my_branch my_commit &&
+	delete_object repo "$A" &&
+
+	# State that we got $C, which refers to $A, from promisor
+	printf "$C\n" | pack_as_from_promisor &&
+
+	# Normally, it fails
+	test_must_fail git -C repo fsck &&
+
+	# But with the extension, it succeeds
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config odb.magic.promisorRemote "arbitrary string" &&
+	git -C repo fsck
+'
+
+test_expect_success 'missing reflog object, but promised by a tag, passes fsck' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	test_commit -C repo my_commit &&
+
+	A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
+	git -C repo tag -a -m d my_tag_name $A &&
+	T=$(git -C repo rev-parse my_tag_name) &&
+	git -C repo tag -d my_tag_name &&
+
+	# Reference $A only from reflog, and delete it
+	git -C repo branch my_branch "$A" &&
+	git -C repo branch -f my_branch my_commit &&
+	delete_object repo "$A" &&
+
+	# State that we got $T, which refers to $A, from promisor
+	printf "$T\n" | pack_as_from_promisor &&
+
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config odb.magic.promisorRemote "arbitrary string" &&
+	git -C repo fsck
+'
+
+test_expect_success 'missing reflog object alone fails fsck, even with extension set' '
+	rm -rf repo &&
+	test_create_repo repo &&
+	test_commit -C repo my_commit &&
+
+	A=$(git -C repo commit-tree -m a HEAD^{tree}) &&
+	B=$(git -C repo commit-tree -m b HEAD^{tree}) &&
+
+	# Reference $A only from reflog, and delete it
+	git -C repo branch my_branch "$A" &&
+	git -C repo branch -f my_branch my_commit &&
+	delete_object repo "$A" &&
+
+	git -C repo config core.repositoryformatversion 1 &&
+	git -C repo config odb.magic.promisorRemote "arbitrary string" &&
+	test_must_fail git -C repo fsck
+'
+
+test_done
-- 
2.16.0.rc0.16.g82191dbc6c.dirty


  parent reply	other threads:[~2018-01-03 16:37 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-03 16:33 [PATCH 00/40] Promisor remotes and external ODB support Christian Couder
2018-01-03 16:33 ` [PATCH 01/40] Add initial external odb support Christian Couder
2018-01-04 19:59   ` Jeff Hostetler
2018-01-15 14:34     ` Christian Couder
2018-01-03 16:33 ` [PATCH 02/40] Add GIT_NO_EXTERNAL_ODB env variable Christian Couder
2018-01-03 16:33 ` [PATCH 03/40] external-odb: add has_external_odb() Christian Couder
2018-01-03 16:33 ` Christian Couder [this message]
2018-01-03 16:33 ` [PATCH 05/40] fsck: support refs pointing to promisor objects Christian Couder
2018-01-03 16:33 ` [PATCH 06/40] fsck: support referenced " Christian Couder
2018-01-03 16:33 ` [PATCH 07/40] fsck: support promisor objects as CLI argument Christian Couder
2018-01-03 16:33 ` [PATCH 08/40] index-pack: refactor writing of .keep files Christian Couder
2018-01-03 16:33 ` [PATCH 09/40] introduce fetch-object: fetch one promisor object Christian Couder
2018-01-03 16:33 ` [PATCH 10/40] external-odb: implement external_odb_get_direct Christian Couder
2018-01-04 17:44   ` Jeff Hostetler
2018-01-15 14:47     ` Christian Couder
2018-01-03 16:33 ` [PATCH 11/40] sha1_file: support lazily fetching missing objects Christian Couder
2018-01-03 16:33 ` [PATCH 12/40] rev-list: support termination at promisor objects Christian Couder
2018-01-03 16:33 ` [PATCH 13/40] gc: do not repack promisor packfiles Christian Couder
2018-01-03 16:33 ` [PATCH 14/40] sha1_file: prepare for external odbs Christian Couder
2018-01-04 18:00   ` Jeff Hostetler
2018-01-16  7:23     ` Christian Couder
2018-01-03 16:33 ` [PATCH 15/40] external-odb: add script mode support Christian Couder
2018-01-04 19:55   ` Jeff Hostetler
2018-03-19 13:15     ` Christian Couder
2018-01-03 16:33 ` [PATCH 16/40] odb-helper: add 'enum odb_helper_type' Christian Couder
2018-01-03 16:33 ` [PATCH 17/40] odb-helper: add odb_helper_init() to send 'init' instruction Christian Couder
2018-01-03 16:33 ` [PATCH 18/40] t0400: add 'put_raw_obj' instruction to odb-helper script Christian Couder
2018-01-03 16:33 ` [PATCH 19/40] external odb: add 'put_raw_obj' support Christian Couder
2018-01-03 16:33 ` [PATCH 20/40] external-odb: accept only blobs for now Christian Couder
2018-01-03 16:33 ` [PATCH 21/40] t0400: add test for external odb write support Christian Couder
2018-01-03 16:33 ` [PATCH 22/40] Add t0410 to test external ODB transfer Christian Couder
2018-01-03 16:33 ` [PATCH 23/40] lib-httpd: pass config file to start_httpd() Christian Couder
2018-01-03 16:33 ` [PATCH 24/40] lib-httpd: add upload.sh Christian Couder
2018-01-03 16:33 ` [PATCH 25/40] lib-httpd: add list.sh Christian Couder
2018-01-03 16:33 ` [PATCH 26/40] lib-httpd: add apache-e-odb.conf Christian Couder
2018-01-03 16:33 ` [PATCH 27/40] odb-helper: add odb_helper_get_raw_object() Christian Couder
2018-01-03 16:33 ` [PATCH 28/40] pack-objects: don't pack objects in external odbs Christian Couder
2018-01-04 20:54   ` Jeff Hostetler
2018-03-19 13:27     ` Christian Couder
2018-01-03 16:33 ` [PATCH 29/40] Add t0420 to test transfer to HTTP external odb Christian Couder
2018-01-03 16:33 ` [PATCH 30/40] external-odb: add 'get_direct' support Christian Couder
2018-01-03 16:33 ` [PATCH 31/40] odb-helper: add 'script_mode' to 'struct odb_helper' Christian Couder
2018-01-03 16:33 ` [PATCH 32/40] odb-helper: add init_object_process() Christian Couder
2018-01-03 16:33 ` [PATCH 33/40] Add t0450 to test 'get_direct' mechanism Christian Couder
2018-01-03 16:33 ` [PATCH 34/40] Add t0460 to test passing git objects Christian Couder
2018-01-03 16:33 ` [PATCH 35/40] odb-helper: add put_object_process() Christian Couder
2018-01-03 16:33 ` [PATCH 36/40] Add t0470 to test passing raw objects Christian Couder
2018-01-03 16:34 ` [PATCH 37/40] odb-helper: add have_object_process() Christian Couder
2018-01-03 16:34 ` [PATCH 38/40] Add t0480 to test "have" capability and raw objects Christian Couder
2018-01-03 16:34 ` [PATCH 39/40] external-odb: use 'odb=magic' attribute to mark odb blobs Christian Couder
2018-01-03 16:34 ` [PATCH 40/40] Add Documentation/technical/external-odb.txt Christian Couder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180103163403.11303-5-chriscool@tuxfamily.org \
    --to=christian.couder@gmail.com \
    --cc=Ben.Peart@microsoft.com \
    --cc=chriscool@tuxfamily.org \
    --cc=e@80x24.org \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jeffhost@microsoft.com \
    --cc=jonathantanmy@google.com \
    --cc=larsxschneider@gmail.com \
    --cc=mh@glandium.org \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).