git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Andrew Oakley <andrew@adoakley.name>
To: git@vger.kernel.org, Luke Diamand <luke@diamand.org>,
	Jonathan Tan <jonathantanmy@google.com>
Cc: Andrew Oakley <andrew@adoakley.name>
Subject: [PATCH 2/7] submodule: use separate submodule repositories
Date: Tue, 29 Sep 2020 16:53:45 +0100	[thread overview]
Message-ID: <20200929155350.49066-3-andrew@adoakley.name> (raw)
In-Reply-To: <20200929155350.49066-1-andrew@adoakley.name>

This commit allows `git fetch --recuse-submodules` to work correctly
with partial clones, including the case where it is only the parent
repository that is a partial clone.

Using a separate repository with its own object store should allow
any objects that need to be fetched from a promisor or an alternate
object store to work correctly.

Replacing get_submodule_ref_store with get_main_ref_store for the
correct repo makes the refs store lookup objects in the correct repo
(for at least the cases relevant for a fetch).  We still can't fetch
objects from promisor remotes, but do_oid_object_info_extended detects
this and fails gracefully.

Signed-off-by: Andrew Oakley <andrew@adoakley.name>
---
 submodule.c | 105 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 60 insertions(+), 45 deletions(-)

diff --git a/submodule.c b/submodule.c
index 543b1123ae..3889dc7d9a 100644
--- a/submodule.c
+++ b/submodule.c
@@ -89,11 +89,11 @@ int is_staging_gitmodules_ok(struct index_state *istate)
 	return 1;
 }
 
-static int for_each_remote_ref_submodule(const char *submodule,
+static int for_each_remote_ref_submodule(struct repository *subrepo,
 					 each_ref_fn fn, void *cb_data)
 {
-	return refs_for_each_remote_ref(get_submodule_ref_store(submodule),
-					fn, cb_data);
+       return refs_for_each_remote_ref(get_main_ref_store(subrepo), fn,
+				       cb_data);
 }
 
 /*
@@ -879,6 +879,27 @@ static void free_submodules_oids(struct string_list *submodules)
 	string_list_clear(submodules, 1);
 }
 
+static struct repository* get_changed_submodule_repo(struct repository *r,
+						     const char *name_or_path)
+{
+	const struct submodule *submodule;
+	struct repository *subrepo;
+
+	submodule = submodule_from_name(r, &null_oid, name_or_path);
+	if (!submodule) {
+		/* Not a named submodule, try just using the path */
+		return open_submodule(name_or_path);
+	}
+
+	subrepo = xmalloc(sizeof(*subrepo));
+	if (repo_submodule_init(subrepo, r, submodule)) {
+		free(subrepo);
+		return NULL;
+	}
+
+	return subrepo;
+}
+
 static int has_remote(const char *refname, const struct object_id *oid,
 		      int flags, void *cb_data)
 {
@@ -895,7 +916,6 @@ static int append_oid_to_argv(const struct object_id *oid, void *data)
 struct has_commit_data {
 	struct repository *repo;
 	int result;
-	const char *path;
 };
 
 static int check_has_commit(const struct object_id *oid, void *data)
@@ -916,28 +936,22 @@ static int check_has_commit(const struct object_id *oid, void *data)
 		return 0;
 	default:
 		die(_("submodule entry '%s' (%s) is a %s, not a commit"),
-		    cb->path, oid_to_hex(oid), type_name(type));
+		    cb->repo->submodule_prefix, oid_to_hex(oid),
+		    type_name(type));
 	}
 }
 
-static int submodule_has_commits(struct repository *r,
-				 const char *path,
+static int submodule_has_commits(struct repository *subrepo,
 				 struct oid_array *commits)
 {
-	struct has_commit_data has_commit = { r, 1, path };
+	struct has_commit_data has_commit = { subrepo, 1 };
 
 	/*
-	 * Perform a cheap, but incorrect check for the existence of 'commits'.
-	 * This is done by adding the submodule's object store to the in-core
-	 * object store, and then querying for each commit's existence.  If we
-	 * do not have the commit object anywhere, there is no chance we have
-	 * it in the object store of the correct submodule and have it
-	 * reachable from a ref, so we can fail early without spawning rev-list
-	 * which is expensive.
+	 * Perform a check for the existence of 'commits' in the submodule's
+	 * object store.  If we do not have the commit object, there is no
+	 * chance we have it reachable from a ref, so we can fail early without
+	 * spawning rev-list which is expensive.
 	 */
-	if (add_submodule_odb(path))
-		return 0;
-
 	oid_array_for_each_unique(commits, check_has_commit, &has_commit);
 
 	if (has_commit.result) {
@@ -956,7 +970,7 @@ static int submodule_has_commits(struct repository *r,
 		prepare_submodule_repo_env(&cp.env_array);
 		cp.git_cmd = 1;
 		cp.no_stdin = 1;
-		cp.dir = path;
+		cp.dir = subrepo->submodule_prefix;
 
 		if (capture_command(&cp, &out, GIT_MAX_HEXSZ + 1) || out.len)
 			has_commit.result = 0;
@@ -967,11 +981,12 @@ static int submodule_has_commits(struct repository *r,
 	return has_commit.result;
 }
 
-static int submodule_needs_pushing(struct repository *r,
-				   const char *path,
+static int submodule_needs_pushing(struct repository *subrepo,
 				   struct oid_array *commits)
 {
-	if (!submodule_has_commits(r, path, commits))
+	const char *path = subrepo->submodule_prefix;
+
+	if (!submodule_has_commits(subrepo, commits))
 		/*
 		 * NOTE: We do consider it safe to return "no" here. The
 		 * correct answer would be "We do not know" instead of
@@ -985,7 +1000,7 @@ static int submodule_needs_pushing(struct repository *r,
 		 */
 		return 0;
 
-	if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
+	if (for_each_remote_ref_submodule(subrepo, has_remote, NULL) > 0) {
 		struct child_process cp = CHILD_PROCESS_INIT;
 		struct strbuf buf = STRBUF_INIT;
 		int needs_pushing = 0;
@@ -1032,20 +1047,18 @@ int find_unpushed_submodules(struct repository *r,
 
 	for_each_string_list_item(name, &submodules) {
 		struct oid_array *commits = name->util;
-		const struct submodule *submodule;
-		const char *path = NULL;
-
-		submodule = submodule_from_name(r, &null_oid, name->string);
-		if (submodule)
-			path = submodule->path;
-		else
-			path = default_name_or_path(name->string);
+		struct repository *subrepo;
 
-		if (!path)
+		subrepo = get_changed_submodule_repo(r, name->string);
+		if (!subrepo)
 			continue;
 
-		if (submodule_needs_pushing(r, path, commits))
-			string_list_insert(needs_pushing, path);
+		if (submodule_needs_pushing(subrepo, commits))
+			string_list_insert(needs_pushing,
+					   subrepo->submodule_prefix);
+
+		repo_clear(subrepo);
+		free(subrepo);
 	}
 
 	free_submodules_oids(&submodules);
@@ -1060,7 +1073,12 @@ static int push_submodule(const char *path,
 			  const struct string_list *push_options,
 			  int dry_run)
 {
-	if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
+	struct repository *subrepo = open_submodule(path);
+	int have_remote = for_each_remote_ref_submodule(subrepo, has_remote, NULL);
+	repo_clear(subrepo);
+	free(subrepo);
+
+	if (have_remote > 0) {
 		struct child_process cp = CHILD_PROCESS_INIT;
 		strvec_push(&cp.args, "push");
 		if (dry_run)
@@ -1219,22 +1237,19 @@ static void calculate_changed_submodule_paths(struct repository *r,
 
 	for_each_string_list_item(name, changed_submodule_names) {
 		struct oid_array *commits = name->util;
-		const struct submodule *submodule;
-		const char *path = NULL;
-
-		submodule = submodule_from_name(r, &null_oid, name->string);
-		if (submodule)
-			path = submodule->path;
-		else
-			path = default_name_or_path(name->string);
+		struct repository *subrepo;
 
-		if (!path)
+		subrepo = get_changed_submodule_repo(r, name->string);
+		if (!subrepo)
 			continue;
 
-		if (submodule_has_commits(r, path, commits)) {
+		if (submodule_has_commits(subrepo, commits)) {
 			oid_array_clear(commits);
 			*name->string = '\0';
 		}
+
+		repo_clear(subrepo);
+		free(subrepo);
 	}
 
 	string_list_remove_empty_items(changed_submodule_names, 1);
-- 
2.26.2


  parent reply	other threads:[~2020-09-29 16:14 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-29 15:53 [PATCH 0/7] Submodules and partial clones Andrew Oakley
2020-09-29 15:53 ` [PATCH 1/7] refs: store owning repository for object lookup Andrew Oakley
2020-09-29 15:53 ` Andrew Oakley [this message]
2020-09-29 15:53 ` [PATCH 3/7] Add failing test for partial clones with submodules Andrew Oakley
2020-09-29 15:53 ` [PATCH 4/7] refs: use correct repo in refs_peel_ref Andrew Oakley
2020-09-29 15:53 ` [PATCH 5/7] merge-recursive: use separate submodule repository Andrew Oakley
2020-09-29 15:53 ` [PATCH 6/7] submodule: remove add_submodule_odb Andrew Oakley
2020-09-29 15:53 ` [PATCH 7/7] submodule: use partial clone filter Andrew Oakley
2020-09-29 18:05 ` [PATCH 0/7] Submodules and partial clones Jonathan Tan
2020-09-30 13:28   ` Andrew Oakley
2020-09-30 20:41     ` Jonathan Tan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200929155350.49066-3-andrew@adoakley.name \
    --to=andrew@adoakley.name \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    --cc=luke@diamand.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).