git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Stefan Beller <sbeller@google.com>
To: git@vger.kernel.org
Cc: hvoigt@hvoigt.net, Stefan Beller <sbeller@google.com>
Subject: [PATCH 10/10] fetch: retry fetching submodules if sha1 were not fetched
Date: Wed,  8 Aug 2018 15:17:52 -0700
Message-ID: <20180808221752.195419-11-sbeller@google.com> (raw)
In-Reply-To: <20180808221752.195419-1-sbeller@google.com>

Currently when git-fetch is asked to recurse into submodules, it dispatches
a plain "git-fetch -C <submodule-dir>" (and some submodule related options
such as prefix and recusing strategy, but) without any information of the
remote or the tip that should be fetched.

This works surprisingly well in some workflows (such as using submodules
as a third party library), while not so well in other scenarios, such
as in a Gerrit topic-based workflow, that can tie together changes
(potentially across repositories) on the server side. One of the parts
of such a Gerrit workflow is to download a change when wanting to examine
it, and you'd want to have its submodule changes that are in the same
topic downloaded as well. However these submodule changes reside in their
own repository in their on ref (refs/changes/<int>).

Retry fetching a submodule if the object id that the superproject points
to, cannot be found.

Note: This is an RFC and doesn't support fetching to FETCH_HEAD yet, but
only into a local branch. To make fetching into FETCH_HEAD work, we need
some refactoring in builtin/fetch.c to adjust the calls to
'check_for_new_submodule_commits'.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 builtin/fetch.c             |  9 ++---
 submodule.c                 | 79 ++++++++++++++++++++++++++++++++++++-
 t/t5526-fetch-submodules.sh | 16 ++++++++
 3 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 34d2bd123b3..9396e6a44c6 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -700,8 +700,7 @@ static int update_local_ref(struct ref *ref,
 			what = _("[new ref]");
 		}
 
-		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
-		    (recurse_submodules != RECURSE_SUBMODULES_ON))
+		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
 			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref(msg, ref, 0);
 		format_display(display, r ? '!' : '*', what,
@@ -716,8 +715,7 @@ static int update_local_ref(struct ref *ref,
 		strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
 		strbuf_addstr(&quickref, "..");
 		strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
-		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
-		    (recurse_submodules != RECURSE_SUBMODULES_ON))
+		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
 			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref("fast-forward", ref, 1);
 		format_display(display, r ? '!' : ' ', quickref.buf,
@@ -731,8 +729,7 @@ static int update_local_ref(struct ref *ref,
 		strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
 		strbuf_addstr(&quickref, "...");
 		strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
-		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
-		    (recurse_submodules != RECURSE_SUBMODULES_ON))
+		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
 			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref("forced-update", ref, 1);
 		format_display(display, r ? '!' : '+', quickref.buf,
diff --git a/submodule.c b/submodule.c
index ec7ea6f8c2d..6cbd0b1a470 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1127,6 +1127,7 @@ struct submodule_parallel_fetch {
 	int result;
 
 	struct string_list changed_submodule_names;
+	struct string_list retry;
 };
 #define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
 
@@ -1259,8 +1260,10 @@ static int get_next_submodule(struct child_process *cp,
 {
 	int ret = 0;
 	struct submodule_parallel_fetch *spf = data;
+	struct string_list_item *retry_it;
 
 	for (; spf->count < spf->r->index->cache_nr; spf->count++) {
+		int recurse_config;
 		struct strbuf submodule_prefix = STRBUF_INIT;
 		const struct cache_entry *ce = spf->r->index->cache[spf->count];
 		const char *git_dir, *default_argv;
@@ -1280,7 +1283,9 @@ static int get_next_submodule(struct child_process *cp,
 			}
 		}
 
-		switch (get_fetch_recurse_config(submodule, spf))
+		recurse_config = get_fetch_recurse_config(submodule, spf);
+
+		switch (recurse_config)
 		{
 		default:
 		case RECURSE_SUBMODULES_DEFAULT:
@@ -1318,9 +1323,46 @@ static int get_next_submodule(struct child_process *cp,
 		strbuf_release(&submodule_prefix);
 		if (ret) {
 			spf->count++;
+			if (submodule != &default_submodule)
+				/* discard const-ness: */
+				*task_cb = (void*)submodule;
 			return 1;
 		}
 	}
+
+retry_next:
+	retry_it = string_list_pop(&spf->retry);
+	if (retry_it) {
+		struct strbuf submodule_prefix = STRBUF_INIT;
+		const struct submodule *sub =
+				submodule_from_name(spf->r,
+						    &null_oid,
+						    retry_it->string);
+
+		child_process_init(cp);
+		cp->dir = get_submodule_git_dir(spf->r, sub->path);
+		if (!cp->dir)
+			goto retry_next;
+		prepare_submodule_repo_env_in_gitdir(&cp->env_array);
+		cp->git_cmd = 1;
+
+		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, sub->path);
+		argv_array_init(&cp->args);
+		argv_array_pushv(&cp->args, spf->args.argv);
+		argv_array_push(&cp->args, "on-demand");
+		argv_array_push(&cp->args, "--submodule-prefix");
+		argv_array_push(&cp->args, submodule_prefix.buf);
+
+		/* NEEDSWORK: have get_default_remote from s--h */
+		argv_array_push(&cp->args, "origin");
+		oid_array_for_each_unique(retry_it->util,
+					  append_oid_to_argv, &cp->args);
+
+		*task_cb = NULL; /* make sure we do not recurse forever */
+		strbuf_release(&submodule_prefix);
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -1334,14 +1376,49 @@ static int fetch_start_failure(struct strbuf *err,
 	return 0;
 }
 
+static int commit_exists_in_sub(const struct object_id *oid, void *data)
+{
+	struct repository *subrepo = data;
+
+	enum object_type type = oid_object_info(subrepo, oid, NULL);
+
+	return type == OBJ_COMMIT;
+}
+
 static int fetch_finish(int retvalue, struct strbuf *err,
 			void *cb, void *task_cb)
 {
 	struct submodule_parallel_fetch *spf = cb;
+	struct submodule *sub = task_cb;
+	struct repository subrepo;
 
 	if (retvalue)
 		spf->result = 1;
 
+	if (!sub)
+		return 0;
+
+	if (repo_submodule_init(&subrepo, spf->r, sub->path) < 0)
+		warning(_("Could not get submodule repository for submodule '%s' in repository '%s'"),
+			  sub->path, spf->r->worktree);
+	else {
+		struct string_list_item *it;
+		struct oid_array *commits;
+
+		it = string_list_lookup(&spf->changed_submodule_names, sub->name);
+		if (!it)
+			return 0;
+
+		commits = it->util;
+		oid_array_remove_if(commits,
+				    commit_exists_in_sub,
+				    &subrepo);
+
+		if (commits->nr)
+			string_list_append(&spf->retry, sub->name)
+				->util = commits;
+	}
+
 	return 0;
 }
 
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index 4437cb17698..ade4bf6d0bc 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -606,4 +606,20 @@ test_expect_success "fetch new commits when submodule got renamed" '
 	test_cmp expect actual
 '
 
+test_expect_success "fetch new commits on-demand when they are not reachable" '
+	git checkout --detach &&
+	C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
+	git -C submodule update-ref refs/changes/1 $C &&
+	git update-index --cacheinfo 160000 $C submodule &&
+	git commit -m "updated submodule outside of refs/heads" &&
+	D=$(git rev-parse HEAD) &&
+	git update-ref refs/changes/2 $D &&
+	(
+		cd downstream &&
+		git fetch --recurse-submodules --recurse-submodules-default on-demand origin refs/changes/2:refs/heads/my_branch &&
+		git -C submodule cat-file -t $C &&
+		git checkout --recurse-submodules FETCH_HEAD
+	)
+'
+
 test_done
-- 
2.18.0.597.ga71716f1ad-goog


  parent reply index

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-08 22:17 [RFC PATCH 00/10] fetch: make sure submodule oids are fetched Stefan Beller
2018-08-08 22:17 ` [PATCH 01/10] string_list: print_string_list to use trace_printf Stefan Beller
2018-08-09 21:16   ` Junio C Hamano
2018-08-09 21:40     ` Stefan Beller
2018-08-08 22:17 ` [PATCH 02/10] string-list.h: add string_list_pop function Stefan Beller
2018-08-09  7:35   ` Martin Ågren
2018-08-09 21:29     ` Junio C Hamano
2018-08-09 21:41       ` Jeff King
2018-08-09 21:52         ` Stefan Beller
2018-08-09 21:56           ` Jeff King
2018-08-09 22:10             ` Stefan Beller
2018-08-08 22:17 ` [PATCH 03/10] sha1-array: provide oid_array_remove_if Stefan Beller
2018-08-09  7:39   ` Martin Ågren
2018-08-09 17:25     ` Stefan Beller
2018-08-09 19:24       ` Jeff King
2018-08-09 21:46         ` Junio C Hamano
2018-08-09 21:44   ` Junio C Hamano
2018-08-08 22:17 ` [PATCH 04/10] submodule.c: convert submodule_move_head new argument to object id Stefan Beller
2018-08-09 22:00   ` Junio C Hamano
2018-08-08 22:17 ` [PATCH 05/10] submodule.c: fix indentation Stefan Beller
2018-08-08 22:17 ` [PATCH 06/10] submodule.c: sort changed_submodule_names before searching it Stefan Beller
2018-08-08 22:17 ` [PATCH 07/10] submodule: move global changed_submodule_names into fetch submodule struct Stefan Beller
2018-08-08 22:17 ` [PATCH 08/10] submodule.c: do not copy around submodule list Stefan Beller
2018-08-08 22:17 ` [PATCH 09/10] submodule: fetch in submodules git directory instead of in worktree Stefan Beller
2018-08-08 22:17 ` Stefan Beller [this message]
2018-08-09  7:50   ` [PATCH 10/10] fetch: retry fetching submodules if sha1 were not fetched Martin Ågren
2018-08-09 17:42     ` Stefan Beller

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180808221752.195419-11-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    --cc=hvoigt@hvoigt.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox