git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip
@ 2018-10-25 23:32 Stefan Beller
  2018-10-25 23:32 ` [PATCH 01/10] sha1-array: provide oid_array_filter Stefan Beller
                   ` (10 more replies)
  0 siblings, 11 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

Thanks Jonathan for your thoughtful comments,
here is the product of the discussion:
* I split up the patch to fetch in the worktree to be 2 patches,
  each giving motivation on its own.
* the last patch is vastly simplified in code, but takes an extra test
* in [1], you remark "commits" ought not to be a pointer, but I decided against
  that, as we keep the pointed-at value around for the same time span (until
  we're done with that submodule) and we don't need to copy over the pointed-at
  value into the new struct.

[1] https://public-inbox.org/git/20181018003954.139498-1-jonathantanmy@google.com/

This is still based on ao/submodule-wo-gitmodules-checked-out.
previous version
https://public-inbox.org/git/20181016181327.107186-1-sbeller@google.com/

Stefan Beller (10):
  sha1-array: provide oid_array_filter
  submodule.c: fix indentation
  submodule.c: sort changed_submodule_names before searching it
  submodule.c: tighten scope of changed_submodule_names struct
  submodule: store OIDs in changed_submodule_names
  repository: repo_submodule_init to take a submodule struct
  submodule: migrate get_next_submodule to use repository structs
  submodule.c: fetch in submodules git directory instead of in worktree
  fetch: try fetching submodules if needed objects were not fetched
  builtin/fetch: check for submodule updates in any ref update

 Documentation/technical/api-oid-array.txt    |   5 +
 builtin/fetch.c                              |  11 +-
 builtin/grep.c                               |  17 +-
 builtin/ls-files.c                           |  12 +-
 builtin/submodule--helper.c                  |   2 +-
 repository.c                                 |  27 +-
 repository.h                                 |  12 +-
 sha1-array.c                                 |  17 ++
 sha1-array.h                                 |   3 +
 submodule.c                                  | 265 ++++++++++++++++---
 t/helper/test-submodule-nested-repo-config.c |   8 +-
 t/t5526-fetch-submodules.sh                  |  55 ++++
 12 files changed, 346 insertions(+), 88 deletions(-)

  git range-diff origin/sb/submodule-recursive-fetch-gets-the-tip... 

1:  3fbb06aedd ! 1:  0fdb0e2ad9 submodule.c: move global changed_submodule_names into fetch submodule struct
    @@ -1,12 +1,11 @@
     Author: Stefan Beller <sbeller@google.com>
     
    -    submodule.c: move global changed_submodule_names into fetch submodule struct
    +    submodule.c: tighten scope of changed_submodule_names struct
     
         The `changed_submodule_names` are only used for fetching, so let's make it
         part of the struct that is passed around for fetching submodules.
     
         Signed-off-by: Stefan Beller <sbeller@google.com>
    -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
     diff --git a/submodule.c b/submodule.c
     --- a/submodule.c
    @@ -16,7 +15,6 @@
      
      static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
     -static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP;
    -+
      static int initialized_fetch_ref_tips;
      static struct oid_array ref_tips_before_fetch;
      static struct oid_array ref_tips_after_fetch;
    @@ -25,22 +23,8 @@
      }
      
     -static void calculate_changed_submodule_paths(void)
    -+struct submodule_parallel_fetch {
    -+	int count;
    -+	struct argv_array args;
    -+	struct repository *r;
    -+	const char *prefix;
    -+	int command_line_option;
    -+	int default_option;
    -+	int quiet;
    -+	int result;
    -+
    -+	struct string_list changed_submodule_names;
    -+};
    -+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
    -+
     +static void calculate_changed_submodule_paths(
    -+	struct submodule_parallel_fetch *spf)
    ++	struct string_list *changed_submodule_names)
      {
      	struct argv_array argv = ARGV_ARRAY_INIT;
      	struct string_list changed_submodules = STRING_LIST_INIT_DUP;
    @@ -49,30 +33,23 @@
      
      		if (!submodule_has_commits(path, commits))
     -			string_list_append(&changed_submodule_names, name->string);
    -+			string_list_append(&spf->changed_submodule_names,
    ++			string_list_append(changed_submodule_names,
     +					   name->string);
      	}
      
      	free_submodules_oids(&changed_submodules);
     @@
    - 	return ret;
    - }
    - 
    --struct submodule_parallel_fetch {
    --	int count;
    --	struct argv_array args;
    --	struct repository *r;
    --	const char *prefix;
    --	int command_line_option;
    --	int default_option;
    --	int quiet;
    --	int result;
    --};
    + 	int default_option;
    + 	int quiet;
    + 	int result;
    ++
    ++	struct string_list changed_submodule_names;
    + };
     -#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0}
    --
    ++#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
    + 
      static int get_fetch_recurse_config(const struct submodule *submodule,
      				    struct submodule_parallel_fetch *spf)
    - {
     @@
      		case RECURSE_SUBMODULES_ON_DEMAND:
      			if (!submodule ||
    @@ -88,7 +65,7 @@
      
     -	calculate_changed_submodule_paths();
     -	string_list_sort(&changed_submodule_names);
    -+	calculate_changed_submodule_paths(&spf);
    ++	calculate_changed_submodule_paths(&spf.changed_submodule_names);
     +	string_list_sort(&spf.changed_submodule_names);
      	run_processes_parallel(max_parallel_jobs,
      			       get_next_submodule,
2:  a64a8322a1 ! 2:  a11e6e39a2 submodule.c: do not copy around submodule list
    @@ -1,6 +1,6 @@
     Author: Stefan Beller <sbeller@google.com>
     
    -    submodule.c: do not copy around submodule list
    +    submodule: store OIDs in changed_submodule_names
     
         'calculate_changed_submodule_paths' uses a local list to compute the
         changed submodules, and then produces the result by copying appropriate
    @@ -14,13 +14,13 @@
         useful in a later patch.
     
         Signed-off-by: Stefan Beller <sbeller@google.com>
    -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
    +    Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
     
     diff --git a/submodule.c b/submodule.c
     --- a/submodule.c
     +++ b/submodule.c
     @@
    - 	struct submodule_parallel_fetch *spf)
    + 	struct string_list *changed_submodule_names)
      {
      	struct argv_array argv = ARGV_ARRAY_INIT;
     -	struct string_list changed_submodules = STRING_LIST_INIT_DUP;
    @@ -34,10 +34,10 @@
      	 * commits have been recorded upstream in "changed_submodule_names".
      	 */
     -	collect_changed_submodules(&changed_submodules, &argv);
    -+	collect_changed_submodules(&spf->changed_submodule_names, &argv);
    ++	collect_changed_submodules(changed_submodule_names, &argv);
      
     -	for_each_string_list_item(name, &changed_submodules) {
    -+	for_each_string_list_item(name, &spf->changed_submodule_names) {
    ++	for_each_string_list_item(name, changed_submodule_names) {
      		struct oid_array *commits = name->util;
      		const struct submodule *submodule;
      		const char *path = NULL;
    @@ -46,7 +46,7 @@
      			continue;
      
     -		if (!submodule_has_commits(path, commits))
    --			string_list_append(&spf->changed_submodule_names,
    +-			string_list_append(changed_submodule_names,
     -					   name->string);
     +		if (submodule_has_commits(path, commits)) {
     +			oid_array_clear(commits);
    @@ -55,7 +55,7 @@
      	}
      
     -	free_submodules_oids(&changed_submodules);
    -+	string_list_remove_empty_items(&spf->changed_submodule_names, 1);
    ++	string_list_remove_empty_items(changed_submodule_names, 1);
     +
      	argv_array_clear(&argv);
      	oid_array_clear(&ref_tips_before_fetch);
3:  9341b92c83 ! 3:  3f4e0d4b8d repository: repo_submodule_init to take a submodule struct
    @@ -5,17 +5,19 @@
         When constructing a struct repository for a submodule for some revision
         of the superproject where the submodule is not contained in the index,
         it may not be present in the working tree currently either. In that
    -    siutation giving a 'path' argument is not useful. Upgrade the
    +    situation giving a 'path' argument is not useful. Upgrade the
         repo_submodule_init function to take a struct submodule instead.
    +    The submodule struct can be obtained via submodule_from_{path, name} or
    +    an artificial submodule struct can be passed in.
     
    -    While we are at it, overhaul the repo_submodule_init function by renaming
    -    the submodule repository struct, which is to be initialized, to a name
    -    that is not confused with the struct submodule as easily.
    +    While we are at it, rename the repository struct in the repo_submodule_init
    +    function, which is to be initialized, to a name that is not confused with
    +    the struct submodule as easily. Perform such renames in similar functions
    +    as well.
     
         Also move its documentation into the header file.
     
         Signed-off-by: Stefan Beller <sbeller@google.com>
    -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
     diff --git a/builtin/grep.c b/builtin/grep.c
     --- a/builtin/grep.c
    @@ -104,6 +106,19 @@
      
      static void show_ce(struct repository *repo, struct dir_struct *dir,
     
    +diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
    +--- a/builtin/submodule--helper.c
    ++++ b/builtin/submodule--helper.c
    +@@
    + 	if (!sub)
    + 		BUG("We could get the submodule handle before?");
    + 
    +-	if (repo_submodule_init(&subrepo, the_repository, path))
    ++	if (repo_submodule_init(&subrepo, the_repository, sub))
    + 		die(_("could not get a repository handle for submodule '%s'"), path);
    + 
    + 	if (!repo_config_get_string(&subrepo, "core.worktree", &cw)) {
    +
     diff --git a/repository.c b/repository.c
     --- a/repository.c
     +++ b/repository.c
    @@ -178,7 +193,8 @@
     +/*
     + * Initialize the repository 'subrepo' as the submodule given by the
     + * struct submodule 'sub' in parent repository 'superproject'.
    -+ * Return 0 upon success and a non-zero value upon failure.
    ++ * Return 0 upon success and a non-zero value upon failure, which may happen
    ++ * if the submodule is not found, or 'sub' is NULL.
     + */
     +struct submodule;
     +int repo_submodule_init(struct repository *subrepo,
4:  cea909cbd4 ! 4:  b1566069e7 submodule: fetch in submodules git directory instead of in worktree
    @@ -1,44 +1,19 @@
     Author: Stefan Beller <sbeller@google.com>
     
    -    submodule: fetch in submodules git directory instead of in worktree
    +    submodule: migrate get_next_submodule to use repository structs
     
    -    This patch started as a refactoring to make 'get_next_submodule' more
    -    readable, but upon doing so, I realized that "git fetch" of the submodule
    -    actually doesn't need to be run in the submodules worktree. So let's run
    -    it in its git dir instead.
    +    We used to recurse into submodules, even if they were broken having
    +    only an objects directory. The child process executed in the submodule
    +    would fail though if the submodule was broken.
     
    -    That should pave the way towards fetching submodules that are currently
    -    not checked out.
    -
    -    This patch leaks the cp->dir in get_next_submodule, as any further
    -    callback in run_processes_parallel doesn't have access to the child
    -    process any more. In an early iteration of this patch, the function
    -    get_submodule_repo_for directly returned the string containing the
    -    git directory, which would be a better design choice for this patch.
    -
    -    However the next patch both fixes the memory leak of cp->dir and also has
    -    a use case for using the full repository handle of the submodule, so
    -    it makes sense to introduce the get_submodule_repo_for here already.
    +    This patch tightens the check upfront, such that we do not need
    +    to spawn a child process to find out if the submodule is broken.
     
         Signed-off-by: Stefan Beller <sbeller@google.com>
    -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
     diff --git a/submodule.c b/submodule.c
     --- a/submodule.c
     +++ b/submodule.c
    -@@
    - 			 DEFAULT_GIT_DIR_ENVIRONMENT);
    - }
    - 
    -+static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
    -+{
    -+	prepare_submodule_repo_env_no_git_dir(out);
    -+	argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
    -+}
    -+
    - /* Helper function to display the submodule header line prior to the full
    -  * summary output. If it can locate the submodule objects directory it will
    -  * attempt to lookup both the left and right commits and put them into the
     @@
      	return spf->default_option;
      }
    @@ -99,9 +74,8 @@
     +		if (repo) {
      			child_process_init(cp);
     -			cp->dir = strbuf_detach(&submodule_path, NULL);
    --			prepare_submodule_repo_env(&cp->env_array);
    -+			prepare_submodule_repo_env_in_gitdir(&cp->env_array);
    -+			cp->dir = xstrdup(repo->gitdir);
    + 			prepare_submodule_repo_env(&cp->env_array);
    ++			cp->dir = xstrdup(repo->worktree);
      			cp->git_cmd = 1;
      			if (!spf->quiet)
      				strbuf_addf(err, "Fetching submodule %s%s\n",
    @@ -113,27 +87,17 @@
     +			repo_clear(repo);
     +			free(repo);
      			ret = 1;
    ++		} else {
    ++			/*
    ++			 * An empty directory is normal,
    ++			 * the submodule is not initialized
    ++			 */
    ++			if (S_ISGITLINK(ce->ce_mode) &&
    ++			    !is_empty_dir(ce->name))
    ++				die(_("Could not access submodule '%s'"), ce->name);
      		}
     -		strbuf_release(&submodule_path);
     -		strbuf_release(&submodule_git_dir);
      		strbuf_release(&submodule_prefix);
      		if (ret) {
      			spf->count++;
    -
    -diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
    ---- a/t/t5526-fetch-submodules.sh
    -+++ b/t/t5526-fetch-submodules.sh
    -@@
    - 
    - 	test_must_fail git -C dst status &&
    - 	test_must_fail git -C dst diff &&
    --	test_must_fail git -C dst fetch --recurse-submodules
    -+
    -+	# git-fetch cannot find the git directory of the submodule,
    -+	# so it will do nothing, successfully, as it cannot distinguish between
    -+	# this broken submodule and a submodule that was just set active but
    -+	# not cloned yet
    -+	git -C dst fetch --recurse-submodules
    - '
    - 
    - test_expect_success "fetch new commits when submodule got renamed" '
-:  ---------- > 5:  2d98ff1201 submodule.c: fetch in submodules git directory instead of in worktree
5:  9ad0125310 ! 6:  092b9cbcba fetch: retry fetching submodules if needed objects were not fetched
    @@ -1,25 +1,23 @@
     Author: Stefan Beller <sbeller@google.com>
     
    -    fetch: retry fetching submodules if needed objects were not fetched
    +    fetch: try fetching submodules if needed objects were not fetched
     
         Currently when git-fetch is asked to recurse into submodules, it dispatches
         a plain "git-fetch -C <submodule-dir>" (with some submodule related options
         such as prefix and recusing strategy, but) without any information of the
         remote or the tip that should be fetched.
     
    -    This works surprisingly well in some workflows (such as using submodules
    -    as a third party library), while not so well in other scenarios, such
    -    as in a Gerrit topic-based workflow, that can tie together changes
    -    (potentially across repositories) on the server side. One of the parts
    -    of such a Gerrit workflow is to download a change when wanting to examine
    -    it, and you'd want to have its submodule changes that are in the same
    -    topic downloaded as well. However these submodule changes reside in their
    -    own repository in their own ref (refs/changes/<int>).
    +    But this default fetch is not sufficient, as a newly fetched commit in
    +    the superproject could point to a commit in the submodule that is not
    +    in the default refspec. This is common in workflows like Gerrit's.
    +    When fetching a Gerrit change under review (from refs/changes/??), the
    +    commits in that change likely point to submodule commits that have not
    +    been merged to a branch yet.
     
    -    Retry fetching a submodule by object name if the object id that the
    +    Try fetching a submodule by object id if the object id that the
         superproject points to, cannot be found.
     
    -    This retrying does not happen when the "git fetch" done at the
    +    The try does not happen when the "git fetch" done at the
         superproject is not storing the fetched results in remote
         tracking branches (i.e. instead just recording them to
         FETCH_HEAD) in this step. A later patch will fix this.
    @@ -32,7 +30,6 @@
         in case the submodule recursion is set to "on".
     
         Signed-off-by: Stefan Beller <sbeller@google.com>
    -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
     
     diff --git a/builtin/fetch.c b/builtin/fetch.c
     --- a/builtin/fetch.c
    @@ -75,16 +72,16 @@
      	int result;
      
      	struct string_list changed_submodule_names;
    -+	struct get_next_submodule_task **retry;
    -+	int retry_nr, retry_alloc;
    ++	struct get_next_submodule_task **fetch_specific_oids;
    ++	int fetch_specific_oids_nr, fetch_specific_oids_alloc;
      };
     -#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
     +#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, \
     +		  STRING_LIST_INIT_DUP, \
     +		  NULL, 0, 0}
      
    - static void calculate_changed_submodule_paths(
    - 	struct submodule_parallel_fetch *spf)
    + static int get_fetch_recurse_config(const struct submodule *submodule,
    + 				    struct submodule_parallel_fetch *spf)
     @@
      	return spf->default_option;
      }
    @@ -93,6 +90,8 @@
     +	struct repository *repo;
     +	const struct submodule *sub;
     +	unsigned free_sub : 1; /* Do we need to free the submodule? */
    ++
    ++	/* fetch specific oids if set, otherwise fetch default refspec */
     +	struct oid_array *commits;
     +};
     +
    @@ -224,24 +223,29 @@
     -			repo_clear(repo);
     -			free(repo);
     -			ret = 1;
    --		}
    --		strbuf_release(&submodule_prefix);
    --		if (ret) {
    - 			spf->count++;
    ++			spf->count++;
     +			*task_cb = task;
     +
     +			strbuf_release(&submodule_prefix);
    - 			return 1;
    -+		} else {
    ++			return 1;
    + 		} else {
    ++
     +			get_next_submodule_task_release(task);
     +			free(task);
    ++
    + 			/*
    + 			 * An empty directory is normal,
    + 			 * the submodule is not initialized
    +@@
    + 			    !is_empty_dir(ce->name))
    + 				die(_("Could not access submodule '%s'"), ce->name);
      		}
    - 	}
    ++	}
     +
    -+	if (spf->retry_nr) {
    -+		struct get_next_submodule_task *task = spf->retry[spf->retry_nr - 1];
    ++	if (spf->fetch_specific_oids_nr) {
    ++		struct get_next_submodule_task *task = spf->fetch_specific_oids[spf->fetch_specific_oids_nr - 1];
     +		struct strbuf submodule_prefix = STRBUF_INIT;
    -+		spf->retry_nr--;
    ++		spf->fetch_specific_oids_nr--;
     +
     +		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, task->sub->path);
     +
    @@ -262,9 +266,13 @@
     +					  append_oid_to_argv, &cp->args);
     +
     +		*task_cb = task;
    -+		strbuf_release(&submodule_prefix);
    + 		strbuf_release(&submodule_prefix);
    +-		if (ret) {
    +-			spf->count++;
    +-			return 1;
    +-		}
     +		return 1;
    -+	}
    + 	}
     +
      	return 0;
      }
    @@ -326,9 +334,11 @@
     +			return 0;
     +
     +		task->commits = commits;
    -+		ALLOC_GROW(spf->retry, spf->retry_nr + 1, spf->retry_alloc);
    -+		spf->retry[spf->retry_nr] = task;
    -+		spf->retry_nr++;
    ++		ALLOC_GROW(spf->fetch_specific_oids,
    ++			   spf->fetch_specific_oids_nr + 1,
    ++			   spf->fetch_specific_oids_alloc);
    ++		spf->fetch_specific_oids[spf->fetch_specific_oids_nr] = task;
    ++		spf->fetch_specific_oids_nr++;
     +		return 0;
     +	}
     +
    @@ -346,18 +356,33 @@
      	test_cmp expect actual
      '
      
    -+test_expect_success "fetch new commits on-demand when they are not reachable" '
    ++test_expect_success "fetch new submodule commits on-demand outside standard refspec" '
    ++	# add a second submodule and ensure it is around in downstream first
    ++	git clone submodule sub1 &&
    ++	git submodule add ./sub1 &&
    ++	git commit -m "adding a second submodule" &&
    ++	git -C downstream pull &&
    ++	git -C downstream submodule update --init --recursive &&
    ++
     +	git checkout --detach &&
    ++
     +	C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
     +	git -C submodule update-ref refs/changes/1 $C &&
     +	git update-index --cacheinfo 160000 $C submodule &&
    -+	git commit -m "updated submodule outside of refs/heads" &&
    -+	D=$(git rev-parse HEAD) &&
    -+	git update-ref refs/changes/2 $D &&
    ++	test_tick &&
    ++
    ++	D=$(git -C sub1 commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
    ++	git -C sub1 update-ref refs/changes/2 $D &&
    ++	git update-index --cacheinfo 160000 $D sub1 &&
    ++
    ++	git commit -m "updated submodules outside of refs/heads" &&
    ++	E=$(git rev-parse HEAD) &&
    ++	git update-ref refs/changes/2 $E &&
     +	(
     +		cd downstream &&
    -+		git fetch --recurse-submodules --recurse-submodules-default on-demand origin refs/changes/2:refs/heads/my_branch &&
    ++		git fetch --recurse-submodules origin refs/changes/2:refs/heads/my_branch &&
     +		git -C submodule cat-file -t $C &&
    ++		git -C sub1 cat-file -t $D &&
     +		git checkout --recurse-submodules FETCH_HEAD
     +	)
     +'
6:  b8db3b45bf < -:  ---------- builtin/fetch: check for submodule updates for non branch fetches
7:  905a4f0d4f < -:  ---------- fixup! repository: repo_submodule_init to take a submodule struct
-:  ---------- > 7:  11bf819782 builtin/fetch: check for submodule updates in any ref update

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH 01/10] sha1-array: provide oid_array_filter
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-25 23:32 ` [PATCH 02/10] submodule.c: fix indentation Stefan Beller
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller, Junio C Hamano

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/technical/api-oid-array.txt |  5 +++++
 sha1-array.c                              | 17 +++++++++++++++++
 sha1-array.h                              |  3 +++
 3 files changed, 25 insertions(+)

diff --git a/Documentation/technical/api-oid-array.txt b/Documentation/technical/api-oid-array.txt
index 9febfb1d52..c97428c2c3 100644
--- a/Documentation/technical/api-oid-array.txt
+++ b/Documentation/technical/api-oid-array.txt
@@ -48,6 +48,11 @@ Functions
 	is not sorted, this function has the side effect of sorting
 	it.
 
+`oid_array_filter`::
+	Apply the callback function `want` to each entry in the array,
+	retaining only the entries for which the function returns true.
+	Preserve the order of the entries that are retained.
+
 Examples
 --------
 
diff --git a/sha1-array.c b/sha1-array.c
index 265941fbf4..d505a004bb 100644
--- a/sha1-array.c
+++ b/sha1-array.c
@@ -77,3 +77,20 @@ int oid_array_for_each_unique(struct oid_array *array,
 	}
 	return 0;
 }
+
+void oid_array_filter(struct oid_array *array,
+		      for_each_oid_fn want,
+		      void *cb_data)
+{
+	unsigned nr = array->nr, src, dst;
+	struct object_id *oids = array->oid;
+
+	for (src = dst = 0; src < nr; src++) {
+		if (want(&oids[src], cb_data)) {
+			if (src != dst)
+				oidcpy(&oids[dst], &oids[src]);
+			dst++;
+		}
+	}
+	array->nr = dst;
+}
diff --git a/sha1-array.h b/sha1-array.h
index 232bf95017..55d016c4bf 100644
--- a/sha1-array.h
+++ b/sha1-array.h
@@ -22,5 +22,8 @@ int oid_array_for_each(struct oid_array *array,
 int oid_array_for_each_unique(struct oid_array *array,
 			      for_each_oid_fn fn,
 			      void *data);
+void oid_array_filter(struct oid_array *array,
+		      for_each_oid_fn want,
+		      void *cbdata);
 
 #endif /* SHA1_ARRAY_H */
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 02/10] submodule.c: fix indentation
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
  2018-10-25 23:32 ` [PATCH 01/10] sha1-array: provide oid_array_filter Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-25 23:32 ` [PATCH 03/10] submodule.c: sort changed_submodule_names before searching it Stefan Beller
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller, Junio C Hamano

The submodule subsystem is really bad at staying within 80 characters.
Fix it while we are here.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 submodule.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/submodule.c b/submodule.c
index 2b7082b2db..e145ebbb16 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1258,7 +1258,8 @@ static int get_next_submodule(struct child_process *cp,
 		if (!submodule) {
 			const char *name = default_name_or_path(ce->name);
 			if (name) {
-				default_submodule.path = default_submodule.name = name;
+				default_submodule.path = name;
+				default_submodule.name = name;
 				submodule = &default_submodule;
 			}
 		}
@@ -1268,8 +1269,10 @@ static int get_next_submodule(struct child_process *cp,
 		default:
 		case RECURSE_SUBMODULES_DEFAULT:
 		case RECURSE_SUBMODULES_ON_DEMAND:
-			if (!submodule || !unsorted_string_list_lookup(&changed_submodule_names,
-							 submodule->name))
+			if (!submodule ||
+			    !unsorted_string_list_lookup(
+					&changed_submodule_names,
+					submodule->name))
 				continue;
 			default_argv = "on-demand";
 			break;
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 03/10] submodule.c: sort changed_submodule_names before searching it
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
  2018-10-25 23:32 ` [PATCH 01/10] sha1-array: provide oid_array_filter Stefan Beller
  2018-10-25 23:32 ` [PATCH 02/10] submodule.c: fix indentation Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-25 23:32 ` [PATCH 04/10] submodule.c: tighten scope of changed_submodule_names struct Stefan Beller
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller, Junio C Hamano

We can string_list_insert() to maintain sorted-ness of the
list as we find new items, or we can string_list_append() to
build an unsorted list and sort it at the end just once.

As we do not rely on the sortedness while building the
list, we pick the "append and sort at the end" as it
has better worst case execution times.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 submodule.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/submodule.c b/submodule.c
index e145ebbb16..9fbfcfcfe1 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1270,7 +1270,7 @@ static int get_next_submodule(struct child_process *cp,
 		case RECURSE_SUBMODULES_DEFAULT:
 		case RECURSE_SUBMODULES_ON_DEMAND:
 			if (!submodule ||
-			    !unsorted_string_list_lookup(
+			    !string_list_lookup(
 					&changed_submodule_names,
 					submodule->name))
 				continue;
@@ -1364,6 +1364,7 @@ int fetch_populated_submodules(struct repository *r,
 	/* default value, "--submodule-prefix" and its value are added later */
 
 	calculate_changed_submodule_paths();
+	string_list_sort(&changed_submodule_names);
 	run_processes_parallel(max_parallel_jobs,
 			       get_next_submodule,
 			       fetch_start_failure,
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 04/10] submodule.c: tighten scope of changed_submodule_names struct
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (2 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 03/10] submodule.c: sort changed_submodule_names before searching it Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-25 23:32 ` [PATCH 05/10] submodule: store OIDs in changed_submodule_names Stefan Beller
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

The `changed_submodule_names` are only used for fetching, so let's make it
part of the struct that is passed around for fetching submodules.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/submodule.c b/submodule.c
index 9fbfcfcfe1..6fb0b9d783 100644
--- a/submodule.c
+++ b/submodule.c
@@ -24,7 +24,6 @@
 #include "object-store.h"
 
 static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
-static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP;
 static int initialized_fetch_ref_tips;
 static struct oid_array ref_tips_before_fetch;
 static struct oid_array ref_tips_after_fetch;
@@ -1124,7 +1123,8 @@ void check_for_new_submodule_commits(struct object_id *oid)
 	oid_array_append(&ref_tips_after_fetch, oid);
 }
 
-static void calculate_changed_submodule_paths(void)
+static void calculate_changed_submodule_paths(
+	struct string_list *changed_submodule_names)
 {
 	struct argv_array argv = ARGV_ARRAY_INIT;
 	struct string_list changed_submodules = STRING_LIST_INIT_DUP;
@@ -1162,7 +1162,8 @@ static void calculate_changed_submodule_paths(void)
 			continue;
 
 		if (!submodule_has_commits(path, commits))
-			string_list_append(&changed_submodule_names, name->string);
+			string_list_append(changed_submodule_names,
+					   name->string);
 	}
 
 	free_submodules_oids(&changed_submodules);
@@ -1208,8 +1209,10 @@ struct submodule_parallel_fetch {
 	int default_option;
 	int quiet;
 	int result;
+
+	struct string_list changed_submodule_names;
 };
-#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0}
+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
 
 static int get_fetch_recurse_config(const struct submodule *submodule,
 				    struct submodule_parallel_fetch *spf)
@@ -1271,7 +1274,7 @@ static int get_next_submodule(struct child_process *cp,
 		case RECURSE_SUBMODULES_ON_DEMAND:
 			if (!submodule ||
 			    !string_list_lookup(
-					&changed_submodule_names,
+					&spf->changed_submodule_names,
 					submodule->name))
 				continue;
 			default_argv = "on-demand";
@@ -1363,8 +1366,8 @@ int fetch_populated_submodules(struct repository *r,
 	argv_array_push(&spf.args, "--recurse-submodules-default");
 	/* default value, "--submodule-prefix" and its value are added later */
 
-	calculate_changed_submodule_paths();
-	string_list_sort(&changed_submodule_names);
+	calculate_changed_submodule_paths(&spf.changed_submodule_names);
+	string_list_sort(&spf.changed_submodule_names);
 	run_processes_parallel(max_parallel_jobs,
 			       get_next_submodule,
 			       fetch_start_failure,
@@ -1373,7 +1376,7 @@ int fetch_populated_submodules(struct repository *r,
 
 	argv_array_clear(&spf.args);
 out:
-	string_list_clear(&changed_submodule_names, 1);
+	string_list_clear(&spf.changed_submodule_names, 1);
 	return spf.result;
 }
 
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 05/10] submodule: store OIDs in changed_submodule_names
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (3 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 04/10] submodule.c: tighten scope of changed_submodule_names struct Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-26 18:57   ` Jonathan Tan
  2018-10-25 23:32 ` [PATCH 06/10] repository: repo_submodule_init to take a submodule struct Stefan Beller
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

'calculate_changed_submodule_paths' uses a local list to compute the
changed submodules, and then produces the result by copying appropriate
items into the result list.

Instead use the result list directly and prune items afterwards
using string_list_remove_empty_items.

By doing so we'll have access to the util pointer for longer that
contains the commits that we need to fetch, which will be
useful in a later patch.

Signed-off-by: Stefan Beller <sbeller@google.com>
Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
---
 submodule.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/submodule.c b/submodule.c
index 6fb0b9d783..e4b494af7b 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1127,8 +1127,7 @@ static void calculate_changed_submodule_paths(
 	struct string_list *changed_submodule_names)
 {
 	struct argv_array argv = ARGV_ARRAY_INIT;
-	struct string_list changed_submodules = STRING_LIST_INIT_DUP;
-	const struct string_list_item *name;
+	struct string_list_item *name;
 
 	/* No need to check if there are no submodules configured */
 	if (!submodule_from_path(the_repository, NULL, NULL))
@@ -1145,9 +1144,9 @@ static void calculate_changed_submodule_paths(
 	 * Collect all submodules (whether checked out or not) for which new
 	 * commits have been recorded upstream in "changed_submodule_names".
 	 */
-	collect_changed_submodules(&changed_submodules, &argv);
+	collect_changed_submodules(changed_submodule_names, &argv);
 
-	for_each_string_list_item(name, &changed_submodules) {
+	for_each_string_list_item(name, changed_submodule_names) {
 		struct oid_array *commits = name->util;
 		const struct submodule *submodule;
 		const char *path = NULL;
@@ -1161,12 +1160,14 @@ static void calculate_changed_submodule_paths(
 		if (!path)
 			continue;
 
-		if (!submodule_has_commits(path, commits))
-			string_list_append(changed_submodule_names,
-					   name->string);
+		if (submodule_has_commits(path, commits)) {
+			oid_array_clear(commits);
+			*name->string = '\0';
+		}
 	}
 
-	free_submodules_oids(&changed_submodules);
+	string_list_remove_empty_items(changed_submodule_names, 1);
+
 	argv_array_clear(&argv);
 	oid_array_clear(&ref_tips_before_fetch);
 	oid_array_clear(&ref_tips_after_fetch);
@@ -1376,7 +1377,7 @@ int fetch_populated_submodules(struct repository *r,
 
 	argv_array_clear(&spf.args);
 out:
-	string_list_clear(&spf.changed_submodule_names, 1);
+	free_submodules_oids(&spf.changed_submodule_names);
 	return spf.result;
 }
 
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 06/10] repository: repo_submodule_init to take a submodule struct
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (4 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 05/10] submodule: store OIDs in changed_submodule_names Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-26 19:15   ` Jonathan Tan
  2018-10-25 23:32 ` [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs Stefan Beller
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

When constructing a struct repository for a submodule for some revision
of the superproject where the submodule is not contained in the index,
it may not be present in the working tree currently either. In that
situation giving a 'path' argument is not useful. Upgrade the
repo_submodule_init function to take a struct submodule instead.
The submodule struct can be obtained via submodule_from_{path, name} or
an artificial submodule struct can be passed in.

While we are at it, rename the repository struct in the repo_submodule_init
function, which is to be initialized, to a name that is not confused with
the struct submodule as easily. Perform such renames in similar functions
as well.

Also move its documentation into the header file.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 builtin/grep.c                               | 17 +++++++-----
 builtin/ls-files.c                           | 12 +++++----
 builtin/submodule--helper.c                  |  2 +-
 repository.c                                 | 27 ++++++++------------
 repository.h                                 | 12 +++++++--
 t/helper/test-submodule-nested-repo-config.c |  8 +++---
 6 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 7da8fef31a..ba7634258a 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -418,7 +418,10 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject,
 			  const struct object_id *oid,
 			  const char *filename, const char *path)
 {
-	struct repository submodule;
+	struct repository subrepo;
+	const struct submodule *sub = submodule_from_path(superproject,
+							  &null_oid, path);
+
 	int hit;
 
 	/*
@@ -434,12 +437,12 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject,
 		return 0;
 	}
 
-	if (repo_submodule_init(&submodule, superproject, path)) {
+	if (repo_submodule_init(&subrepo, superproject, sub)) {
 		grep_read_unlock();
 		return 0;
 	}
 
-	repo_read_gitmodules(&submodule);
+	repo_read_gitmodules(&subrepo);
 
 	/*
 	 * NEEDSWORK: This adds the submodule's object directory to the list of
@@ -451,7 +454,7 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject,
 	 * store is no longer global and instead is a member of the repository
 	 * object.
 	 */
-	add_to_alternates_memory(submodule.objects->objectdir);
+	add_to_alternates_memory(subrepo.objects->objectdir);
 	grep_read_unlock();
 
 	if (oid) {
@@ -476,14 +479,14 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject,
 
 		init_tree_desc(&tree, data, size);
 		hit = grep_tree(opt, pathspec, &tree, &base, base.len,
-				object->type == OBJ_COMMIT, &submodule);
+				object->type == OBJ_COMMIT, &subrepo);
 		strbuf_release(&base);
 		free(data);
 	} else {
-		hit = grep_cache(opt, &submodule, pathspec, 1);
+		hit = grep_cache(opt, &subrepo, pathspec, 1);
 	}
 
-	repo_clear(&submodule);
+	repo_clear(&subrepo);
 	return hit;
 }
 
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 7f9919a362..4d1649c1b3 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -206,17 +206,19 @@ static void show_files(struct repository *repo, struct dir_struct *dir);
 static void show_submodule(struct repository *superproject,
 			   struct dir_struct *dir, const char *path)
 {
-	struct repository submodule;
+	struct repository subrepo;
+	const struct submodule *sub = submodule_from_path(superproject,
+							  &null_oid, path);
 
-	if (repo_submodule_init(&submodule, superproject, path))
+	if (repo_submodule_init(&subrepo, superproject, sub))
 		return;
 
-	if (repo_read_index(&submodule) < 0)
+	if (repo_read_index(&subrepo) < 0)
 		die("index file corrupt");
 
-	show_files(&submodule, dir);
+	show_files(&subrepo, dir);
 
-	repo_clear(&submodule);
+	repo_clear(&subrepo);
 }
 
 static void show_ce(struct repository *repo, struct dir_struct *dir,
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 5f8a804a6e..015aa1471f 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -2037,7 +2037,7 @@ static int ensure_core_worktree(int argc, const char **argv, const char *prefix)
 	if (!sub)
 		BUG("We could get the submodule handle before?");
 
-	if (repo_submodule_init(&subrepo, the_repository, path))
+	if (repo_submodule_init(&subrepo, the_repository, sub))
 		die(_("could not get a repository handle for submodule '%s'"), path);
 
 	if (!repo_config_get_string(&subrepo, "core.worktree", &cw)) {
diff --git a/repository.c b/repository.c
index 5dd1486718..aabe64ee5d 100644
--- a/repository.c
+++ b/repository.c
@@ -166,30 +166,23 @@ int repo_init(struct repository *repo,
 	return -1;
 }
 
-/*
- * Initialize 'submodule' as the submodule given by 'path' in parent repository
- * 'superproject'.
- * Return 0 upon success and a non-zero value upon failure.
- */
-int repo_submodule_init(struct repository *submodule,
+int repo_submodule_init(struct repository *subrepo,
 			struct repository *superproject,
-			const char *path)
+			const struct submodule *sub)
 {
-	const struct submodule *sub;
 	struct strbuf gitdir = STRBUF_INIT;
 	struct strbuf worktree = STRBUF_INIT;
 	int ret = 0;
 
-	sub = submodule_from_path(superproject, &null_oid, path);
 	if (!sub) {
 		ret = -1;
 		goto out;
 	}
 
-	strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", path);
-	strbuf_repo_worktree_path(&worktree, superproject, "%s", path);
+	strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", sub->path);
+	strbuf_repo_worktree_path(&worktree, superproject, "%s", sub->path);
 
-	if (repo_init(submodule, gitdir.buf, worktree.buf)) {
+	if (repo_init(subrepo, gitdir.buf, worktree.buf)) {
 		/*
 		 * If initilization fails then it may be due to the submodule
 		 * not being populated in the superproject's worktree.  Instead
@@ -201,16 +194,16 @@ int repo_submodule_init(struct repository *submodule,
 		strbuf_repo_git_path(&gitdir, superproject,
 				     "modules/%s", sub->name);
 
-		if (repo_init(submodule, gitdir.buf, NULL)) {
+		if (repo_init(subrepo, gitdir.buf, NULL)) {
 			ret = -1;
 			goto out;
 		}
 	}
 
-	submodule->submodule_prefix = xstrfmt("%s%s/",
-					      superproject->submodule_prefix ?
-					      superproject->submodule_prefix :
-					      "", path);
+	subrepo->submodule_prefix = xstrfmt("%s%s/",
+					    superproject->submodule_prefix ?
+					    superproject->submodule_prefix :
+					    "", sub->path);
 
 out:
 	strbuf_release(&gitdir);
diff --git a/repository.h b/repository.h
index 9f16c42c1e..0e482b7d49 100644
--- a/repository.h
+++ b/repository.h
@@ -116,9 +116,17 @@ void repo_set_worktree(struct repository *repo, const char *path);
 void repo_set_hash_algo(struct repository *repo, int algo);
 void initialize_the_repository(void);
 int repo_init(struct repository *r, const char *gitdir, const char *worktree);
-int repo_submodule_init(struct repository *submodule,
+
+/*
+ * Initialize the repository 'subrepo' as the submodule given by the
+ * struct submodule 'sub' in parent repository 'superproject'.
+ * Return 0 upon success and a non-zero value upon failure, which may happen
+ * if the submodule is not found, or 'sub' is NULL.
+ */
+struct submodule;
+int repo_submodule_init(struct repository *subrepo,
 			struct repository *superproject,
-			const char *path);
+			const struct submodule *sub);
 void repo_clear(struct repository *repo);
 
 /*
diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c
index a31e2a9bea..bc97929bbc 100644
--- a/t/helper/test-submodule-nested-repo-config.c
+++ b/t/helper/test-submodule-nested-repo-config.c
@@ -10,19 +10,21 @@ static void die_usage(int argc, const char **argv, const char *msg)
 
 int cmd__submodule_nested_repo_config(int argc, const char **argv)
 {
-	struct repository submodule;
+	struct repository subrepo;
+	const struct submodule *sub;
 
 	if (argc < 3)
 		die_usage(argc, argv, "Wrong number of arguments.");
 
 	setup_git_directory();
 
-	if (repo_submodule_init(&submodule, the_repository, argv[1])) {
+	sub = submodule_from_path(the_repository, &null_oid, argv[1]);
+	if (repo_submodule_init(&subrepo, the_repository, sub)) {
 		die_usage(argc, argv, "Submodule not found.");
 	}
 
 	/* Read the config of _child_ submodules. */
-	print_config_from_gitmodules(&submodule, argv[2]);
+	print_config_from_gitmodules(&subrepo, argv[2]);
 
 	submodule_free(the_repository);
 
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (5 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 06/10] repository: repo_submodule_init to take a submodule struct Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-26 19:26   ` Jonathan Tan
  2018-10-25 23:32 ` [PATCH 08/10] submodule.c: fetch in submodules git directory instead of in worktree Stefan Beller
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

We used to recurse into submodules, even if they were broken having
only an objects directory. The child process executed in the submodule
would fail though if the submodule was broken.

This patch tightens the check upfront, such that we do not need
to spawn a child process to find out if the submodule is broken.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule.c | 51 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/submodule.c b/submodule.c
index e4b494af7b..a1a32cab7d 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1240,6 +1240,29 @@ static int get_fetch_recurse_config(const struct submodule *submodule,
 	return spf->default_option;
 }
 
+static struct repository *get_submodule_repo_for(struct repository *r,
+						 const struct submodule *sub)
+{
+	struct repository *ret = xmalloc(sizeof(*ret));
+
+	if (repo_submodule_init(ret, r, sub)) {
+		/*
+		 * No entry in .gitmodules? Technically not a submodule,
+		 * but historically we supported repositories that happen to be
+		 * in-place where a gitlink is. Keep supporting them.
+		 */
+		struct strbuf gitdir = STRBUF_INIT;
+		strbuf_repo_worktree_path(&gitdir, r, "%s/.git", sub->path);
+		if (repo_init(ret, gitdir.buf, NULL)) {
+			strbuf_release(&gitdir);
+			return NULL;
+		}
+		strbuf_release(&gitdir);
+	}
+
+	return ret;
+}
+
 static int get_next_submodule(struct child_process *cp,
 			      struct strbuf *err, void *data, void **task_cb)
 {
@@ -1247,12 +1270,11 @@ static int get_next_submodule(struct child_process *cp,
 	struct submodule_parallel_fetch *spf = data;
 
 	for (; spf->count < spf->r->index->cache_nr; spf->count++) {
-		struct strbuf submodule_path = STRBUF_INIT;
-		struct strbuf submodule_git_dir = STRBUF_INIT;
 		struct strbuf submodule_prefix = STRBUF_INIT;
 		const struct cache_entry *ce = spf->r->index->cache[spf->count];
-		const char *git_dir, *default_argv;
+		const char *default_argv;
 		const struct submodule *submodule;
+		struct repository *repo;
 		struct submodule default_submodule = SUBMODULE_INIT;
 
 		if (!S_ISGITLINK(ce->ce_mode))
@@ -1287,16 +1309,12 @@ static int get_next_submodule(struct child_process *cp,
 			continue;
 		}
 
-		strbuf_repo_worktree_path(&submodule_path, spf->r, "%s", ce->name);
-		strbuf_addf(&submodule_git_dir, "%s/.git", submodule_path.buf);
 		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name);
-		git_dir = read_gitfile(submodule_git_dir.buf);
-		if (!git_dir)
-			git_dir = submodule_git_dir.buf;
-		if (is_directory(git_dir)) {
+		repo = get_submodule_repo_for(spf->r, submodule);
+		if (repo) {
 			child_process_init(cp);
-			cp->dir = strbuf_detach(&submodule_path, NULL);
 			prepare_submodule_repo_env(&cp->env_array);
+			cp->dir = xstrdup(repo->worktree);
 			cp->git_cmd = 1;
 			if (!spf->quiet)
 				strbuf_addf(err, "Fetching submodule %s%s\n",
@@ -1306,10 +1324,19 @@ static int get_next_submodule(struct child_process *cp,
 			argv_array_push(&cp->args, default_argv);
 			argv_array_push(&cp->args, "--submodule-prefix");
 			argv_array_push(&cp->args, submodule_prefix.buf);
+
+			repo_clear(repo);
+			free(repo);
 			ret = 1;
+		} else {
+			/*
+			 * An empty directory is normal,
+			 * the submodule is not initialized
+			 */
+			if (S_ISGITLINK(ce->ce_mode) &&
+			    !is_empty_dir(ce->name))
+				die(_("Could not access submodule '%s'"), ce->name);
 		}
-		strbuf_release(&submodule_path);
-		strbuf_release(&submodule_git_dir);
 		strbuf_release(&submodule_prefix);
 		if (ret) {
 			spf->count++;
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 08/10] submodule.c: fetch in submodules git directory instead of in worktree
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (6 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-25 23:32 ` [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched Stefan Beller
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

Keep the properties introduced in  10f5c52656 (submodule: avoid
auto-discovery in prepare_submodule_repo_env(), 2016-09-01), by fixating
the git directory of the submodule.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 submodule.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/submodule.c b/submodule.c
index a1a32cab7d..67813fbe78 100644
--- a/submodule.c
+++ b/submodule.c
@@ -494,6 +494,12 @@ void prepare_submodule_repo_env(struct argv_array *out)
 			 DEFAULT_GIT_DIR_ENVIRONMENT);
 }
 
+static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
+{
+	prepare_submodule_repo_env_no_git_dir(out);
+	argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
+}
+
 /* Helper function to display the submodule header line prior to the full
  * summary output. If it can locate the submodule objects directory it will
  * attempt to lookup both the left and right commits and put them into the
@@ -1313,8 +1319,8 @@ static int get_next_submodule(struct child_process *cp,
 		repo = get_submodule_repo_for(spf->r, submodule);
 		if (repo) {
 			child_process_init(cp);
-			prepare_submodule_repo_env(&cp->env_array);
-			cp->dir = xstrdup(repo->worktree);
+			prepare_submodule_repo_env_in_gitdir(&cp->env_array);
+			cp->dir = xstrdup(repo->gitdir);
 			cp->git_cmd = 1;
 			if (!spf->quiet)
 				strbuf_addf(err, "Fetching submodule %s%s\n",
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (7 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 08/10] submodule.c: fetch in submodules git directory instead of in worktree Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-26 20:41   ` Jonathan Tan
  2018-10-25 23:32 ` [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update Stefan Beller
  2018-10-29  3:44 ` [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Junio C Hamano
  10 siblings, 1 reply; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

Currently when git-fetch is asked to recurse into submodules, it dispatches
a plain "git-fetch -C <submodule-dir>" (with some submodule related options
such as prefix and recusing strategy, but) without any information of the
remote or the tip that should be fetched.

But this default fetch is not sufficient, as a newly fetched commit in
the superproject could point to a commit in the submodule that is not
in the default refspec. This is common in workflows like Gerrit's.
When fetching a Gerrit change under review (from refs/changes/??), the
commits in that change likely point to submodule commits that have not
been merged to a branch yet.

Try fetching a submodule by object id if the object id that the
superproject points to, cannot be found.

The try does not happen when the "git fetch" done at the
superproject is not storing the fetched results in remote
tracking branches (i.e. instead just recording them to
FETCH_HEAD) in this step. A later patch will fix this.

builtin/fetch used to only inspect submodules when they were fetched
"on-demand", as in either on/off case it was clear whether the submodule
needs to be fetched. However to know whether we need to try fetching the
object ids, we need to identify the object names, which is done in this
function check_for_new_submodule_commits(), so we'll also run that code
in case the submodule recursion is set to "on".

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 builtin/fetch.c             |   9 +-
 submodule.c                 | 192 ++++++++++++++++++++++++++++++------
 t/t5526-fetch-submodules.sh |  31 ++++++
 3 files changed, 198 insertions(+), 34 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 61bec5d213..95c44bf6ff 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -700,8 +700,7 @@ static int update_local_ref(struct ref *ref,
 			what = _("[new ref]");
 		}
 
-		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
-		    (recurse_submodules != RECURSE_SUBMODULES_ON))
+		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
 			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref(msg, ref, 0);
 		format_display(display, r ? '!' : '*', what,
@@ -716,8 +715,7 @@ static int update_local_ref(struct ref *ref,
 		strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
 		strbuf_addstr(&quickref, "..");
 		strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
-		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
-		    (recurse_submodules != RECURSE_SUBMODULES_ON))
+		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
 			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref("fast-forward", ref, 1);
 		format_display(display, r ? '!' : ' ', quickref.buf,
@@ -731,8 +729,7 @@ static int update_local_ref(struct ref *ref,
 		strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
 		strbuf_addstr(&quickref, "...");
 		strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
-		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
-		    (recurse_submodules != RECURSE_SUBMODULES_ON))
+		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
 			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref("forced-update", ref, 1);
 		format_display(display, r ? '!' : '+', quickref.buf,
diff --git a/submodule.c b/submodule.c
index 67813fbe78..c978a38c81 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1218,8 +1218,12 @@ struct submodule_parallel_fetch {
 	int result;
 
 	struct string_list changed_submodule_names;
+	struct get_next_submodule_task **fetch_specific_oids;
+	int fetch_specific_oids_nr, fetch_specific_oids_alloc;
 };
-#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, \
+		  STRING_LIST_INIT_DUP, \
+		  NULL, 0, 0}
 
 static int get_fetch_recurse_config(const struct submodule *submodule,
 				    struct submodule_parallel_fetch *spf)
@@ -1246,6 +1250,58 @@ static int get_fetch_recurse_config(const struct submodule *submodule,
 	return spf->default_option;
 }
 
+struct get_next_submodule_task {
+	struct repository *repo;
+	const struct submodule *sub;
+	unsigned free_sub : 1; /* Do we need to free the submodule? */
+
+	/* fetch specific oids if set, otherwise fetch default refspec */
+	struct oid_array *commits;
+};
+
+static const struct submodule *get_default_submodule(const char *path)
+{
+	struct submodule *ret = NULL;
+	const char *name = default_name_or_path(path);
+
+	if (!name)
+		return NULL;
+
+	ret = xmalloc(sizeof(*ret));
+	memset(ret, 0, sizeof(*ret));
+	ret->path = name;
+	ret->name = name;
+
+	return (const struct submodule *) ret;
+}
+
+static struct get_next_submodule_task *get_next_submodule_task_create(
+	struct repository *r, const char *path)
+{
+	struct get_next_submodule_task *task = xmalloc(sizeof(*task));
+	memset(task, 0, sizeof(*task));
+
+	task->sub = submodule_from_path(r, &null_oid, path);
+	if (!task->sub) {
+		task->sub = get_default_submodule(path);
+		task->free_sub = 1;
+	}
+
+	return task;
+}
+
+static void get_next_submodule_task_release(struct get_next_submodule_task *p)
+{
+	if (p->free_sub)
+		free((void*)p->sub);
+	p->free_sub = 0;
+	p->sub = NULL;
+
+	if (p->repo)
+		repo_clear(p->repo);
+	FREE_AND_NULL(p->repo);
+}
+
 static struct repository *get_submodule_repo_for(struct repository *r,
 						 const struct submodule *sub)
 {
@@ -1272,39 +1328,35 @@ static struct repository *get_submodule_repo_for(struct repository *r,
 static int get_next_submodule(struct child_process *cp,
 			      struct strbuf *err, void *data, void **task_cb)
 {
-	int ret = 0;
 	struct submodule_parallel_fetch *spf = data;
 
 	for (; spf->count < spf->r->index->cache_nr; spf->count++) {
-		struct strbuf submodule_prefix = STRBUF_INIT;
+		int recurse_config;
 		const struct cache_entry *ce = spf->r->index->cache[spf->count];
 		const char *default_argv;
-		const struct submodule *submodule;
-		struct repository *repo;
-		struct submodule default_submodule = SUBMODULE_INIT;
+		struct get_next_submodule_task *task;
 
 		if (!S_ISGITLINK(ce->ce_mode))
 			continue;
 
-		submodule = submodule_from_path(spf->r, &null_oid, ce->name);
-		if (!submodule) {
-			const char *name = default_name_or_path(ce->name);
-			if (name) {
-				default_submodule.path = name;
-				default_submodule.name = name;
-				submodule = &default_submodule;
-			}
+		task = get_next_submodule_task_create(spf->r, ce->name);
+
+		if (!task->sub) {
+			free(task);
+			continue;
 		}
 
-		switch (get_fetch_recurse_config(submodule, spf))
+		recurse_config = get_fetch_recurse_config(task->sub, spf);
+
+		switch (recurse_config)
 		{
 		default:
 		case RECURSE_SUBMODULES_DEFAULT:
 		case RECURSE_SUBMODULES_ON_DEMAND:
-			if (!submodule ||
+			if (!task->sub ||
 			    !string_list_lookup(
 					&spf->changed_submodule_names,
-					submodule->name))
+					task->sub->name))
 				continue;
 			default_argv = "on-demand";
 			break;
@@ -1315,12 +1367,12 @@ static int get_next_submodule(struct child_process *cp,
 			continue;
 		}
 
-		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name);
-		repo = get_submodule_repo_for(spf->r, submodule);
-		if (repo) {
+		task->repo = get_submodule_repo_for(spf->r, task->sub);
+		if (task->repo) {
+			struct strbuf submodule_prefix = STRBUF_INIT;
 			child_process_init(cp);
 			prepare_submodule_repo_env_in_gitdir(&cp->env_array);
-			cp->dir = xstrdup(repo->gitdir);
+			cp->dir = task->repo->gitdir;
 			cp->git_cmd = 1;
 			if (!spf->quiet)
 				strbuf_addf(err, "Fetching submodule %s%s\n",
@@ -1329,12 +1381,22 @@ static int get_next_submodule(struct child_process *cp,
 			argv_array_pushv(&cp->args, spf->args.argv);
 			argv_array_push(&cp->args, default_argv);
 			argv_array_push(&cp->args, "--submodule-prefix");
+
+			strbuf_addf(&submodule_prefix, "%s%s/",
+						       spf->prefix,
+						       task->sub->path);
 			argv_array_push(&cp->args, submodule_prefix.buf);
 
-			repo_clear(repo);
-			free(repo);
-			ret = 1;
+			spf->count++;
+			*task_cb = task;
+
+			strbuf_release(&submodule_prefix);
+			return 1;
 		} else {
+
+			get_next_submodule_task_release(task);
+			free(task);
+
 			/*
 			 * An empty directory is normal,
 			 * the submodule is not initialized
@@ -1343,12 +1405,36 @@ static int get_next_submodule(struct child_process *cp,
 			    !is_empty_dir(ce->name))
 				die(_("Could not access submodule '%s'"), ce->name);
 		}
+	}
+
+	if (spf->fetch_specific_oids_nr) {
+		struct get_next_submodule_task *task = spf->fetch_specific_oids[spf->fetch_specific_oids_nr - 1];
+		struct strbuf submodule_prefix = STRBUF_INIT;
+		spf->fetch_specific_oids_nr--;
+
+		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, task->sub->path);
+
+		child_process_init(cp);
+		prepare_submodule_repo_env_in_gitdir(&cp->env_array);
+		cp->git_cmd = 1;
+		cp->dir = task->repo->gitdir;
+
+		argv_array_init(&cp->args);
+		argv_array_pushv(&cp->args, spf->args.argv);
+		argv_array_push(&cp->args, "on-demand");
+		argv_array_push(&cp->args, "--submodule-prefix");
+		argv_array_push(&cp->args, submodule_prefix.buf);
+
+		/* NEEDSWORK: have get_default_remote from s--h */
+		argv_array_push(&cp->args, "origin");
+		oid_array_for_each_unique(task->commits,
+					  append_oid_to_argv, &cp->args);
+
+		*task_cb = task;
 		strbuf_release(&submodule_prefix);
-		if (ret) {
-			spf->count++;
-			return 1;
-		}
+		return 1;
 	}
+
 	return 0;
 }
 
@@ -1356,20 +1442,70 @@ static int fetch_start_failure(struct strbuf *err,
 			       void *cb, void *task_cb)
 {
 	struct submodule_parallel_fetch *spf = cb;
+	struct get_next_submodule_task *task = task_cb;
 
 	spf->result = 1;
 
+	get_next_submodule_task_release(task);
 	return 0;
 }
 
+static int commit_exists_in_sub(const struct object_id *oid, void *data)
+{
+	struct repository *subrepo = data;
+
+	enum object_type type = oid_object_info(subrepo, oid, NULL);
+
+	return type != OBJ_COMMIT;
+}
+
 static int fetch_finish(int retvalue, struct strbuf *err,
 			void *cb, void *task_cb)
 {
 	struct submodule_parallel_fetch *spf = cb;
+	struct get_next_submodule_task *task = task_cb;
+	const struct submodule *sub;
+
+	struct string_list_item *it;
+	struct oid_array *commits;
 
 	if (retvalue)
 		spf->result = 1;
 
+	if (!task)
+		return 0;
+
+	sub = task->sub;
+	if (!sub)
+		goto out;
+
+	it = string_list_lookup(&spf->changed_submodule_names, sub->name);
+	if (!it)
+		goto out;
+
+	commits = it->util;
+	oid_array_filter(commits,
+			 commit_exists_in_sub,
+			 task->repo);
+
+	/* Are there commits that do not exist? */
+	if (commits->nr) {
+		/* We already tried fetching them, do not try again. */
+		if (task->commits)
+			return 0;
+
+		task->commits = commits;
+		ALLOC_GROW(spf->fetch_specific_oids,
+			   spf->fetch_specific_oids_nr + 1,
+			   spf->fetch_specific_oids_alloc);
+		spf->fetch_specific_oids[spf->fetch_specific_oids_nr] = task;
+		spf->fetch_specific_oids_nr++;
+		return 0;
+	}
+
+out:
+	get_next_submodule_task_release(task);
+
 	return 0;
 }
 
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index 6c2f9b2ba2..5a75b57852 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -600,4 +600,35 @@ test_expect_success "fetch new commits when submodule got renamed" '
 	test_cmp expect actual
 '
 
+test_expect_success "fetch new submodule commits on-demand outside standard refspec" '
+	# add a second submodule and ensure it is around in downstream first
+	git clone submodule sub1 &&
+	git submodule add ./sub1 &&
+	git commit -m "adding a second submodule" &&
+	git -C downstream pull &&
+	git -C downstream submodule update --init --recursive &&
+
+	git checkout --detach &&
+
+	C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
+	git -C submodule update-ref refs/changes/1 $C &&
+	git update-index --cacheinfo 160000 $C submodule &&
+	test_tick &&
+
+	D=$(git -C sub1 commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
+	git -C sub1 update-ref refs/changes/2 $D &&
+	git update-index --cacheinfo 160000 $D sub1 &&
+
+	git commit -m "updated submodules outside of refs/heads" &&
+	E=$(git rev-parse HEAD) &&
+	git update-ref refs/changes/2 $E &&
+	(
+		cd downstream &&
+		git fetch --recurse-submodules origin refs/changes/2:refs/heads/my_branch &&
+		git -C submodule cat-file -t $C &&
+		git -C sub1 cat-file -t $D &&
+		git checkout --recurse-submodules FETCH_HEAD
+	)
+'
+
 test_done
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (8 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched Stefan Beller
@ 2018-10-25 23:32 ` Stefan Beller
  2018-10-26 20:42   ` Jonathan Tan
  2018-10-29  3:44 ` [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Junio C Hamano
  10 siblings, 1 reply; 19+ messages in thread
From: Stefan Beller @ 2018-10-25 23:32 UTC (permalink / raw)
  To: jonathantanmy; +Cc: git, Stefan Beller

Gerrit, the code review tool, has a different workflow than our mailing
list based approach. Usually users upload changes to a Gerrit server and
continuous integration and testing happens by bots. Sometimes however a
user wants to checkout a change locally and look at it locally. For this
use case, Gerrit offers a command line snippet to copy and paste to your
terminal, which looks like

  git fetch https://<host>/gerrit refs/changes/<id> &&
  git checkout FETCH_HEAD

For Gerrit changes that contain changing submodule gitlinks, it would be
easy to extend both the fetch and checkout with the '--recurse-submodules'
flag, such that this command line snippet would produce the state of a
change locally.

However the functionality added in the previous patch, which would
ensure that we fetch the objects in the submodule that the gitlink pointed
at, only works for remote tracking branches so far, not for FETCH_HEAD.

Make sure that fetching a superproject to its FETCH_HEAD, also respects
the existence checks for objects in the submodule recursion.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 builtin/fetch.c             |  8 ++------
 t/t5526-fetch-submodules.sh | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 95c44bf6ff..f39012d7c2 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -700,8 +700,6 @@ static int update_local_ref(struct ref *ref,
 			what = _("[new ref]");
 		}
 
-		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
-			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref(msg, ref, 0);
 		format_display(display, r ? '!' : '*', what,
 			       r ? _("unable to update local ref") : NULL,
@@ -715,8 +713,6 @@ static int update_local_ref(struct ref *ref,
 		strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
 		strbuf_addstr(&quickref, "..");
 		strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
-		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
-			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref("fast-forward", ref, 1);
 		format_display(display, r ? '!' : ' ', quickref.buf,
 			       r ? _("unable to update local ref") : NULL,
@@ -729,8 +725,6 @@ static int update_local_ref(struct ref *ref,
 		strbuf_add_unique_abbrev(&quickref, &current->object.oid, DEFAULT_ABBREV);
 		strbuf_addstr(&quickref, "...");
 		strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV);
-		if (recurse_submodules != RECURSE_SUBMODULES_OFF)
-			check_for_new_submodule_commits(&ref->new_oid);
 		r = s_update_ref("forced-update", ref, 1);
 		format_display(display, r ? '!' : '+', quickref.buf,
 			       r ? _("unable to update local ref") : _("forced update"),
@@ -826,6 +820,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
 				ref->force = rm->peer_ref->force;
 			}
 
+			if (recurse_submodules != RECURSE_SUBMODULES_OFF)
+				check_for_new_submodule_commits(&rm->old_oid);
 
 			if (!strcmp(rm->name, "HEAD")) {
 				kind = "";
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index 5a75b57852..799785783f 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -631,4 +631,28 @@ test_expect_success "fetch new submodule commits on-demand outside standard refs
 	)
 '
 
+test_expect_success 'fetch new submodule commits on-demand in FETCH_HEAD' '
+	# depends on the previous test for setup
+
+	C=$(git -C submodule commit-tree -m "another change outside refs/heads" HEAD^{tree}) &&
+	git -C submodule update-ref refs/changes/1 $C &&
+	git update-index --cacheinfo 160000 $C submodule &&
+	test_tick &&
+
+	D=$(git -C sub1 commit-tree -m "another change outside refs/heads" HEAD^{tree}) &&
+	git -C sub1 update-ref refs/changes/2 $D &&
+	git update-index --cacheinfo 160000 $D sub1 &&
+
+	git commit -m "updated submodules outside of refs/heads" &&
+	E=$(git rev-parse HEAD) &&
+	git update-ref refs/changes/2 $E &&
+	(
+		cd downstream &&
+		git fetch --recurse-submodules origin refs/changes/2 &&
+		git -C submodule cat-file -t $C &&
+		git -C sub1 cat-file -t $D &&
+		git checkout --recurse-submodules FETCH_HEAD
+	)
+'
+
 test_done
-- 
2.19.0


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 05/10] submodule: store OIDs in changed_submodule_names
  2018-10-25 23:32 ` [PATCH 05/10] submodule: store OIDs in changed_submodule_names Stefan Beller
@ 2018-10-26 18:57   ` Jonathan Tan
  0 siblings, 0 replies; 19+ messages in thread
From: Jonathan Tan @ 2018-10-26 18:57 UTC (permalink / raw)
  To: sbeller; +Cc: jonathantanmy, git

> Reviewed-by: Jonathan Tan <jonathantanmy@google.com>

Probably better not to include such lines, especially since the review
by me is not yet complete.

Having said that, patches 1-5 look good to me. Patches 1-3 are identical
to the previous version, which I have already reviewed. In patch 4,
Stefan made the code change I suggested [1].

In this patch, compared to the previous version which I have already
reviewed [2], the code is unchanged aside from some variable renaming. I
suggested a commit title change which Stefan has done.

[1] https://public-inbox.org/git/20181017212624.196598-1-jonathantanmy@google.com/
[2] https://public-inbox.org/git/20181017214534.199890-1-jonathantanmy@google.com/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 06/10] repository: repo_submodule_init to take a submodule struct
  2018-10-25 23:32 ` [PATCH 06/10] repository: repo_submodule_init to take a submodule struct Stefan Beller
@ 2018-10-26 19:15   ` Jonathan Tan
  2018-10-26 22:01     ` Stefan Beller
  0 siblings, 1 reply; 19+ messages in thread
From: Jonathan Tan @ 2018-10-26 19:15 UTC (permalink / raw)
  To: sbeller; +Cc: jonathantanmy, git

> diff --git a/builtin/grep.c b/builtin/grep.c
> index 7da8fef31a..ba7634258a 100644
> --- a/builtin/grep.c
> +++ b/builtin/grep.c
> @@ -418,7 +418,10 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject,
>  			  const struct object_id *oid,
>  			  const char *filename, const char *path)
>  {
> -	struct repository submodule;
> +	struct repository subrepo;
> +	const struct submodule *sub = submodule_from_path(superproject,
> +							  &null_oid, path);

[snip]

> -	if (repo_submodule_init(&submodule, superproject, path)) {
> +	if (repo_submodule_init(&subrepo, superproject, sub)) {

The last argument to repo_submodule_init is now
"submodule_from_path(superproject, &null_oid, path)" instead of "path",
and looking forward into the patch, we do not need a NULL check because
repo_submodule_init() tolerates NULL in that argument.

Let's see if the rest of the code follows the pattern - a call to
submodule_from_path() with the 3 expected arguments (repo, null OID,
path).

> diff --git a/builtin/ls-files.c b/builtin/ls-files.c
> index 7f9919a362..4d1649c1b3 100644
> --- a/builtin/ls-files.c
> +++ b/builtin/ls-files.c
> @@ -206,17 +206,19 @@ static void show_files(struct repository *repo, struct dir_struct *dir);
>  static void show_submodule(struct repository *superproject,
>  			   struct dir_struct *dir, const char *path)
>  {
> -	struct repository submodule;
> +	struct repository subrepo;
> +	const struct submodule *sub = submodule_from_path(superproject,
> +							  &null_oid, path);
>  
> -	if (repo_submodule_init(&submodule, superproject, path))
> +	if (repo_submodule_init(&subrepo, superproject, sub))

So far so good.

> diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
> index 5f8a804a6e..015aa1471f 100644
> --- a/builtin/submodule--helper.c
> +++ b/builtin/submodule--helper.c
> @@ -2037,7 +2037,7 @@ static int ensure_core_worktree(int argc, const char **argv, const char *prefix)
>  	if (!sub)
>  		BUG("We could get the submodule handle before?");
>  
> -	if (repo_submodule_init(&subrepo, the_repository, path))
> +	if (repo_submodule_init(&subrepo, the_repository, sub))

The definition of "sub" is not quoted here in this e-mail, but it is
indeed "submodule_from_path(the_repository, &null_oid, path)".
("the_repository" in the invocation to submodule_from_path() is correct
because the 2nd argument to the invocation of repo_submodule_init() is
"the_repository".)

> -int repo_submodule_init(struct repository *submodule,
> +int repo_submodule_init(struct repository *subrepo,
>  			struct repository *superproject,
> -			const char *path)
> +			const struct submodule *sub)
>  {
> -	const struct submodule *sub;
>  	struct strbuf gitdir = STRBUF_INIT;
>  	struct strbuf worktree = STRBUF_INIT;
>  	int ret = 0;
>  
> -	sub = submodule_from_path(superproject, &null_oid, path);

As expected, this line is removed.

>  	if (!sub) {
>  		ret = -1;
>  		goto out;
>  	}
>  
> -	strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", path);
> -	strbuf_repo_worktree_path(&worktree, superproject, "%s", path);
> +	strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", sub->path);
> +	strbuf_repo_worktree_path(&worktree, superproject, "%s", sub->path);

path and sub->path are the same, so this is fine. (This can be seen from
cache_put_path() and cache_lookup_path() in submodule-config.c.)

> -	submodule->submodule_prefix = xstrfmt("%s%s/",
> -					      superproject->submodule_prefix ?
> -					      superproject->submodule_prefix :
> -					      "", path);
> +	subrepo->submodule_prefix = xstrfmt("%s%s/",
> +					    superproject->submodule_prefix ?
> +					    superproject->submodule_prefix :
> +					    "", sub->path);

Likewise.

> +/*
> + * Initialize the repository 'subrepo' as the submodule given by the
> + * struct submodule 'sub' in parent repository 'superproject'.
> + * Return 0 upon success and a non-zero value upon failure, which may happen
> + * if the submodule is not found, or 'sub' is NULL.
> + */
> +struct submodule;
> +int repo_submodule_init(struct repository *subrepo,
>  			struct repository *superproject,
> -			const char *path);
> +			const struct submodule *sub);

Here is where it says that the last argument can be NULL.

> diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c
> index a31e2a9bea..bc97929bbc 100644
> --- a/t/helper/test-submodule-nested-repo-config.c
> +++ b/t/helper/test-submodule-nested-repo-config.c
> @@ -10,19 +10,21 @@ static void die_usage(int argc, const char **argv, const char *msg)
>  
>  int cmd__submodule_nested_repo_config(int argc, const char **argv)
>  {
> -	struct repository submodule;
> +	struct repository subrepo;
> +	const struct submodule *sub;
>  
>  	if (argc < 3)
>  		die_usage(argc, argv, "Wrong number of arguments.");
>  
>  	setup_git_directory();
>  
> -	if (repo_submodule_init(&submodule, the_repository, argv[1])) {
> +	sub = submodule_from_path(the_repository, &null_oid, argv[1]);
> +	if (repo_submodule_init(&subrepo, the_repository, sub)) {

The expected pattern.

This patch looks good to me.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs
  2018-10-25 23:32 ` [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs Stefan Beller
@ 2018-10-26 19:26   ` Jonathan Tan
  0 siblings, 0 replies; 19+ messages in thread
From: Jonathan Tan @ 2018-10-26 19:26 UTC (permalink / raw)
  To: sbeller; +Cc: jonathantanmy, git

> We used to recurse into submodules, even if they were broken having
> only an objects directory. The child process executed in the submodule
> would fail though if the submodule was broken.
> 
> This patch tightens the check upfront, such that we do not need
> to spawn a child process to find out if the submodule is broken.

Thanks for the clear commit message. Also mention which tests currently
exercise this.

> +static struct repository *get_submodule_repo_for(struct repository *r,
> +						 const struct submodule *sub)
> +{
> +	struct repository *ret = xmalloc(sizeof(*ret));
> +
> +	if (repo_submodule_init(ret, r, sub)) {
> +		/*
> +		 * No entry in .gitmodules? Technically not a submodule,
> +		 * but historically we supported repositories that happen to be
> +		 * in-place where a gitlink is. Keep supporting them.
> +		 */
> +		struct strbuf gitdir = STRBUF_INIT;
> +		strbuf_repo_worktree_path(&gitdir, r, "%s/.git", sub->path);
> +		if (repo_init(ret, gitdir.buf, NULL)) {
> +			strbuf_release(&gitdir);

You should also free ret here.

> +			return NULL;
> +		}
> +		strbuf_release(&gitdir);
> +	}
> +
> +	return ret;
> +}

The above is the rest of get_submodule_repo_for(), so that we can see
that gitdir is indeed freed.

> -		if (!git_dir)
> -			git_dir = submodule_git_dir.buf;
> -		if (is_directory(git_dir)) {
> +		repo = get_submodule_repo_for(spf->r, submodule);
> +		if (repo) {
>  			child_process_init(cp);
> -			cp->dir = strbuf_detach(&submodule_path, NULL);
>  			prepare_submodule_repo_env(&cp->env_array);
> +			cp->dir = xstrdup(repo->worktree);

Move the cp->dir one line up for a cleaner diff.

[snip]

> +			repo_clear(repo);
> +			free(repo);
>  			ret = 1;
> +		} else {
> +			/*
> +			 * An empty directory is normal,
> +			 * the submodule is not initialized
> +			 */
> +			if (S_ISGITLINK(ce->ce_mode) &&
> +			    !is_empty_dir(ce->name))
> +				die(_("Could not access submodule '%s'"), ce->name);

Previously, a failed fetch would just set spf->result = 1 (in
fetch_finish()), allowing other fetches to still proceed. This sounds
like a better idea to me instead of die-ing outright. (Also remember to
print a warning message - since we no longer spawn a child process to
try a fetch that will fail, we need to print a message ourselves.)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched
  2018-10-25 23:32 ` [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched Stefan Beller
@ 2018-10-26 20:41   ` Jonathan Tan
  2018-11-29  0:30     ` Stefan Beller
  0 siblings, 1 reply; 19+ messages in thread
From: Jonathan Tan @ 2018-10-26 20:41 UTC (permalink / raw)
  To: sbeller; +Cc: jonathantanmy, git

> But this default fetch is not sufficient, as a newly fetched commit in
> the superproject could point to a commit in the submodule that is not
> in the default refspec. This is common in workflows like Gerrit's.
> When fetching a Gerrit change under review (from refs/changes/??), the
> commits in that change likely point to submodule commits that have not
> been merged to a branch yet.
> 
> Try fetching a submodule by object id if the object id that the
> superproject points to, cannot be found.

I see that these suggestions of mine (from [1]) was implemented, but not
others. If you disagree, that's fine, but I think they should be
discussed.

[1] https://public-inbox.org/git/20181018003954.139498-1-jonathantanmy@google.com/

> The try does not happen when the "git fetch" done at the
> superproject is not storing the fetched results in remote
> tracking branches (i.e. instead just recording them to
> FETCH_HEAD) in this step. A later patch will fix this.

E.g. here, I said that there was no remote tracking branch involved.

> -		if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
> -		    (recurse_submodules != RECURSE_SUBMODULES_ON))
> +		if (recurse_submodules != RECURSE_SUBMODULES_OFF)

I think the next patch should be squashed into this patch. Then you can
say that these are now redundant and can be removed.

> @@ -1218,8 +1218,12 @@ struct submodule_parallel_fetch {
>  	int result;
>  
>  	struct string_list changed_submodule_names;
> +	struct get_next_submodule_task **fetch_specific_oids;
> +	int fetch_specific_oids_nr, fetch_specific_oids_alloc;
>  };

Add documentation for fetch_specific_oids. Also, it might be better to
call these oid_fetch_tasks and the struct, "struct fetch_task".

Here, struct get_next_submodule_task is used for 2 different things:
 (1) After the first round fetch, fetch_finish() uses it to determine if
     a second round is needed.
 (2) In submodule_parallel_fetch.fetch_specific_oids, to tell the
     parallel runner (through get_next_submodule_task()) to do this
     fetch.

I think that it's better to have 2 different structs for these 2
different uses. (Note that task_cb can be NULL for the second round.
Unless we plan to recheck the OIDs? Currently we recheck them, but we
don't do anything either way.)

> +static const struct submodule *get_default_submodule(const char *path)
> +{
> +	struct submodule *ret = NULL;
> +	const char *name = default_name_or_path(path);
> +
> +	if (!name)
> +		return NULL;
> +
> +	ret = xmalloc(sizeof(*ret));
> +	memset(ret, 0, sizeof(*ret));
> +	ret->path = name;
> +	ret->name = name;
> +
> +	return (const struct submodule *) ret;
> +}

I think that this is best described as the submodule that has no entry
in .gitmodules? Maybe call it "get_non_gitmodules_submodule" and
document it with a similar comment as in get_submodule_repo_for().

> +
> +static struct get_next_submodule_task *get_next_submodule_task_create(
> +	struct repository *r, const char *path)
> +{
> +	struct get_next_submodule_task *task = xmalloc(sizeof(*task));
> +	memset(task, 0, sizeof(*task));
> +
> +	task->sub = submodule_from_path(r, &null_oid, path);
> +	if (!task->sub) {
> +		task->sub = get_default_submodule(path);
> +		task->free_sub = 1;
> +	}
> +
> +	return task;
> +}

Clearer to me would be to make get_next_submodule_task_create() return
NULL if submodule_from_path() returns NULL.

> +	if (spf->fetch_specific_oids_nr) {
> +		struct get_next_submodule_task *task = spf->fetch_specific_oids[spf->fetch_specific_oids_nr - 1];

Break lines to 80.

> +		argv_array_pushv(&cp->args, spf->args.argv);
> +		argv_array_push(&cp->args, "on-demand");

Same comment about "on-demand" as in my previous e-mail.

> +		argv_array_push(&cp->args, "--submodule-prefix");
> +		argv_array_push(&cp->args, submodule_prefix.buf);
> +
> +		/* NEEDSWORK: have get_default_remote from s--h */

Same comment about "s--h" as in my previous e-mail.

> +	commits = it->util;
> +	oid_array_filter(commits,
> +			 commit_exists_in_sub,
> +			 task->repo);
> +
> +	/* Are there commits that do not exist? */
> +	if (commits->nr) {
> +		/* We already tried fetching them, do not try again. */
> +		if (task->commits)
> +			return 0;

Same comment about "task->commits" as in my previous e-mail.

> diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
> index 6c2f9b2ba2..5a75b57852 100755

One more thing to test is the case where a submodule doesn't have a
.gitmodules entry.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update
  2018-10-25 23:32 ` [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update Stefan Beller
@ 2018-10-26 20:42   ` Jonathan Tan
  0 siblings, 0 replies; 19+ messages in thread
From: Jonathan Tan @ 2018-10-26 20:42 UTC (permalink / raw)
  To: sbeller; +Cc: jonathantanmy, git

> Gerrit, the code review tool, has a different workflow than our mailing
> list based approach. Usually users upload changes to a Gerrit server and
> continuous integration and testing happens by bots. Sometimes however a
> user wants to checkout a change locally and look at it locally. For this
> use case, Gerrit offers a command line snippet to copy and paste to your
> terminal, which looks like

As I said in my review of the previous patch, I think this should be
squashed into the previous patch.

Also, remember to add the version when formatting the e-mail ("[PATCH
v? ??/??]").

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 06/10] repository: repo_submodule_init to take a submodule struct
  2018-10-26 19:15   ` Jonathan Tan
@ 2018-10-26 22:01     ` Stefan Beller
  0 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-10-26 22:01 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

On Fri, Oct 26, 2018 at 12:15 PM Jonathan Tan <jonathantanmy@google.com> wrote:

 [snip]

> The expected pattern.
>
> This patch looks good to me.

I'll take this as a "Reviewed-by"?

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip
  2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
                   ` (9 preceding siblings ...)
  2018-10-25 23:32 ` [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update Stefan Beller
@ 2018-10-29  3:44 ` Junio C Hamano
  10 siblings, 0 replies; 19+ messages in thread
From: Junio C Hamano @ 2018-10-29  3:44 UTC (permalink / raw)
  To: Stefan Beller; +Cc: jonathantanmy, git

Stefan Beller <sbeller@google.com> writes:

> Thanks Jonathan for your thoughtful comments,
> here is the product of the discussion:

Can you use v$n in the subject (if these rerolls are meant for
application and not primarily for discussion, that is)?  It quickly
gets confusing to see many messages with the same subject from
different attempts.

> This is still based on ao/submodule-wo-gitmodules-checked-out.

... which just got updated but I think only one test script changed,
so I expect there won't be anything unexpected when I queue this on
top of a merge of that topic and master.

It seems that the first three preparatory steps haven't changed,
patch #4-#7 have moderate amount of changes, and the previous #8 got
updated quite a lot.  Let's see how well this and updated base topic
will play with other topics in flight.

Thanks.

> previous version
> https://public-inbox.org/git/20181016181327.107186-1-sbeller@google.com/
>
> Stefan Beller (10):
>   sha1-array: provide oid_array_filter
>   submodule.c: fix indentation
>   submodule.c: sort changed_submodule_names before searching it
>   submodule.c: tighten scope of changed_submodule_names struct
>   submodule: store OIDs in changed_submodule_names
>   repository: repo_submodule_init to take a submodule struct
>   submodule: migrate get_next_submodule to use repository structs
>   submodule.c: fetch in submodules git directory instead of in worktree
>   fetch: try fetching submodules if needed objects were not fetched
>   builtin/fetch: check for submodule updates in any ref update
>
>  Documentation/technical/api-oid-array.txt    |   5 +
>  builtin/fetch.c                              |  11 +-
>  builtin/grep.c                               |  17 +-
>  builtin/ls-files.c                           |  12 +-
>  builtin/submodule--helper.c                  |   2 +-
>  repository.c                                 |  27 +-
>  repository.h                                 |  12 +-
>  sha1-array.c                                 |  17 ++
>  sha1-array.h                                 |   3 +
>  submodule.c                                  | 265 ++++++++++++++++---
>  t/helper/test-submodule-nested-repo-config.c |   8 +-
>  t/t5526-fetch-submodules.sh                  |  55 ++++
>  12 files changed, 346 insertions(+), 88 deletions(-)
>
>   git range-diff origin/sb/submodule-recursive-fetch-gets-the-tip... 
>
> 1:  3fbb06aedd ! 1:  0fdb0e2ad9 submodule.c: move global changed_submodule_names into fetch submodule struct
>     @@ -1,12 +1,11 @@
>      Author: Stefan Beller <sbeller@google.com>
>      
>     -    submodule.c: move global changed_submodule_names into fetch submodule struct
>     +    submodule.c: tighten scope of changed_submodule_names struct
>      
>          The `changed_submodule_names` are only used for fetching, so let's make it
>          part of the struct that is passed around for fetching submodules.
>      
>          Signed-off-by: Stefan Beller <sbeller@google.com>
>     -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
>      
>      diff --git a/submodule.c b/submodule.c
>      --- a/submodule.c
>     @@ -16,7 +15,6 @@
>       
>       static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
>      -static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP;
>     -+
>       static int initialized_fetch_ref_tips;
>       static struct oid_array ref_tips_before_fetch;
>       static struct oid_array ref_tips_after_fetch;
>     @@ -25,22 +23,8 @@
>       }
>       
>      -static void calculate_changed_submodule_paths(void)
>     -+struct submodule_parallel_fetch {
>     -+	int count;
>     -+	struct argv_array args;
>     -+	struct repository *r;
>     -+	const char *prefix;
>     -+	int command_line_option;
>     -+	int default_option;
>     -+	int quiet;
>     -+	int result;
>     -+
>     -+	struct string_list changed_submodule_names;
>     -+};
>     -+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
>     -+
>      +static void calculate_changed_submodule_paths(
>     -+	struct submodule_parallel_fetch *spf)
>     ++	struct string_list *changed_submodule_names)
>       {
>       	struct argv_array argv = ARGV_ARRAY_INIT;
>       	struct string_list changed_submodules = STRING_LIST_INIT_DUP;
>     @@ -49,30 +33,23 @@
>       
>       		if (!submodule_has_commits(path, commits))
>      -			string_list_append(&changed_submodule_names, name->string);
>     -+			string_list_append(&spf->changed_submodule_names,
>     ++			string_list_append(changed_submodule_names,
>      +					   name->string);
>       	}
>       
>       	free_submodules_oids(&changed_submodules);
>      @@
>     - 	return ret;
>     - }
>     - 
>     --struct submodule_parallel_fetch {
>     --	int count;
>     --	struct argv_array args;
>     --	struct repository *r;
>     --	const char *prefix;
>     --	int command_line_option;
>     --	int default_option;
>     --	int quiet;
>     --	int result;
>     --};
>     + 	int default_option;
>     + 	int quiet;
>     + 	int result;
>     ++
>     ++	struct string_list changed_submodule_names;
>     + };
>      -#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0}
>     --
>     ++#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
>     + 
>       static int get_fetch_recurse_config(const struct submodule *submodule,
>       				    struct submodule_parallel_fetch *spf)
>     - {
>      @@
>       		case RECURSE_SUBMODULES_ON_DEMAND:
>       			if (!submodule ||
>     @@ -88,7 +65,7 @@
>       
>      -	calculate_changed_submodule_paths();
>      -	string_list_sort(&changed_submodule_names);
>     -+	calculate_changed_submodule_paths(&spf);
>     ++	calculate_changed_submodule_paths(&spf.changed_submodule_names);
>      +	string_list_sort(&spf.changed_submodule_names);
>       	run_processes_parallel(max_parallel_jobs,
>       			       get_next_submodule,
> 2:  a64a8322a1 ! 2:  a11e6e39a2 submodule.c: do not copy around submodule list
>     @@ -1,6 +1,6 @@
>      Author: Stefan Beller <sbeller@google.com>
>      
>     -    submodule.c: do not copy around submodule list
>     +    submodule: store OIDs in changed_submodule_names
>      
>          'calculate_changed_submodule_paths' uses a local list to compute the
>          changed submodules, and then produces the result by copying appropriate
>     @@ -14,13 +14,13 @@
>          useful in a later patch.
>      
>          Signed-off-by: Stefan Beller <sbeller@google.com>
>     -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
>     +    Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
>      
>      diff --git a/submodule.c b/submodule.c
>      --- a/submodule.c
>      +++ b/submodule.c
>      @@
>     - 	struct submodule_parallel_fetch *spf)
>     + 	struct string_list *changed_submodule_names)
>       {
>       	struct argv_array argv = ARGV_ARRAY_INIT;
>      -	struct string_list changed_submodules = STRING_LIST_INIT_DUP;
>     @@ -34,10 +34,10 @@
>       	 * commits have been recorded upstream in "changed_submodule_names".
>       	 */
>      -	collect_changed_submodules(&changed_submodules, &argv);
>     -+	collect_changed_submodules(&spf->changed_submodule_names, &argv);
>     ++	collect_changed_submodules(changed_submodule_names, &argv);
>       
>      -	for_each_string_list_item(name, &changed_submodules) {
>     -+	for_each_string_list_item(name, &spf->changed_submodule_names) {
>     ++	for_each_string_list_item(name, changed_submodule_names) {
>       		struct oid_array *commits = name->util;
>       		const struct submodule *submodule;
>       		const char *path = NULL;
>     @@ -46,7 +46,7 @@
>       			continue;
>       
>      -		if (!submodule_has_commits(path, commits))
>     --			string_list_append(&spf->changed_submodule_names,
>     +-			string_list_append(changed_submodule_names,
>      -					   name->string);
>      +		if (submodule_has_commits(path, commits)) {
>      +			oid_array_clear(commits);
>     @@ -55,7 +55,7 @@
>       	}
>       
>      -	free_submodules_oids(&changed_submodules);
>     -+	string_list_remove_empty_items(&spf->changed_submodule_names, 1);
>     ++	string_list_remove_empty_items(changed_submodule_names, 1);
>      +
>       	argv_array_clear(&argv);
>       	oid_array_clear(&ref_tips_before_fetch);
> 3:  9341b92c83 ! 3:  3f4e0d4b8d repository: repo_submodule_init to take a submodule struct
>     @@ -5,17 +5,19 @@
>          When constructing a struct repository for a submodule for some revision
>          of the superproject where the submodule is not contained in the index,
>          it may not be present in the working tree currently either. In that
>     -    siutation giving a 'path' argument is not useful. Upgrade the
>     +    situation giving a 'path' argument is not useful. Upgrade the
>          repo_submodule_init function to take a struct submodule instead.
>     +    The submodule struct can be obtained via submodule_from_{path, name} or
>     +    an artificial submodule struct can be passed in.
>      
>     -    While we are at it, overhaul the repo_submodule_init function by renaming
>     -    the submodule repository struct, which is to be initialized, to a name
>     -    that is not confused with the struct submodule as easily.
>     +    While we are at it, rename the repository struct in the repo_submodule_init
>     +    function, which is to be initialized, to a name that is not confused with
>     +    the struct submodule as easily. Perform such renames in similar functions
>     +    as well.
>      
>          Also move its documentation into the header file.
>      
>          Signed-off-by: Stefan Beller <sbeller@google.com>
>     -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
>      
>      diff --git a/builtin/grep.c b/builtin/grep.c
>      --- a/builtin/grep.c
>     @@ -104,6 +106,19 @@
>       
>       static void show_ce(struct repository *repo, struct dir_struct *dir,
>      
>     +diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
>     +--- a/builtin/submodule--helper.c
>     ++++ b/builtin/submodule--helper.c
>     +@@
>     + 	if (!sub)
>     + 		BUG("We could get the submodule handle before?");
>     + 
>     +-	if (repo_submodule_init(&subrepo, the_repository, path))
>     ++	if (repo_submodule_init(&subrepo, the_repository, sub))
>     + 		die(_("could not get a repository handle for submodule '%s'"), path);
>     + 
>     + 	if (!repo_config_get_string(&subrepo, "core.worktree", &cw)) {
>     +
>      diff --git a/repository.c b/repository.c
>      --- a/repository.c
>      +++ b/repository.c
>     @@ -178,7 +193,8 @@
>      +/*
>      + * Initialize the repository 'subrepo' as the submodule given by the
>      + * struct submodule 'sub' in parent repository 'superproject'.
>     -+ * Return 0 upon success and a non-zero value upon failure.
>     ++ * Return 0 upon success and a non-zero value upon failure, which may happen
>     ++ * if the submodule is not found, or 'sub' is NULL.
>      + */
>      +struct submodule;
>      +int repo_submodule_init(struct repository *subrepo,
> 4:  cea909cbd4 ! 4:  b1566069e7 submodule: fetch in submodules git directory instead of in worktree
>     @@ -1,44 +1,19 @@
>      Author: Stefan Beller <sbeller@google.com>
>      
>     -    submodule: fetch in submodules git directory instead of in worktree
>     +    submodule: migrate get_next_submodule to use repository structs
>      
>     -    This patch started as a refactoring to make 'get_next_submodule' more
>     -    readable, but upon doing so, I realized that "git fetch" of the submodule
>     -    actually doesn't need to be run in the submodules worktree. So let's run
>     -    it in its git dir instead.
>     +    We used to recurse into submodules, even if they were broken having
>     +    only an objects directory. The child process executed in the submodule
>     +    would fail though if the submodule was broken.
>      
>     -    That should pave the way towards fetching submodules that are currently
>     -    not checked out.
>     -
>     -    This patch leaks the cp->dir in get_next_submodule, as any further
>     -    callback in run_processes_parallel doesn't have access to the child
>     -    process any more. In an early iteration of this patch, the function
>     -    get_submodule_repo_for directly returned the string containing the
>     -    git directory, which would be a better design choice for this patch.
>     -
>     -    However the next patch both fixes the memory leak of cp->dir and also has
>     -    a use case for using the full repository handle of the submodule, so
>     -    it makes sense to introduce the get_submodule_repo_for here already.
>     +    This patch tightens the check upfront, such that we do not need
>     +    to spawn a child process to find out if the submodule is broken.
>      
>          Signed-off-by: Stefan Beller <sbeller@google.com>
>     -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
>      
>      diff --git a/submodule.c b/submodule.c
>      --- a/submodule.c
>      +++ b/submodule.c
>     -@@
>     - 			 DEFAULT_GIT_DIR_ENVIRONMENT);
>     - }
>     - 
>     -+static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
>     -+{
>     -+	prepare_submodule_repo_env_no_git_dir(out);
>     -+	argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
>     -+}
>     -+
>     - /* Helper function to display the submodule header line prior to the full
>     -  * summary output. If it can locate the submodule objects directory it will
>     -  * attempt to lookup both the left and right commits and put them into the
>      @@
>       	return spf->default_option;
>       }
>     @@ -99,9 +74,8 @@
>      +		if (repo) {
>       			child_process_init(cp);
>      -			cp->dir = strbuf_detach(&submodule_path, NULL);
>     --			prepare_submodule_repo_env(&cp->env_array);
>     -+			prepare_submodule_repo_env_in_gitdir(&cp->env_array);
>     -+			cp->dir = xstrdup(repo->gitdir);
>     + 			prepare_submodule_repo_env(&cp->env_array);
>     ++			cp->dir = xstrdup(repo->worktree);
>       			cp->git_cmd = 1;
>       			if (!spf->quiet)
>       				strbuf_addf(err, "Fetching submodule %s%s\n",
>     @@ -113,27 +87,17 @@
>      +			repo_clear(repo);
>      +			free(repo);
>       			ret = 1;
>     ++		} else {
>     ++			/*
>     ++			 * An empty directory is normal,
>     ++			 * the submodule is not initialized
>     ++			 */
>     ++			if (S_ISGITLINK(ce->ce_mode) &&
>     ++			    !is_empty_dir(ce->name))
>     ++				die(_("Could not access submodule '%s'"), ce->name);
>       		}
>      -		strbuf_release(&submodule_path);
>      -		strbuf_release(&submodule_git_dir);
>       		strbuf_release(&submodule_prefix);
>       		if (ret) {
>       			spf->count++;
>     -
>     -diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
>     ---- a/t/t5526-fetch-submodules.sh
>     -+++ b/t/t5526-fetch-submodules.sh
>     -@@
>     - 
>     - 	test_must_fail git -C dst status &&
>     - 	test_must_fail git -C dst diff &&
>     --	test_must_fail git -C dst fetch --recurse-submodules
>     -+
>     -+	# git-fetch cannot find the git directory of the submodule,
>     -+	# so it will do nothing, successfully, as it cannot distinguish between
>     -+	# this broken submodule and a submodule that was just set active but
>     -+	# not cloned yet
>     -+	git -C dst fetch --recurse-submodules
>     - '
>     - 
>     - test_expect_success "fetch new commits when submodule got renamed" '
> -:  ---------- > 5:  2d98ff1201 submodule.c: fetch in submodules git directory instead of in worktree
> 5:  9ad0125310 ! 6:  092b9cbcba fetch: retry fetching submodules if needed objects were not fetched
>     @@ -1,25 +1,23 @@
>      Author: Stefan Beller <sbeller@google.com>
>      
>     -    fetch: retry fetching submodules if needed objects were not fetched
>     +    fetch: try fetching submodules if needed objects were not fetched
>      
>          Currently when git-fetch is asked to recurse into submodules, it dispatches
>          a plain "git-fetch -C <submodule-dir>" (with some submodule related options
>          such as prefix and recusing strategy, but) without any information of the
>          remote or the tip that should be fetched.
>      
>     -    This works surprisingly well in some workflows (such as using submodules
>     -    as a third party library), while not so well in other scenarios, such
>     -    as in a Gerrit topic-based workflow, that can tie together changes
>     -    (potentially across repositories) on the server side. One of the parts
>     -    of such a Gerrit workflow is to download a change when wanting to examine
>     -    it, and you'd want to have its submodule changes that are in the same
>     -    topic downloaded as well. However these submodule changes reside in their
>     -    own repository in their own ref (refs/changes/<int>).
>     +    But this default fetch is not sufficient, as a newly fetched commit in
>     +    the superproject could point to a commit in the submodule that is not
>     +    in the default refspec. This is common in workflows like Gerrit's.
>     +    When fetching a Gerrit change under review (from refs/changes/??), the
>     +    commits in that change likely point to submodule commits that have not
>     +    been merged to a branch yet.
>      
>     -    Retry fetching a submodule by object name if the object id that the
>     +    Try fetching a submodule by object id if the object id that the
>          superproject points to, cannot be found.
>      
>     -    This retrying does not happen when the "git fetch" done at the
>     +    The try does not happen when the "git fetch" done at the
>          superproject is not storing the fetched results in remote
>          tracking branches (i.e. instead just recording them to
>          FETCH_HEAD) in this step. A later patch will fix this.
>     @@ -32,7 +30,6 @@
>          in case the submodule recursion is set to "on".
>      
>          Signed-off-by: Stefan Beller <sbeller@google.com>
>     -    Signed-off-by: Junio C Hamano <gitster@pobox.com>
>      
>      diff --git a/builtin/fetch.c b/builtin/fetch.c
>      --- a/builtin/fetch.c
>     @@ -75,16 +72,16 @@
>       	int result;
>       
>       	struct string_list changed_submodule_names;
>     -+	struct get_next_submodule_task **retry;
>     -+	int retry_nr, retry_alloc;
>     ++	struct get_next_submodule_task **fetch_specific_oids;
>     ++	int fetch_specific_oids_nr, fetch_specific_oids_alloc;
>       };
>      -#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
>      +#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, \
>      +		  STRING_LIST_INIT_DUP, \
>      +		  NULL, 0, 0}
>       
>     - static void calculate_changed_submodule_paths(
>     - 	struct submodule_parallel_fetch *spf)
>     + static int get_fetch_recurse_config(const struct submodule *submodule,
>     + 				    struct submodule_parallel_fetch *spf)
>      @@
>       	return spf->default_option;
>       }
>     @@ -93,6 +90,8 @@
>      +	struct repository *repo;
>      +	const struct submodule *sub;
>      +	unsigned free_sub : 1; /* Do we need to free the submodule? */
>     ++
>     ++	/* fetch specific oids if set, otherwise fetch default refspec */
>      +	struct oid_array *commits;
>      +};
>      +
>     @@ -224,24 +223,29 @@
>      -			repo_clear(repo);
>      -			free(repo);
>      -			ret = 1;
>     --		}
>     --		strbuf_release(&submodule_prefix);
>     --		if (ret) {
>     - 			spf->count++;
>     ++			spf->count++;
>      +			*task_cb = task;
>      +
>      +			strbuf_release(&submodule_prefix);
>     - 			return 1;
>     -+		} else {
>     ++			return 1;
>     + 		} else {
>     ++
>      +			get_next_submodule_task_release(task);
>      +			free(task);
>     ++
>     + 			/*
>     + 			 * An empty directory is normal,
>     + 			 * the submodule is not initialized
>     +@@
>     + 			    !is_empty_dir(ce->name))
>     + 				die(_("Could not access submodule '%s'"), ce->name);
>       		}
>     - 	}
>     ++	}
>      +
>     -+	if (spf->retry_nr) {
>     -+		struct get_next_submodule_task *task = spf->retry[spf->retry_nr - 1];
>     ++	if (spf->fetch_specific_oids_nr) {
>     ++		struct get_next_submodule_task *task = spf->fetch_specific_oids[spf->fetch_specific_oids_nr - 1];
>      +		struct strbuf submodule_prefix = STRBUF_INIT;
>     -+		spf->retry_nr--;
>     ++		spf->fetch_specific_oids_nr--;
>      +
>      +		strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, task->sub->path);
>      +
>     @@ -262,9 +266,13 @@
>      +					  append_oid_to_argv, &cp->args);
>      +
>      +		*task_cb = task;
>     -+		strbuf_release(&submodule_prefix);
>     + 		strbuf_release(&submodule_prefix);
>     +-		if (ret) {
>     +-			spf->count++;
>     +-			return 1;
>     +-		}
>      +		return 1;
>     -+	}
>     + 	}
>      +
>       	return 0;
>       }
>     @@ -326,9 +334,11 @@
>      +			return 0;
>      +
>      +		task->commits = commits;
>     -+		ALLOC_GROW(spf->retry, spf->retry_nr + 1, spf->retry_alloc);
>     -+		spf->retry[spf->retry_nr] = task;
>     -+		spf->retry_nr++;
>     ++		ALLOC_GROW(spf->fetch_specific_oids,
>     ++			   spf->fetch_specific_oids_nr + 1,
>     ++			   spf->fetch_specific_oids_alloc);
>     ++		spf->fetch_specific_oids[spf->fetch_specific_oids_nr] = task;
>     ++		spf->fetch_specific_oids_nr++;
>      +		return 0;
>      +	}
>      +
>     @@ -346,18 +356,33 @@
>       	test_cmp expect actual
>       '
>       
>     -+test_expect_success "fetch new commits on-demand when they are not reachable" '
>     ++test_expect_success "fetch new submodule commits on-demand outside standard refspec" '
>     ++	# add a second submodule and ensure it is around in downstream first
>     ++	git clone submodule sub1 &&
>     ++	git submodule add ./sub1 &&
>     ++	git commit -m "adding a second submodule" &&
>     ++	git -C downstream pull &&
>     ++	git -C downstream submodule update --init --recursive &&
>     ++
>      +	git checkout --detach &&
>     ++
>      +	C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
>      +	git -C submodule update-ref refs/changes/1 $C &&
>      +	git update-index --cacheinfo 160000 $C submodule &&
>     -+	git commit -m "updated submodule outside of refs/heads" &&
>     -+	D=$(git rev-parse HEAD) &&
>     -+	git update-ref refs/changes/2 $D &&
>     ++	test_tick &&
>     ++
>     ++	D=$(git -C sub1 commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
>     ++	git -C sub1 update-ref refs/changes/2 $D &&
>     ++	git update-index --cacheinfo 160000 $D sub1 &&
>     ++
>     ++	git commit -m "updated submodules outside of refs/heads" &&
>     ++	E=$(git rev-parse HEAD) &&
>     ++	git update-ref refs/changes/2 $E &&
>      +	(
>      +		cd downstream &&
>     -+		git fetch --recurse-submodules --recurse-submodules-default on-demand origin refs/changes/2:refs/heads/my_branch &&
>     ++		git fetch --recurse-submodules origin refs/changes/2:refs/heads/my_branch &&
>      +		git -C submodule cat-file -t $C &&
>     ++		git -C sub1 cat-file -t $D &&
>      +		git checkout --recurse-submodules FETCH_HEAD
>      +	)
>      +'
> 6:  b8db3b45bf < -:  ---------- builtin/fetch: check for submodule updates for non branch fetches
> 7:  905a4f0d4f < -:  ---------- fixup! repository: repo_submodule_init to take a submodule struct
> -:  ---------- > 7:  11bf819782 builtin/fetch: check for submodule updates in any ref update

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched
  2018-10-26 20:41   ` Jonathan Tan
@ 2018-11-29  0:30     ` Stefan Beller
  0 siblings, 0 replies; 19+ messages in thread
From: Stefan Beller @ 2018-11-29  0:30 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

On Fri, Oct 26, 2018 at 1:41 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> > But this default fetch is not sufficient, as a newly fetched commit in
> > the superproject could point to a commit in the submodule that is not
> > in the default refspec. This is common in workflows like Gerrit's.
> > When fetching a Gerrit change under review (from refs/changes/??), the
> > commits in that change likely point to submodule commits that have not
> > been merged to a branch yet.
> >
> > Try fetching a submodule by object id if the object id that the
> > superproject points to, cannot be found.
>
> I see that these suggestions of mine (from [1]) was implemented, but not
> others. If you disagree, that's fine, but I think they should be
> discussed.

ok.

>
> > -             if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
> > -                 (recurse_submodules != RECURSE_SUBMODULES_ON))
> > +             if (recurse_submodules != RECURSE_SUBMODULES_OFF)
>
> I think the next patch should be squashed into this patch. Then you can
> say that these are now redundant and can be removed.

ok.

>
> > @@ -1218,8 +1218,12 @@ struct submodule_parallel_fetch {
> >       int result;
> >
> >       struct string_list changed_submodule_names;
> > +     struct get_next_submodule_task **fetch_specific_oids;
> > +     int fetch_specific_oids_nr, fetch_specific_oids_alloc;
> >  };
>
> Add documentation for fetch_specific_oids. Also, it might be better to
> call these oid_fetch_tasks and the struct, "struct fetch_task".

ok.

> Here, struct get_next_submodule_task is used for 2 different things:
>  (1) After the first round fetch, fetch_finish() uses it to determine if
>      a second round is needed.
>  (2) In submodule_parallel_fetch.fetch_specific_oids, to tell the
>      parallel runner (through get_next_submodule_task()) to do this
>      fetch.
>
> I think that it's better to have 2 different structs for these 2
> different uses. (Note that task_cb can be NULL for the second round.
> Unless we plan to recheck the OIDs? Currently we recheck them, but we
> don't do anything either way.)

I think it is easier to only have one struct until we have substantially
more to communicate. (1) is a boolean answer, for which (non-)NULL
is sufficient.


> I think that this is best described as the submodule that has no entry
> in .gitmodules? Maybe call it "get_non_gitmodules_submodule" and
> document it with a similar comment as in get_submodule_repo_for().

done.

>
> > +
> > +static struct get_next_submodule_task *get_next_submodule_task_create(
> > +     struct repository *r, const char *path)
> > +{
> > +     struct get_next_submodule_task *task = xmalloc(sizeof(*task));
> > +     memset(task, 0, sizeof(*task));
> > +
> > +     task->sub = submodule_from_path(r, &null_oid, path);
> > +     if (!task->sub) {
> > +             task->sub = get_default_submodule(path);
> > +             task->free_sub = 1;
> > +     }
> > +
> > +     return task;
> > +}
>
> Clearer to me would be to make get_next_submodule_task_create() return
> NULL if submodule_from_path() returns NULL.

I doubled down on this one and return NULL when get_default_submodule
(now renamed to get_non_gitmodules_submodule) returns NULL, as then we
can move the free() from get_next_submodule here and there we'll just have

    task = fetch_task_create(spf->r, ce->name);
    if (!task)
        continue;

which helps get_next_submodule to stay readable.


> Same comment about "on-demand" as in my previous e-mail.

I'd want to push back on refactoring and defer that to a later series.

> Break lines to 80.
[...]
> Same comment about "s--h" as in my previous e-mail.

done

> > +     /* Are there commits that do not exist? */
> > +     if (commits->nr) {
> > +             /* We already tried fetching them, do not try again. */
> > +             if (task->commits)
> > +                     return 0;
>
> Same comment about "task->commits" as in my previous e-mail.

Good call. I reordered the function read easier and added a comment
on any early return how it could happen.

>
> > diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
> > index 6c2f9b2ba2..5a75b57852 100755
>
> One more thing to test is the case where a submodule doesn't have a
> .gitmodules entry.

added a test.

I just resent the series.

Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2018-11-29  0:30 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-25 23:32 [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Stefan Beller
2018-10-25 23:32 ` [PATCH 01/10] sha1-array: provide oid_array_filter Stefan Beller
2018-10-25 23:32 ` [PATCH 02/10] submodule.c: fix indentation Stefan Beller
2018-10-25 23:32 ` [PATCH 03/10] submodule.c: sort changed_submodule_names before searching it Stefan Beller
2018-10-25 23:32 ` [PATCH 04/10] submodule.c: tighten scope of changed_submodule_names struct Stefan Beller
2018-10-25 23:32 ` [PATCH 05/10] submodule: store OIDs in changed_submodule_names Stefan Beller
2018-10-26 18:57   ` Jonathan Tan
2018-10-25 23:32 ` [PATCH 06/10] repository: repo_submodule_init to take a submodule struct Stefan Beller
2018-10-26 19:15   ` Jonathan Tan
2018-10-26 22:01     ` Stefan Beller
2018-10-25 23:32 ` [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs Stefan Beller
2018-10-26 19:26   ` Jonathan Tan
2018-10-25 23:32 ` [PATCH 08/10] submodule.c: fetch in submodules git directory instead of in worktree Stefan Beller
2018-10-25 23:32 ` [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched Stefan Beller
2018-10-26 20:41   ` Jonathan Tan
2018-11-29  0:30     ` Stefan Beller
2018-10-25 23:32 ` [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update Stefan Beller
2018-10-26 20:42   ` Jonathan Tan
2018-10-29  3:44 ` [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).