From: Stefan Beller <sbeller@google.com>
To: jonathantanmy@google.com
Cc: git@vger.kernel.org, Stefan Beller <sbeller@google.com>
Subject: [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip
Date: Thu, 25 Oct 2018 16:32:21 -0700 [thread overview]
Message-ID: <20181025233231.102245-1-sbeller@google.com> (raw)
Thanks Jonathan for your thoughtful comments,
here is the product of the discussion:
* I split up the patch to fetch in the worktree to be 2 patches,
each giving motivation on its own.
* the last patch is vastly simplified in code, but takes an extra test
* in [1], you remark "commits" ought not to be a pointer, but I decided against
that, as we keep the pointed-at value around for the same time span (until
we're done with that submodule) and we don't need to copy over the pointed-at
value into the new struct.
[1] https://public-inbox.org/git/20181018003954.139498-1-jonathantanmy@google.com/
This is still based on ao/submodule-wo-gitmodules-checked-out.
previous version
https://public-inbox.org/git/20181016181327.107186-1-sbeller@google.com/
Stefan Beller (10):
sha1-array: provide oid_array_filter
submodule.c: fix indentation
submodule.c: sort changed_submodule_names before searching it
submodule.c: tighten scope of changed_submodule_names struct
submodule: store OIDs in changed_submodule_names
repository: repo_submodule_init to take a submodule struct
submodule: migrate get_next_submodule to use repository structs
submodule.c: fetch in submodules git directory instead of in worktree
fetch: try fetching submodules if needed objects were not fetched
builtin/fetch: check for submodule updates in any ref update
Documentation/technical/api-oid-array.txt | 5 +
builtin/fetch.c | 11 +-
builtin/grep.c | 17 +-
builtin/ls-files.c | 12 +-
builtin/submodule--helper.c | 2 +-
repository.c | 27 +-
repository.h | 12 +-
sha1-array.c | 17 ++
sha1-array.h | 3 +
submodule.c | 265 ++++++++++++++++---
t/helper/test-submodule-nested-repo-config.c | 8 +-
t/t5526-fetch-submodules.sh | 55 ++++
12 files changed, 346 insertions(+), 88 deletions(-)
git range-diff origin/sb/submodule-recursive-fetch-gets-the-tip...
1: 3fbb06aedd ! 1: 0fdb0e2ad9 submodule.c: move global changed_submodule_names into fetch submodule struct
@@ -1,12 +1,11 @@
Author: Stefan Beller <sbeller@google.com>
- submodule.c: move global changed_submodule_names into fetch submodule struct
+ submodule.c: tighten scope of changed_submodule_names struct
The `changed_submodule_names` are only used for fetching, so let's make it
part of the struct that is passed around for fetching submodules.
Signed-off-by: Stefan Beller <sbeller@google.com>
- Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/submodule.c b/submodule.c
--- a/submodule.c
@@ -16,7 +15,6 @@
static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
-static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP;
-+
static int initialized_fetch_ref_tips;
static struct oid_array ref_tips_before_fetch;
static struct oid_array ref_tips_after_fetch;
@@ -25,22 +23,8 @@
}
-static void calculate_changed_submodule_paths(void)
-+struct submodule_parallel_fetch {
-+ int count;
-+ struct argv_array args;
-+ struct repository *r;
-+ const char *prefix;
-+ int command_line_option;
-+ int default_option;
-+ int quiet;
-+ int result;
-+
-+ struct string_list changed_submodule_names;
-+};
-+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
-+
+static void calculate_changed_submodule_paths(
-+ struct submodule_parallel_fetch *spf)
++ struct string_list *changed_submodule_names)
{
struct argv_array argv = ARGV_ARRAY_INIT;
struct string_list changed_submodules = STRING_LIST_INIT_DUP;
@@ -49,30 +33,23 @@
if (!submodule_has_commits(path, commits))
- string_list_append(&changed_submodule_names, name->string);
-+ string_list_append(&spf->changed_submodule_names,
++ string_list_append(changed_submodule_names,
+ name->string);
}
free_submodules_oids(&changed_submodules);
@@
- return ret;
- }
-
--struct submodule_parallel_fetch {
-- int count;
-- struct argv_array args;
-- struct repository *r;
-- const char *prefix;
-- int command_line_option;
-- int default_option;
-- int quiet;
-- int result;
--};
+ int default_option;
+ int quiet;
+ int result;
++
++ struct string_list changed_submodule_names;
+ };
-#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0}
--
++#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
+
static int get_fetch_recurse_config(const struct submodule *submodule,
struct submodule_parallel_fetch *spf)
- {
@@
case RECURSE_SUBMODULES_ON_DEMAND:
if (!submodule ||
@@ -88,7 +65,7 @@
- calculate_changed_submodule_paths();
- string_list_sort(&changed_submodule_names);
-+ calculate_changed_submodule_paths(&spf);
++ calculate_changed_submodule_paths(&spf.changed_submodule_names);
+ string_list_sort(&spf.changed_submodule_names);
run_processes_parallel(max_parallel_jobs,
get_next_submodule,
2: a64a8322a1 ! 2: a11e6e39a2 submodule.c: do not copy around submodule list
@@ -1,6 +1,6 @@
Author: Stefan Beller <sbeller@google.com>
- submodule.c: do not copy around submodule list
+ submodule: store OIDs in changed_submodule_names
'calculate_changed_submodule_paths' uses a local list to compute the
changed submodules, and then produces the result by copying appropriate
@@ -14,13 +14,13 @@
useful in a later patch.
Signed-off-by: Stefan Beller <sbeller@google.com>
- Signed-off-by: Junio C Hamano <gitster@pobox.com>
+ Reviewed-by: Jonathan Tan <jonathantanmy@google.com>
diff --git a/submodule.c b/submodule.c
--- a/submodule.c
+++ b/submodule.c
@@
- struct submodule_parallel_fetch *spf)
+ struct string_list *changed_submodule_names)
{
struct argv_array argv = ARGV_ARRAY_INIT;
- struct string_list changed_submodules = STRING_LIST_INIT_DUP;
@@ -34,10 +34,10 @@
* commits have been recorded upstream in "changed_submodule_names".
*/
- collect_changed_submodules(&changed_submodules, &argv);
-+ collect_changed_submodules(&spf->changed_submodule_names, &argv);
++ collect_changed_submodules(changed_submodule_names, &argv);
- for_each_string_list_item(name, &changed_submodules) {
-+ for_each_string_list_item(name, &spf->changed_submodule_names) {
++ for_each_string_list_item(name, changed_submodule_names) {
struct oid_array *commits = name->util;
const struct submodule *submodule;
const char *path = NULL;
@@ -46,7 +46,7 @@
continue;
- if (!submodule_has_commits(path, commits))
-- string_list_append(&spf->changed_submodule_names,
+- string_list_append(changed_submodule_names,
- name->string);
+ if (submodule_has_commits(path, commits)) {
+ oid_array_clear(commits);
@@ -55,7 +55,7 @@
}
- free_submodules_oids(&changed_submodules);
-+ string_list_remove_empty_items(&spf->changed_submodule_names, 1);
++ string_list_remove_empty_items(changed_submodule_names, 1);
+
argv_array_clear(&argv);
oid_array_clear(&ref_tips_before_fetch);
3: 9341b92c83 ! 3: 3f4e0d4b8d repository: repo_submodule_init to take a submodule struct
@@ -5,17 +5,19 @@
When constructing a struct repository for a submodule for some revision
of the superproject where the submodule is not contained in the index,
it may not be present in the working tree currently either. In that
- siutation giving a 'path' argument is not useful. Upgrade the
+ situation giving a 'path' argument is not useful. Upgrade the
repo_submodule_init function to take a struct submodule instead.
+ The submodule struct can be obtained via submodule_from_{path, name} or
+ an artificial submodule struct can be passed in.
- While we are at it, overhaul the repo_submodule_init function by renaming
- the submodule repository struct, which is to be initialized, to a name
- that is not confused with the struct submodule as easily.
+ While we are at it, rename the repository struct in the repo_submodule_init
+ function, which is to be initialized, to a name that is not confused with
+ the struct submodule as easily. Perform such renames in similar functions
+ as well.
Also move its documentation into the header file.
Signed-off-by: Stefan Beller <sbeller@google.com>
- Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/builtin/grep.c b/builtin/grep.c
--- a/builtin/grep.c
@@ -104,6 +106,19 @@
static void show_ce(struct repository *repo, struct dir_struct *dir,
+diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
+--- a/builtin/submodule--helper.c
++++ b/builtin/submodule--helper.c
+@@
+ if (!sub)
+ BUG("We could get the submodule handle before?");
+
+- if (repo_submodule_init(&subrepo, the_repository, path))
++ if (repo_submodule_init(&subrepo, the_repository, sub))
+ die(_("could not get a repository handle for submodule '%s'"), path);
+
+ if (!repo_config_get_string(&subrepo, "core.worktree", &cw)) {
+
diff --git a/repository.c b/repository.c
--- a/repository.c
+++ b/repository.c
@@ -178,7 +193,8 @@
+/*
+ * Initialize the repository 'subrepo' as the submodule given by the
+ * struct submodule 'sub' in parent repository 'superproject'.
-+ * Return 0 upon success and a non-zero value upon failure.
++ * Return 0 upon success and a non-zero value upon failure, which may happen
++ * if the submodule is not found, or 'sub' is NULL.
+ */
+struct submodule;
+int repo_submodule_init(struct repository *subrepo,
4: cea909cbd4 ! 4: b1566069e7 submodule: fetch in submodules git directory instead of in worktree
@@ -1,44 +1,19 @@
Author: Stefan Beller <sbeller@google.com>
- submodule: fetch in submodules git directory instead of in worktree
+ submodule: migrate get_next_submodule to use repository structs
- This patch started as a refactoring to make 'get_next_submodule' more
- readable, but upon doing so, I realized that "git fetch" of the submodule
- actually doesn't need to be run in the submodules worktree. So let's run
- it in its git dir instead.
+ We used to recurse into submodules, even if they were broken having
+ only an objects directory. The child process executed in the submodule
+ would fail though if the submodule was broken.
- That should pave the way towards fetching submodules that are currently
- not checked out.
-
- This patch leaks the cp->dir in get_next_submodule, as any further
- callback in run_processes_parallel doesn't have access to the child
- process any more. In an early iteration of this patch, the function
- get_submodule_repo_for directly returned the string containing the
- git directory, which would be a better design choice for this patch.
-
- However the next patch both fixes the memory leak of cp->dir and also has
- a use case for using the full repository handle of the submodule, so
- it makes sense to introduce the get_submodule_repo_for here already.
+ This patch tightens the check upfront, such that we do not need
+ to spawn a child process to find out if the submodule is broken.
Signed-off-by: Stefan Beller <sbeller@google.com>
- Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/submodule.c b/submodule.c
--- a/submodule.c
+++ b/submodule.c
-@@
- DEFAULT_GIT_DIR_ENVIRONMENT);
- }
-
-+static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out)
-+{
-+ prepare_submodule_repo_env_no_git_dir(out);
-+ argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT);
-+}
-+
- /* Helper function to display the submodule header line prior to the full
- * summary output. If it can locate the submodule objects directory it will
- * attempt to lookup both the left and right commits and put them into the
@@
return spf->default_option;
}
@@ -99,9 +74,8 @@
+ if (repo) {
child_process_init(cp);
- cp->dir = strbuf_detach(&submodule_path, NULL);
-- prepare_submodule_repo_env(&cp->env_array);
-+ prepare_submodule_repo_env_in_gitdir(&cp->env_array);
-+ cp->dir = xstrdup(repo->gitdir);
+ prepare_submodule_repo_env(&cp->env_array);
++ cp->dir = xstrdup(repo->worktree);
cp->git_cmd = 1;
if (!spf->quiet)
strbuf_addf(err, "Fetching submodule %s%s\n",
@@ -113,27 +87,17 @@
+ repo_clear(repo);
+ free(repo);
ret = 1;
++ } else {
++ /*
++ * An empty directory is normal,
++ * the submodule is not initialized
++ */
++ if (S_ISGITLINK(ce->ce_mode) &&
++ !is_empty_dir(ce->name))
++ die(_("Could not access submodule '%s'"), ce->name);
}
- strbuf_release(&submodule_path);
- strbuf_release(&submodule_git_dir);
strbuf_release(&submodule_prefix);
if (ret) {
spf->count++;
-
-diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
---- a/t/t5526-fetch-submodules.sh
-+++ b/t/t5526-fetch-submodules.sh
-@@
-
- test_must_fail git -C dst status &&
- test_must_fail git -C dst diff &&
-- test_must_fail git -C dst fetch --recurse-submodules
-+
-+ # git-fetch cannot find the git directory of the submodule,
-+ # so it will do nothing, successfully, as it cannot distinguish between
-+ # this broken submodule and a submodule that was just set active but
-+ # not cloned yet
-+ git -C dst fetch --recurse-submodules
- '
-
- test_expect_success "fetch new commits when submodule got renamed" '
-: ---------- > 5: 2d98ff1201 submodule.c: fetch in submodules git directory instead of in worktree
5: 9ad0125310 ! 6: 092b9cbcba fetch: retry fetching submodules if needed objects were not fetched
@@ -1,25 +1,23 @@
Author: Stefan Beller <sbeller@google.com>
- fetch: retry fetching submodules if needed objects were not fetched
+ fetch: try fetching submodules if needed objects were not fetched
Currently when git-fetch is asked to recurse into submodules, it dispatches
a plain "git-fetch -C <submodule-dir>" (with some submodule related options
such as prefix and recusing strategy, but) without any information of the
remote or the tip that should be fetched.
- This works surprisingly well in some workflows (such as using submodules
- as a third party library), while not so well in other scenarios, such
- as in a Gerrit topic-based workflow, that can tie together changes
- (potentially across repositories) on the server side. One of the parts
- of such a Gerrit workflow is to download a change when wanting to examine
- it, and you'd want to have its submodule changes that are in the same
- topic downloaded as well. However these submodule changes reside in their
- own repository in their own ref (refs/changes/<int>).
+ But this default fetch is not sufficient, as a newly fetched commit in
+ the superproject could point to a commit in the submodule that is not
+ in the default refspec. This is common in workflows like Gerrit's.
+ When fetching a Gerrit change under review (from refs/changes/??), the
+ commits in that change likely point to submodule commits that have not
+ been merged to a branch yet.
- Retry fetching a submodule by object name if the object id that the
+ Try fetching a submodule by object id if the object id that the
superproject points to, cannot be found.
- This retrying does not happen when the "git fetch" done at the
+ The try does not happen when the "git fetch" done at the
superproject is not storing the fetched results in remote
tracking branches (i.e. instead just recording them to
FETCH_HEAD) in this step. A later patch will fix this.
@@ -32,7 +30,6 @@
in case the submodule recursion is set to "on".
Signed-off-by: Stefan Beller <sbeller@google.com>
- Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/builtin/fetch.c b/builtin/fetch.c
--- a/builtin/fetch.c
@@ -75,16 +72,16 @@
int result;
struct string_list changed_submodule_names;
-+ struct get_next_submodule_task **retry;
-+ int retry_nr, retry_alloc;
++ struct get_next_submodule_task **fetch_specific_oids;
++ int fetch_specific_oids_nr, fetch_specific_oids_alloc;
};
-#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP }
+#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, \
+ STRING_LIST_INIT_DUP, \
+ NULL, 0, 0}
- static void calculate_changed_submodule_paths(
- struct submodule_parallel_fetch *spf)
+ static int get_fetch_recurse_config(const struct submodule *submodule,
+ struct submodule_parallel_fetch *spf)
@@
return spf->default_option;
}
@@ -93,6 +90,8 @@
+ struct repository *repo;
+ const struct submodule *sub;
+ unsigned free_sub : 1; /* Do we need to free the submodule? */
++
++ /* fetch specific oids if set, otherwise fetch default refspec */
+ struct oid_array *commits;
+};
+
@@ -224,24 +223,29 @@
- repo_clear(repo);
- free(repo);
- ret = 1;
-- }
-- strbuf_release(&submodule_prefix);
-- if (ret) {
- spf->count++;
++ spf->count++;
+ *task_cb = task;
+
+ strbuf_release(&submodule_prefix);
- return 1;
-+ } else {
++ return 1;
+ } else {
++
+ get_next_submodule_task_release(task);
+ free(task);
++
+ /*
+ * An empty directory is normal,
+ * the submodule is not initialized
+@@
+ !is_empty_dir(ce->name))
+ die(_("Could not access submodule '%s'"), ce->name);
}
- }
++ }
+
-+ if (spf->retry_nr) {
-+ struct get_next_submodule_task *task = spf->retry[spf->retry_nr - 1];
++ if (spf->fetch_specific_oids_nr) {
++ struct get_next_submodule_task *task = spf->fetch_specific_oids[spf->fetch_specific_oids_nr - 1];
+ struct strbuf submodule_prefix = STRBUF_INIT;
-+ spf->retry_nr--;
++ spf->fetch_specific_oids_nr--;
+
+ strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, task->sub->path);
+
@@ -262,9 +266,13 @@
+ append_oid_to_argv, &cp->args);
+
+ *task_cb = task;
-+ strbuf_release(&submodule_prefix);
+ strbuf_release(&submodule_prefix);
+- if (ret) {
+- spf->count++;
+- return 1;
+- }
+ return 1;
-+ }
+ }
+
return 0;
}
@@ -326,9 +334,11 @@
+ return 0;
+
+ task->commits = commits;
-+ ALLOC_GROW(spf->retry, spf->retry_nr + 1, spf->retry_alloc);
-+ spf->retry[spf->retry_nr] = task;
-+ spf->retry_nr++;
++ ALLOC_GROW(spf->fetch_specific_oids,
++ spf->fetch_specific_oids_nr + 1,
++ spf->fetch_specific_oids_alloc);
++ spf->fetch_specific_oids[spf->fetch_specific_oids_nr] = task;
++ spf->fetch_specific_oids_nr++;
+ return 0;
+ }
+
@@ -346,18 +356,33 @@
test_cmp expect actual
'
-+test_expect_success "fetch new commits on-demand when they are not reachable" '
++test_expect_success "fetch new submodule commits on-demand outside standard refspec" '
++ # add a second submodule and ensure it is around in downstream first
++ git clone submodule sub1 &&
++ git submodule add ./sub1 &&
++ git commit -m "adding a second submodule" &&
++ git -C downstream pull &&
++ git -C downstream submodule update --init --recursive &&
++
+ git checkout --detach &&
++
+ C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
+ git -C submodule update-ref refs/changes/1 $C &&
+ git update-index --cacheinfo 160000 $C submodule &&
-+ git commit -m "updated submodule outside of refs/heads" &&
-+ D=$(git rev-parse HEAD) &&
-+ git update-ref refs/changes/2 $D &&
++ test_tick &&
++
++ D=$(git -C sub1 commit-tree -m "new change outside refs/heads" HEAD^{tree}) &&
++ git -C sub1 update-ref refs/changes/2 $D &&
++ git update-index --cacheinfo 160000 $D sub1 &&
++
++ git commit -m "updated submodules outside of refs/heads" &&
++ E=$(git rev-parse HEAD) &&
++ git update-ref refs/changes/2 $E &&
+ (
+ cd downstream &&
-+ git fetch --recurse-submodules --recurse-submodules-default on-demand origin refs/changes/2:refs/heads/my_branch &&
++ git fetch --recurse-submodules origin refs/changes/2:refs/heads/my_branch &&
+ git -C submodule cat-file -t $C &&
++ git -C sub1 cat-file -t $D &&
+ git checkout --recurse-submodules FETCH_HEAD
+ )
+'
6: b8db3b45bf < -: ---------- builtin/fetch: check for submodule updates for non branch fetches
7: 905a4f0d4f < -: ---------- fixup! repository: repo_submodule_init to take a submodule struct
-: ---------- > 7: 11bf819782 builtin/fetch: check for submodule updates in any ref update
next reply other threads:[~2018-10-25 23:32 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-25 23:32 Stefan Beller [this message]
2018-10-25 23:32 ` [PATCH 01/10] sha1-array: provide oid_array_filter Stefan Beller
2018-10-25 23:32 ` [PATCH 02/10] submodule.c: fix indentation Stefan Beller
2018-10-25 23:32 ` [PATCH 03/10] submodule.c: sort changed_submodule_names before searching it Stefan Beller
2018-10-25 23:32 ` [PATCH 04/10] submodule.c: tighten scope of changed_submodule_names struct Stefan Beller
2018-10-25 23:32 ` [PATCH 05/10] submodule: store OIDs in changed_submodule_names Stefan Beller
2018-10-26 18:57 ` Jonathan Tan
2018-10-25 23:32 ` [PATCH 06/10] repository: repo_submodule_init to take a submodule struct Stefan Beller
2018-10-26 19:15 ` Jonathan Tan
2018-10-26 22:01 ` Stefan Beller
2018-10-25 23:32 ` [PATCH 07/10] submodule: migrate get_next_submodule to use repository structs Stefan Beller
2018-10-26 19:26 ` Jonathan Tan
2018-10-25 23:32 ` [PATCH 08/10] submodule.c: fetch in submodules git directory instead of in worktree Stefan Beller
2018-10-25 23:32 ` [PATCH 09/10] fetch: try fetching submodules if needed objects were not fetched Stefan Beller
2018-10-26 20:41 ` Jonathan Tan
2018-11-29 0:30 ` Stefan Beller
2018-10-25 23:32 ` [PATCH 10/10] builtin/fetch: check for submodule updates in any ref update Stefan Beller
2018-10-26 20:42 ` Jonathan Tan
2018-10-29 3:44 ` [PATCH 00/10] Resending sb/submodule-recursive-fetch-gets-the-tip Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181025233231.102245-1-sbeller@google.com \
--to=sbeller@google.com \
--cc=git@vger.kernel.org \
--cc=jonathantanmy@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).