git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [RFC] submodule: munge paths to submodule git directories
@ 2018-08-07 23:06 Brandon Williams
  2018-08-07 23:25 ` Jonathan Nieder
                   ` (3 more replies)
  0 siblings, 4 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-07 23:06 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Commit 0383bbb901 (submodule-config: verify submodule names as paths,
2018-04-30) introduced some checks to ensure that submodule names don't
include directory traversal components (e.g. "../").

This addresses the vulnerability identified in 0383bbb901 but the root
cause is that we use submodule names to construct paths to the
submodule's git directory.  What we really should do is munge the
submodule name before using it to construct a path.

Introduce a function "strbuf_submodule_gitdir()" which callers can use
to build a path to a submodule's gitdir.  This allows for a single
location where we can munge the submodule name (by url encoding it)
before using it as part of a path.

Signed-off-by: Brandon Williams <bmwill@google.com>
---

Using submodule names as is continues to be not such a good idea.  Maybe
we could apply something like this to stop using them as is.  url
encoding seems like the easiest approach, but I've also heard
suggestions that would could use the SHA1 of the submodule name.

Any thoughts?

 builtin/submodule--helper.c      | 10 ++++--
 dir.c                            |  2 +-
 repository.c                     |  3 +-
 submodule.c                      | 57 +++++++++++++++++++++++---------
 submodule.h                      |  3 ++
 t/t7400-submodule-basic.sh       |  2 +-
 t/t7406-submodule-update.sh      | 21 ++++--------
 t/t7410-submodule-checkout-to.sh |  6 ++--
 8 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index bd250ca216..37b7353167 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -1122,18 +1122,24 @@ static int add_possible_reference_from_superproject(
 	 * standard layout with .git/(modules/<name>)+/objects
 	 */
 	if (ends_with(alt->path, "/objects")) {
+		struct repository alternate;
 		char *sm_alternate;
 		struct strbuf sb = STRBUF_INIT;
 		struct strbuf err = STRBUF_INIT;
 		strbuf_add(&sb, alt->path, strlen(alt->path) - strlen("objects"));
 
+		repo_init(&alternate, sb.buf, NULL);
+
 		/*
 		 * We need to end the new path with '/' to mark it as a dir,
 		 * otherwise a submodule name containing '/' will be broken
 		 * as the last part of a missing submodule reference would
 		 * be taken as a file name.
 		 */
-		strbuf_addf(&sb, "modules/%s/", sas->submodule_name);
+		strbuf_reset(&sb);
+		strbuf_submodule_gitdir(&sb, &alternate, sas->submodule_name);
+		strbuf_addch(&sb, '/');
+		repo_clear(&alternate);
 
 		sm_alternate = compute_alternate_path(sb.buf, &err);
 		if (sm_alternate) {
@@ -1246,7 +1252,7 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 		usage_with_options(git_submodule_helper_usage,
 				   module_clone_options);
 
-	strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name);
+	strbuf_submodule_gitdir(&sb, the_repository, name);
 	sm_gitdir = absolute_pathdup(sb.buf);
 	strbuf_reset(&sb);
 
diff --git a/dir.c b/dir.c
index fe9bf58e4c..3463a5e0a5 100644
--- a/dir.c
+++ b/dir.c
@@ -3052,7 +3052,7 @@ static void connect_wt_gitdir_in_nested(const char *sub_worktree,
 		strbuf_reset(&sub_wt);
 		strbuf_reset(&sub_gd);
 		strbuf_addf(&sub_wt, "%s/%s", sub_worktree, sub->path);
-		strbuf_addf(&sub_gd, "%s/modules/%s", sub_gitdir, sub->name);
+		strbuf_submodule_gitdir(&sub_gd, &subrepo, sub->name);
 
 		connect_work_tree_and_git_dir(sub_wt.buf, sub_gd.buf, 1);
 	}
diff --git a/repository.c b/repository.c
index 02fe884603..15fabbd08d 100644
--- a/repository.c
+++ b/repository.c
@@ -194,8 +194,7 @@ int repo_submodule_init(struct repository *submodule,
 		 * submodule would not have a worktree.
 		 */
 		strbuf_reset(&gitdir);
-		strbuf_repo_git_path(&gitdir, superproject,
-				     "modules/%s", sub->name);
+		strbuf_submodule_gitdir(&gitdir, superproject, sub->name);
 
 		if (repo_init(submodule, gitdir.buf, NULL)) {
 			ret = -1;
diff --git a/submodule.c b/submodule.c
index 939d6870ec..1d571845e8 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1625,20 +1625,22 @@ int submodule_move_head(const char *path,
 				absorb_git_dir_into_superproject("", path,
 					ABSORB_GITDIR_RECURSE_SUBMODULES);
 		} else {
-			char *gitdir = xstrfmt("%s/modules/%s",
-				    get_git_common_dir(), sub->name);
-			connect_work_tree_and_git_dir(path, gitdir, 0);
-			free(gitdir);
+			struct strbuf gitdir = STRBUF_INIT;
+			strbuf_submodule_gitdir(&gitdir, the_repository,
+						sub->name);
+			connect_work_tree_and_git_dir(path, gitdir.buf, 0);
+			strbuf_release(&gitdir);
 
 			/* make sure the index is clean as well */
 			submodule_reset_index(path);
 		}
 
 		if (old_head && (flags & SUBMODULE_MOVE_HEAD_FORCE)) {
-			char *gitdir = xstrfmt("%s/modules/%s",
-				    get_git_common_dir(), sub->name);
-			connect_work_tree_and_git_dir(path, gitdir, 1);
-			free(gitdir);
+			struct strbuf gitdir = STRBUF_INIT;
+			strbuf_submodule_gitdir(&gitdir, the_repository,
+						sub->name);
+			connect_work_tree_and_git_dir(path, gitdir.buf, 1);
+			strbuf_release(&gitdir);
 		}
 	}
 
@@ -1711,7 +1713,7 @@ static void relocate_single_git_dir_into_superproject(const char *prefix,
 						      const char *path)
 {
 	char *old_git_dir = NULL, *real_old_git_dir = NULL, *real_new_git_dir = NULL;
-	const char *new_git_dir;
+	struct strbuf new_gitdir = STRBUF_INIT;
 	const struct submodule *sub;
 
 	if (submodule_uses_worktrees(path))
@@ -1729,10 +1731,10 @@ static void relocate_single_git_dir_into_superproject(const char *prefix,
 	if (!sub)
 		die(_("could not lookup name for submodule '%s'"), path);
 
-	new_git_dir = git_path("modules/%s", sub->name);
-	if (safe_create_leading_directories_const(new_git_dir) < 0)
-		die(_("could not create directory '%s'"), new_git_dir);
-	real_new_git_dir = real_pathdup(new_git_dir, 1);
+	strbuf_submodule_gitdir(&new_gitdir, the_repository, sub->name);
+	if (safe_create_leading_directories_const(new_gitdir.buf) < 0)
+		die(_("could not create directory '%s'"), new_gitdir.buf);
+	real_new_git_dir = real_pathdup(new_gitdir.buf, 1);
 
 	fprintf(stderr, _("Migrating git directory of '%s%s' from\n'%s' to\n'%s'\n"),
 		get_super_prefix_or_empty(), path,
@@ -1743,6 +1745,7 @@ static void relocate_single_git_dir_into_superproject(const char *prefix,
 	free(old_git_dir);
 	free(real_old_git_dir);
 	free(real_new_git_dir);
+	strbuf_release(&new_gitdir);
 }
 
 /*
@@ -1763,6 +1766,7 @@ void absorb_git_dir_into_superproject(const char *prefix,
 	/* Not populated? */
 	if (!sub_git_dir) {
 		const struct submodule *sub;
+		struct strbuf sub_gitdir = STRBUF_INIT;
 
 		if (err_code == READ_GITFILE_ERR_STAT_FAILED) {
 			/* unpopulated as expected */
@@ -1784,8 +1788,9 @@ void absorb_git_dir_into_superproject(const char *prefix,
 		sub = submodule_from_path(the_repository, &null_oid, path);
 		if (!sub)
 			die(_("could not lookup name for submodule '%s'"), path);
-		connect_work_tree_and_git_dir(path,
-			git_path("modules/%s", sub->name), 0);
+		strbuf_submodule_gitdir(&sub_gitdir, the_repository, sub->name);
+		connect_work_tree_and_git_dir(path, sub_gitdir.buf, 0);
+		strbuf_release(&sub_gitdir);
 	} else {
 		/* Is it already absorbed into the superprojects git dir? */
 		char *real_sub_git_dir = real_pathdup(sub_git_dir, 1);
@@ -1933,9 +1938,29 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule)
 			goto cleanup;
 		}
 		strbuf_reset(buf);
-		strbuf_git_path(buf, "%s/%s", "modules", sub->name);
+		strbuf_submodule_gitdir(buf, the_repository, sub->name);
 	}
 
 cleanup:
 	return ret;
 }
+
+void strbuf_submodule_gitdir(struct strbuf *buf, struct repository *r,
+			     const char *submodule_name)
+{
+	int modules_len;
+
+	strbuf_git_common_path(buf, r, "modules/");
+	modules_len = buf->len;
+	strbuf_addstr(buf, submodule_name);
+
+	/*
+	 * If the submodule gitdir already exists using the old location then
+	 * return that.
+	 */
+	if (!access(buf->buf, F_OK))
+		return;
+
+	strbuf_setlen(buf, modules_len);
+	strbuf_addstr_urlencode(buf, submodule_name, 1);
+}
diff --git a/submodule.h b/submodule.h
index 7856b8a0b3..b56f89740d 100644
--- a/submodule.h
+++ b/submodule.h
@@ -114,6 +114,9 @@ extern int push_unpushed_submodules(struct oid_array *commits,
  */
 int submodule_to_gitdir(struct strbuf *buf, const char *submodule);
 
+void strbuf_submodule_gitdir(struct strbuf *buf, struct repository *r,
+			     const char *submodule_name);
+
 #define SUBMODULE_MOVE_HEAD_DRY_RUN (1<<0)
 #define SUBMODULE_MOVE_HEAD_FORCE   (1<<1)
 extern int submodule_move_head(const char *path,
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index 812db137b8..fce164484d 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -932,7 +932,7 @@ test_expect_success 'recursive relative submodules stay relative' '
 		cd clone2 &&
 		git submodule update --init --recursive &&
 		echo "gitdir: ../.git/modules/sub3" >./sub3/.git_expect &&
-		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect
+		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect
 	) &&
 	test_cmp clone2/sub3/.git_expect clone2/sub3/.git &&
 	test_cmp clone2/sub3/dirdir/subsub/.git_expect clone2/sub3/dirdir/subsub/.git
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index 9e0d31700e..c4e94c168d 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -777,12 +777,8 @@ test_expect_success 'submodule add places git-dir in superprojects git-dir' '
 	(cd super &&
 	 mkdir deeper &&
 	 git submodule add ../submodule deeper/submodule &&
-	 (cd deeper/submodule &&
-	  git log > ../../expected
-	 ) &&
-	 (cd .git/modules/deeper/submodule &&
-	  git log > ../../../../actual
-	 ) &&
+	 git -C deeper/submodule log >expected &&
+	 git -C .git/modules/deeper%2fsubmodule log >actual &&
 	 test_cmp actual expected
 	)
 '
@@ -795,12 +791,9 @@ test_expect_success 'submodule update places git-dir in superprojects git-dir' '
 	(cd super2 &&
 	 git submodule init deeper/submodule &&
 	 git submodule update &&
-	 (cd deeper/submodule &&
-	  git log > ../../expected
-	 ) &&
-	 (cd .git/modules/deeper/submodule &&
-	  git log > ../../../../actual
-	 ) &&
+
+	 git -C deeper/submodule log >expected &&
+	 git -C .git/modules/deeper%2fsubmodule log >actual &&
 	 test_cmp actual expected
 	)
 '
@@ -815,9 +808,7 @@ test_expect_success 'submodule add places git-dir in superprojects git-dir recur
 	  git commit -m "added subsubmodule" &&
 	  git push origin :
 	 ) &&
-	 (cd .git/modules/deeper/submodule/modules/subsubmodule &&
-	  git log > ../../../../../actual
-	 ) &&
+	 git -C .git/modules/deeper%2fsubmodule/modules/subsubmodule log >actual &&
 	 git add deeper/submodule &&
 	 git commit -m "update submodule" &&
 	 git push origin : &&
diff --git a/t/t7410-submodule-checkout-to.sh b/t/t7410-submodule-checkout-to.sh
index 1acef32647..c408e9010a 100755
--- a/t/t7410-submodule-checkout-to.sh
+++ b/t/t7410-submodule-checkout-to.sh
@@ -35,8 +35,10 @@ test_expect_success 'checkout main' \
     (cd clone/main &&
 	git worktree add "$base_path/default_checkout/main" "$rev1_hash_main")'
 
-test_expect_failure 'can see submodule diffs just after checkout' \
-    '(cd default_checkout/main && git diff --submodule master"^!" | grep "file1 updated")'
+test_expect_success 'can see submodule diffs just after checkout' '
+	git -C default_checkout/main diff --submodule master"^!" >out &&
+	grep "file1 updated" out
+'
 
 test_expect_success 'checkout main and initialize independed clones' \
     'mkdir fully_cloned_submodule &&
-- 
2.18.0.597.ga71716f1ad-goog


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC] submodule: munge paths to submodule git directories
  2018-08-07 23:06 [RFC] submodule: munge paths to submodule git directories Brandon Williams
@ 2018-08-07 23:25 ` Jonathan Nieder
  2018-08-08  0:14 ` Junio C Hamano
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-07 23:25 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git, Stefan Beller

Hi,

Brandon Williams wrote:

> Commit 0383bbb901 (submodule-config: verify submodule names as paths,
> 2018-04-30) introduced some checks to ensure that submodule names don't
> include directory traversal components (e.g. "../").
>
> This addresses the vulnerability identified in 0383bbb901 but the root
> cause is that we use submodule names to construct paths to the
> submodule's git directory.  What we really should do is munge the
> submodule name before using it to construct a path.
>
> Introduce a function "strbuf_submodule_gitdir()" which callers can use
> to build a path to a submodule's gitdir.  This allows for a single
> location where we can munge the submodule name (by url encoding it)
> before using it as part of a path.
>
> Signed-off-by: Brandon Williams <bmwill@google.com>
> ---
> Using submodule names as is continues to be not such a good idea.  Maybe
> we could apply something like this to stop using them as is.  url
> encoding seems like the easiest approach, but I've also heard
> suggestions that would could use the SHA1 of the submodule name.
>
> Any thoughts?

I like this idea.  It avoids the security and complexity problems of
funny nested directories, while still making the submodule git dirs
easy to find.

The current behavior has been particularly a problem in practice when
submodule names are nested:

	[submodule "a"]
		url = https://www.example.com/a
		path = a/1

	[submodule "a/b"]
		url = https://www.example.com/a/b
		path = a/2

We don't enforce any constraint on submodule names to prevent that,
but it causes hard to diagnose errors at clone time:

	fatal: not a git repository: superproject/a/1/../../.git/modules/a
	Unable to fetch in submodule path 'a/1'
	fatal: not a git repository: superproject/a/1/../../.git/modules/a
	fatal: not a git repository: superproject/a/1/../../.git/modules/a
	fatal: not a git repository: superproject/a/1/../../.git/modules/a
	Fetched in submodule 'a/1', but it did not contain 55ca6286e3e4f4fba5d0448333fa99fc5a404a73. Direct fetching of that commit failed.

because the fetch in .git/modules/a is interfered with by
.git/modules/a/b.

[...]
> --- a/submodule.c
> +++ b/submodule.c
[...]
> @@ -1933,9 +1938,29 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule)
>  			goto cleanup;
>  		}
>  		strbuf_reset(buf);
> -		strbuf_git_path(buf, "%s/%s", "modules", sub->name);
> +		strbuf_submodule_gitdir(buf, the_repository, sub->name);
>  	}
>  
>  cleanup:
>  	return ret;
>  }
> +
> +void strbuf_submodule_gitdir(struct strbuf *buf, struct repository *r,
> +			     const char *submodule_name)
> +{
> +	int modules_len;

nit: size_t

> +
> +	strbuf_git_common_path(buf, r, "modules/");
> +	modules_len = buf->len;
> +	strbuf_addstr(buf, submodule_name);
> +
> +	/*
> +	 * If the submodule gitdir already exists using the old location then
> +	 * return that.
> +	 */

nit: "old-fashioned location" or something.  Maybe the function could
use an API comment describing what's going on (that there are two
naming conventions and we try first the old, then the new).

Should we validate the submodule_name here when accessing following the old
convention?

> +	if (!access(buf->buf, F_OK))
> +		return;
> +
> +	strbuf_setlen(buf, modules_len);
> +	strbuf_addstr_urlencode(buf, submodule_name, 1);
> +}
[...]
> --- a/t/t7400-submodule-basic.sh
> +++ b/t/t7400-submodule-basic.sh
> @@ -932,7 +932,7 @@ test_expect_success 'recursive relative submodules stay relative' '
>  		cd clone2 &&
>  		git submodule update --init --recursive &&
>  		echo "gitdir: ../.git/modules/sub3" >./sub3/.git_expect &&
> -		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect
> +		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect
>  	) &&
>  	test_cmp clone2/sub3/.git_expect clone2/sub3/.git &&
>  	test_cmp clone2/sub3/dirdir/subsub/.git_expect clone2/sub3/dirdir/subsub/.git

Sensible.

Can there be a test of the compatibility code as well?  (I mean a test
that manually sets up a submodule in .git/modules/dirdir/subsub and
ensures that it gets reused.)

I'll apply this, experiment with it, and report back.  Thanks for
writing it.

Sincerely,
Jonathan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC] submodule: munge paths to submodule git directories
  2018-08-07 23:06 [RFC] submodule: munge paths to submodule git directories Brandon Williams
  2018-08-07 23:25 ` Jonathan Nieder
@ 2018-08-08  0:14 ` Junio C Hamano
  2018-08-08 22:33 ` [PATCH 0/2] munge submodule names Brandon Williams
  2019-01-15  1:25 ` [RFC] " Jonathan Nieder
  3 siblings, 0 replies; 40+ messages in thread
From: Junio C Hamano @ 2018-08-08  0:14 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

> Introduce a function "strbuf_submodule_gitdir()" which callers can use
> to build a path to a submodule's gitdir.  This allows for a single
> location where we can munge the submodule name (by url encoding it)
> before using it as part of a path.

I am not sure about the name with "strbuf_" prefix; it is as bad as
using hungarian notation for variable names.

There probably are some existing offenders, but it is merely an
implementation detail (or a function signature) that the returned
value is communicated using a strbuf (contrast it with things like
strbuf_add() that is _about_ doing something to a strbuf), and in
the longer term I prefer to see them lose "strbuf_" from their names
and optionally use the same number of bytes to describe what they do
more clearly.  For this particular case, "submodule" and "gitdir"
are sufficient to signal what the function is about, I think, so the
"optionally use..." is not necessary---instead we get a name that is
shorte to type and to remember.

> Using submodule names as is continues to be not such a good idea.  Maybe
> we could apply something like this to stop using them as is.  url
> encoding seems like the easiest approach, but I've also heard
> suggestions that would could use the SHA1 of the submodule name.

Being human readable is a good trait to keep when possible.  

When you have two submodules with vastly different names
(e.g. "hello" and "bye"), and for some reason you need to go in to
.gitmodules and manually fix their entries up, "hash of name" does
not help you avoid mistakes (hashing "hello" and hashing "helo"
would give a name as different as hashing "bye", so when you see
[module "hel$something"] in .gitmodules, you would know that entry
is not about the "bye" module, but "hello" module, even if you do
not remember exactly if the module you want to manipulate was called
"hello" or "helo").  The same discussion applies against UUID.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 0/2] munge submodule names
  2018-08-07 23:06 [RFC] submodule: munge paths to submodule git directories Brandon Williams
  2018-08-07 23:25 ` Jonathan Nieder
  2018-08-08  0:14 ` Junio C Hamano
@ 2018-08-08 22:33 ` Brandon Williams
  2018-08-08 22:33   ` [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs Brandon Williams
  2018-08-08 22:33   ` [PATCH 2/2] submodule: munge paths to submodule git directories Brandon Williams
  2019-01-15  1:25 ` [RFC] " Jonathan Nieder
  3 siblings, 2 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-08 22:33 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Here's a more polished series taking into account some of the feedback
on the RFC.  As Junio pointed out URL encoding makes the directories
much more human readable, but I'm open to other ideas if we don't think
URL encoding is the right thing to do.

Brandon Williams (2):
  submodule: create helper to build paths to submodule gitdirs
  submodule: munge paths to submodule git directories

 builtin/submodule--helper.c      | 28 +++++++++++--
 dir.c                            |  2 +-
 git-submodule.sh                 |  7 ++--
 repository.c                     |  3 +-
 submodule.c                      | 67 ++++++++++++++++++++++----------
 submodule.h                      |  7 ++++
 t/t7400-submodule-basic.sh       | 32 ++++++++++++++-
 t/t7406-submodule-update.sh      | 21 +++-------
 t/t7410-submodule-checkout-to.sh |  6 ++-
 9 files changed, 126 insertions(+), 47 deletions(-)

-- 
2.18.0.597.ga71716f1ad-goog


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs
  2018-08-08 22:33 ` [PATCH 0/2] munge submodule names Brandon Williams
@ 2018-08-08 22:33   ` Brandon Williams
  2018-08-08 23:21     ` Stefan Beller
  2018-08-10 21:27     ` Junio C Hamano
  2018-08-08 22:33   ` [PATCH 2/2] submodule: munge paths to submodule git directories Brandon Williams
  1 sibling, 2 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-08 22:33 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Introduce a helper function "submodule_name_to_gitdir()" (and the
submodule--helper subcommand "gitdir") which constructs a path to a
submodule's gitdir, located in the provided repository's "modules"
directory.

This consolidates the logic needed to build up a path into a
repository's "modules" directory, abstracting away the fact that
submodule git directories are stored in a repository's common gitdir.
This makes it easier to adjust how submodules gitdir are stored in the
"modules" directory in a future patch.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 builtin/submodule--helper.c      | 28 +++++++++++++++--
 dir.c                            |  2 +-
 git-submodule.sh                 |  7 +++--
 repository.c                     |  3 +-
 submodule.c                      | 53 ++++++++++++++++++++------------
 submodule.h                      |  7 +++++
 t/t7410-submodule-checkout-to.sh |  6 ++--
 7 files changed, 75 insertions(+), 31 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index a3c4564c6c..5bfd2d0be9 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -906,6 +906,21 @@ static int module_name(int argc, const char **argv, const char *prefix)
 	return 0;
 }
 
+static int module_gitdir(int argc, const char **argv, const char *prefix)
+{
+	struct strbuf gitdir = STRBUF_INIT;
+
+	if (argc != 2)
+		usage(_("git submodule--helper gitdir <name>"));
+
+	submodule_name_to_gitdir(&gitdir, the_repository, argv[1]);
+
+	printf("%s\n", gitdir.buf);
+
+	strbuf_release(&gitdir);
+	return 0;
+}
+
 struct sync_cb {
 	const char *prefix;
 	unsigned int flags;
@@ -1268,18 +1283,24 @@ static int add_possible_reference_from_superproject(
 	 * standard layout with .git/(modules/<name>)+/objects
 	 */
 	if (ends_with(alt->path, "/objects")) {
+		struct repository alternate;
 		char *sm_alternate;
 		struct strbuf sb = STRBUF_INIT;
 		struct strbuf err = STRBUF_INIT;
 		strbuf_add(&sb, alt->path, strlen(alt->path) - strlen("objects"));
 
+		repo_init(&alternate, sb.buf, NULL);
+
 		/*
 		 * We need to end the new path with '/' to mark it as a dir,
 		 * otherwise a submodule name containing '/' will be broken
 		 * as the last part of a missing submodule reference would
 		 * be taken as a file name.
 		 */
-		strbuf_addf(&sb, "modules/%s/", sas->submodule_name);
+		strbuf_reset(&sb);
+		submodule_name_to_gitdir(&sb, &alternate, sas->submodule_name);
+		strbuf_addch(&sb, '/');
+		repo_clear(&alternate);
 
 		sm_alternate = compute_alternate_path(sb.buf, &err);
 		if (sm_alternate) {
@@ -1392,7 +1413,7 @@ static int module_clone(int argc, const char **argv, const char *prefix)
 		usage_with_options(git_submodule_helper_usage,
 				   module_clone_options);
 
-	strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name);
+	submodule_name_to_gitdir(&sb, the_repository, name);
 	sm_gitdir = absolute_pathdup(sb.buf);
 	strbuf_reset(&sb);
 
@@ -2018,7 +2039,7 @@ static int connect_gitdir_workingtree(int argc, const char **argv, const char *p
 	name = argv[1];
 	path = argv[2];
 
-	strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name);
+	submodule_name_to_gitdir(&sb, the_repository, name);
 	sm_gitdir = absolute_pathdup(sb.buf);
 
 	connect_work_tree_and_git_dir(path, sm_gitdir, 0);
@@ -2040,6 +2061,7 @@ struct cmd_struct {
 static struct cmd_struct commands[] = {
 	{"list", module_list, 0},
 	{"name", module_name, 0},
+	{"gitdir", module_gitdir, 0},
 	{"clone", module_clone, 0},
 	{"update-clone", update_clone, 0},
 	{"connect-gitdir-workingtree", connect_gitdir_workingtree, 0},
diff --git a/dir.c b/dir.c
index 21e6f2520a..7a9827ea4b 100644
--- a/dir.c
+++ b/dir.c
@@ -3053,7 +3053,7 @@ static void connect_wt_gitdir_in_nested(const char *sub_worktree,
 		strbuf_reset(&sub_wt);
 		strbuf_reset(&sub_gd);
 		strbuf_addf(&sub_wt, "%s/%s", sub_worktree, sub->path);
-		strbuf_addf(&sub_gd, "%s/modules/%s", sub_gitdir, sub->name);
+		submodule_name_to_gitdir(&sub_gd, &subrepo, sub->name);
 
 		connect_work_tree_and_git_dir(sub_wt.buf, sub_gd.buf, 1);
 	}
diff --git a/git-submodule.sh b/git-submodule.sh
index 8b5ad59bde..053747d290 100755
--- a/git-submodule.sh
+++ b/git-submodule.sh
@@ -252,12 +252,13 @@ Use -f if you really want to add it." >&2
 		fi
 
 	else
-		if test -d ".git/modules/$sm_name"
+		sm_gitdir="$(git submodule--helper gitdir "$sm_name")"
+		if test -d "$sm_gitdir"
 		then
 			if test -z "$force"
 			then
 				eval_gettextln >&2 "A git directory for '\$sm_name' is found locally with remote(s):"
-				GIT_DIR=".git/modules/$sm_name" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
+				GIT_DIR="$sm_gitdir" GIT_WORK_TREE=. git remote -v | grep '(fetch)' | sed -e s,^,"  ", -e s,' (fetch)',, >&2
 				die "$(eval_gettextln "\
 If you want to reuse this local git directory instead of cloning again from
   \$realrepo
@@ -577,7 +578,7 @@ cmd_update()
 			die "$(eval_gettext "Unable to find current \${remote_name}/\${branch} revision in submodule path '\$sm_path'")"
 		fi
 
-		if ! $(git config -f "$(git rev-parse --git-common-dir)/modules/$name/config" core.worktree) 2>/dev/null
+		if ! $(git config -f "$(git submodule--helper gitdir "$name")/config" core.worktree) 2>/dev/null
 		then
 			git submodule--helper connect-gitdir-workingtree "$name" "$sm_path"
 		fi
diff --git a/repository.c b/repository.c
index 5dd1486718..9da3c9d4b7 100644
--- a/repository.c
+++ b/repository.c
@@ -198,8 +198,7 @@ int repo_submodule_init(struct repository *submodule,
 		 * submodule would not have a worktree.
 		 */
 		strbuf_reset(&gitdir);
-		strbuf_repo_git_path(&gitdir, superproject,
-				     "modules/%s", sub->name);
+		submodule_name_to_gitdir(&gitdir, superproject, sub->name);
 
 		if (repo_init(submodule, gitdir.buf, NULL)) {
 			ret = -1;
diff --git a/submodule.c b/submodule.c
index 6e14547e9e..24eced34e7 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1536,14 +1536,15 @@ int bad_to_remove_submodule(const char *path, unsigned flags)
 
 void submodule_unset_core_worktree(const struct submodule *sub)
 {
-	char *config_path = xstrfmt("%s/modules/%s/config",
-				    get_git_common_dir(), sub->name);
+	struct strbuf config_path = STRBUF_INIT;
+	submodule_name_to_gitdir(&config_path, the_repository, sub->name);
+	strbuf_addstr(&config_path, "/config");
 
-	if (git_config_set_in_file_gently(config_path, "core.worktree", NULL))
+	if (git_config_set_in_file_gently(config_path.buf, "core.worktree", NULL))
 		warning(_("Could not unset core.worktree setting in submodule '%s'"),
 			  sub->path);
 
-	free(config_path);
+	strbuf_release(&config_path);
 }
 
 static const char *get_super_prefix_or_empty(void)
@@ -1639,20 +1640,22 @@ int submodule_move_head(const char *path,
 				absorb_git_dir_into_superproject("", path,
 					ABSORB_GITDIR_RECURSE_SUBMODULES);
 		} else {
-			char *gitdir = xstrfmt("%s/modules/%s",
-				    get_git_common_dir(), sub->name);
-			connect_work_tree_and_git_dir(path, gitdir, 0);
-			free(gitdir);
+			struct strbuf gitdir = STRBUF_INIT;
+			submodule_name_to_gitdir(&gitdir, the_repository,
+						 sub->name);
+			connect_work_tree_and_git_dir(path, gitdir.buf, 0);
+			strbuf_release(&gitdir);
 
 			/* make sure the index is clean as well */
 			submodule_reset_index(path);
 		}
 
 		if (old_head && (flags & SUBMODULE_MOVE_HEAD_FORCE)) {
-			char *gitdir = xstrfmt("%s/modules/%s",
-				    get_git_common_dir(), sub->name);
-			connect_work_tree_and_git_dir(path, gitdir, 1);
-			free(gitdir);
+			struct strbuf gitdir = STRBUF_INIT;
+			submodule_name_to_gitdir(&gitdir, the_repository,
+						 sub->name);
+			connect_work_tree_and_git_dir(path, gitdir.buf, 1);
+			strbuf_release(&gitdir);
 		}
 	}
 
@@ -1727,7 +1730,7 @@ static void relocate_single_git_dir_into_superproject(const char *prefix,
 						      const char *path)
 {
 	char *old_git_dir = NULL, *real_old_git_dir = NULL, *real_new_git_dir = NULL;
-	const char *new_git_dir;
+	struct strbuf new_gitdir = STRBUF_INIT;
 	const struct submodule *sub;
 
 	if (submodule_uses_worktrees(path))
@@ -1745,10 +1748,10 @@ static void relocate_single_git_dir_into_superproject(const char *prefix,
 	if (!sub)
 		die(_("could not lookup name for submodule '%s'"), path);
 
-	new_git_dir = git_path("modules/%s", sub->name);
-	if (safe_create_leading_directories_const(new_git_dir) < 0)
-		die(_("could not create directory '%s'"), new_git_dir);
-	real_new_git_dir = real_pathdup(new_git_dir, 1);
+	submodule_name_to_gitdir(&new_gitdir, the_repository, sub->name);
+	if (safe_create_leading_directories_const(new_gitdir.buf) < 0)
+		die(_("could not create directory '%s'"), new_gitdir.buf);
+	real_new_git_dir = real_pathdup(new_gitdir.buf, 1);
 
 	fprintf(stderr, _("Migrating git directory of '%s%s' from\n'%s' to\n'%s'\n"),
 		get_super_prefix_or_empty(), path,
@@ -1759,6 +1762,7 @@ static void relocate_single_git_dir_into_superproject(const char *prefix,
 	free(old_git_dir);
 	free(real_old_git_dir);
 	free(real_new_git_dir);
+	strbuf_release(&new_gitdir);
 }
 
 /*
@@ -1779,6 +1783,7 @@ void absorb_git_dir_into_superproject(const char *prefix,
 	/* Not populated? */
 	if (!sub_git_dir) {
 		const struct submodule *sub;
+		struct strbuf sub_gitdir = STRBUF_INIT;
 
 		if (err_code == READ_GITFILE_ERR_STAT_FAILED) {
 			/* unpopulated as expected */
@@ -1800,8 +1805,9 @@ void absorb_git_dir_into_superproject(const char *prefix,
 		sub = submodule_from_path(the_repository, &null_oid, path);
 		if (!sub)
 			die(_("could not lookup name for submodule '%s'"), path);
-		connect_work_tree_and_git_dir(path,
-			git_path("modules/%s", sub->name), 0);
+		submodule_name_to_gitdir(&sub_gitdir, the_repository, sub->name);
+		connect_work_tree_and_git_dir(path, sub_gitdir.buf, 0);
+		strbuf_release(&sub_gitdir);
 	} else {
 		/* Is it already absorbed into the superprojects git dir? */
 		char *real_sub_git_dir = real_pathdup(sub_git_dir, 1);
@@ -1949,9 +1955,16 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule)
 			goto cleanup;
 		}
 		strbuf_reset(buf);
-		strbuf_git_path(buf, "%s/%s", "modules", sub->name);
+		submodule_name_to_gitdir(buf, the_repository, sub->name);
 	}
 
 cleanup:
 	return ret;
 }
+
+void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
+			      const char *submodule_name)
+{
+	strbuf_git_common_path(buf, r, "modules/");
+	strbuf_addstr(buf, submodule_name);
+}
diff --git a/submodule.h b/submodule.h
index 4644683e6c..0de9410dda 100644
--- a/submodule.h
+++ b/submodule.h
@@ -114,6 +114,13 @@ extern int push_unpushed_submodules(struct oid_array *commits,
  */
 int submodule_to_gitdir(struct strbuf *buf, const char *submodule);
 
+/*
+ * Given a submodule name, create a path to where the submodule's gitdir lives
+ * inside of the provided repository's 'modules' directory.
+ */
+void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
+			      const char *submodule_name);
+
 #define SUBMODULE_MOVE_HEAD_DRY_RUN (1<<0)
 #define SUBMODULE_MOVE_HEAD_FORCE   (1<<1)
 extern int submodule_move_head(const char *path,
diff --git a/t/t7410-submodule-checkout-to.sh b/t/t7410-submodule-checkout-to.sh
index 1acef32647..c408e9010a 100755
--- a/t/t7410-submodule-checkout-to.sh
+++ b/t/t7410-submodule-checkout-to.sh
@@ -35,8 +35,10 @@ test_expect_success 'checkout main' \
     (cd clone/main &&
 	git worktree add "$base_path/default_checkout/main" "$rev1_hash_main")'
 
-test_expect_failure 'can see submodule diffs just after checkout' \
-    '(cd default_checkout/main && git diff --submodule master"^!" | grep "file1 updated")'
+test_expect_success 'can see submodule diffs just after checkout' '
+	git -C default_checkout/main diff --submodule master"^!" >out &&
+	grep "file1 updated" out
+'
 
 test_expect_success 'checkout main and initialize independed clones' \
     'mkdir fully_cloned_submodule &&
-- 
2.18.0.597.ga71716f1ad-goog


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-08 22:33 ` [PATCH 0/2] munge submodule names Brandon Williams
  2018-08-08 22:33   ` [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs Brandon Williams
@ 2018-08-08 22:33   ` Brandon Williams
  2018-08-09 21:26     ` Jeff King
  2018-08-16  0:19     ` Aaron Schrab
  1 sibling, 2 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-08 22:33 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Commit 0383bbb901 (submodule-config: verify submodule names as paths,
2018-04-30) introduced some checks to ensure that submodule names don't
include directory traversal components (e.g. "../").

This addresses the vulnerability identified in 0383bbb901 but the root
cause is that we use submodule names to construct paths to the
submodule's git directory.  What we really should do is munge the
submodule name before using it to construct a path.

Teach "submodule_name_to_gitdir()" to munge a submodule's name (by url
encoding it) before using it to build a path to the submodule's gitdir.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 submodule.c                 | 14 ++++++++++++++
 t/t7400-submodule-basic.sh  | 32 +++++++++++++++++++++++++++++++-
 t/t7406-submodule-update.sh | 21 ++++++---------------
 3 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/submodule.c b/submodule.c
index 24eced34e7..4854d88ce8 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1965,6 +1965,20 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule)
 void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
 			      const char *submodule_name)
 {
+	size_t modules_len;
+
 	strbuf_git_common_path(buf, r, "modules/");
+	modules_len = buf->len;
 	strbuf_addstr(buf, submodule_name);
+
+	/*
+	 * If the submodule gitdir already exists using the old-fashioned
+	 * location (which uses the submodule name as-is, without munging it)
+	 * then return that.
+	 */
+	if (!access(buf->buf, F_OK))
+		return;
+
+	strbuf_setlen(buf, modules_len);
+	strbuf_addstr_urlencode(buf, submodule_name, 1);
 }
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index 2c2c97e144..963693332c 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -933,7 +933,7 @@ test_expect_success 'recursive relative submodules stay relative' '
 		cd clone2 &&
 		git submodule update --init --recursive &&
 		echo "gitdir: ../.git/modules/sub3" >./sub3/.git_expect &&
-		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect
+		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect
 	) &&
 	test_cmp clone2/sub3/.git_expect clone2/sub3/.git &&
 	test_cmp clone2/sub3/dirdir/subsub/.git_expect clone2/sub3/dirdir/subsub/.git
@@ -1324,4 +1324,34 @@ test_expect_success 'recursive clone respects -q' '
 	test_must_be_empty actual
 '
 
+test_expect_success 'resolve submodule gitdir in superprojects modules directory' '
+	test_when_finished "rm -rf superproject submodule" &&
+
+	# Create a superproject with a submodule which contains a "/"
+	test_create_repo submodule &&
+	test_commit -C submodule one &&
+	test_create_repo superproject &&
+	git -C superproject submodule add ../submodule sub/module &&
+	git -C superproject commit -m "add submodule" &&
+
+	# "/" characters in submodule names are properly urlencoded before
+	# being used to construct a path to the submodules gitdir.
+	cat >expect <<-EOF &&
+	$(git -C superproject rev-parse --git-common-dir)/modules/sub%2fmodule
+	EOF
+	git -C superproject submodule--helper gitdir "sub/module" >actual &&
+	test_cmp expect actual &&
+	test_path_is_dir "superproject/.git/modules/sub%2fmodule" &&
+
+	# Test the old-fashioned way of storing submodules in the
+	# "modules" directory by directly renaming the submodules gitdir
+	mkdir superproject/.git/modules/sub/ &&
+	mv superproject/.git/modules/sub%2fmodule superproject/.git/modules/sub/module &&
+	cat >expect <<-EOF &&
+	$(git -C superproject rev-parse --git-common-dir)/modules/sub/module
+	EOF
+	git -C superproject submodule--helper gitdir "sub/module" >actual &&
+	test_cmp expect actual
+'
+
 test_done
diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh
index f604ef7a72..fb744c5c39 100755
--- a/t/t7406-submodule-update.sh
+++ b/t/t7406-submodule-update.sh
@@ -777,12 +777,8 @@ test_expect_success 'submodule add places git-dir in superprojects git-dir' '
 	(cd super &&
 	 mkdir deeper &&
 	 git submodule add ../submodule deeper/submodule &&
-	 (cd deeper/submodule &&
-	  git log > ../../expected
-	 ) &&
-	 (cd .git/modules/deeper/submodule &&
-	  git log > ../../../../actual
-	 ) &&
+	 git -C deeper/submodule log >expected &&
+	 git -C .git/modules/deeper%2fsubmodule log >actual &&
 	 test_cmp actual expected
 	)
 '
@@ -795,12 +791,9 @@ test_expect_success 'submodule update places git-dir in superprojects git-dir' '
 	(cd super2 &&
 	 git submodule init deeper/submodule &&
 	 git submodule update &&
-	 (cd deeper/submodule &&
-	  git log > ../../expected
-	 ) &&
-	 (cd .git/modules/deeper/submodule &&
-	  git log > ../../../../actual
-	 ) &&
+
+	 git -C deeper/submodule log >expected &&
+	 git -C .git/modules/deeper%2fsubmodule log >actual &&
 	 test_cmp actual expected
 	)
 '
@@ -815,9 +808,7 @@ test_expect_success 'submodule add places git-dir in superprojects git-dir recur
 	  git commit -m "added subsubmodule" &&
 	  git push origin :
 	 ) &&
-	 (cd .git/modules/deeper/submodule/modules/subsubmodule &&
-	  git log > ../../../../../actual
-	 ) &&
+	 git -C .git/modules/deeper%2fsubmodule/modules/subsubmodule log >actual &&
 	 git add deeper/submodule &&
 	 git commit -m "update submodule" &&
 	 git push origin : &&
-- 
2.18.0.597.ga71716f1ad-goog


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs
  2018-08-08 22:33   ` [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs Brandon Williams
@ 2018-08-08 23:21     ` Stefan Beller
  2018-08-09  0:45       ` Brandon Williams
  2018-08-10 21:27     ` Junio C Hamano
  1 sibling, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-08 23:21 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

On Wed, Aug 8, 2018 at 3:33 PM Brandon Williams <bmwill@google.com> wrote:
>
> Introduce a helper function "submodule_name_to_gitdir()" (and the
> submodule--helper subcommand "gitdir") which constructs a path to a
> submodule's gitdir, located in the provided repository's "modules"
> directory.

Makes sense.

>
> This consolidates the logic needed to build up a path into a
> repository's "modules" directory, abstracting away the fact that
> submodule git directories are stored in a repository's common gitdir.
> This makes it easier to adjust how submodules gitdir are stored in the
> "modules" directory in a future patch.

and yet, all places that we touch were and still are broken for old-style
submodules that have their git directory inside the working tree?
Do we need to pay attention to those, too?


> diff --git a/git-submodule.sh b/git-submodule.sh
> index 8b5ad59bde..053747d290 100755
> --- a/git-submodule.sh
> +++ b/git-submodule.sh

> @@ -577,7 +578,7 @@ cmd_update()
>                         die "$(eval_gettext "Unable to find current \${remote_name}/\${branch} revision in submodule path '\$sm_path'")"
>                 fi
>
> -               if ! $(git config -f "$(git rev-parse --git-common-dir)/modules/$name/config" core.worktree) 2>/dev/null
> +               if ! $(git config -f "$(git submodule--helper gitdir "$name")/config" core.worktree) 2>/dev/null

This will collide with origin/sb/submodule-update-in-c specifically
1c866b9831d (submodule--helper: replace connect-gitdir-workingtree
by ensure-core-worktree, 2018-08-03), but as that removes these lines,
it should be easy to resolve the conflict.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs
  2018-08-08 23:21     ` Stefan Beller
@ 2018-08-09  0:45       ` Brandon Williams
  0 siblings, 0 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-09  0:45 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git

On 08/08, Stefan Beller wrote:
> On Wed, Aug 8, 2018 at 3:33 PM Brandon Williams <bmwill@google.com> wrote:
> >
> > Introduce a helper function "submodule_name_to_gitdir()" (and the
> > submodule--helper subcommand "gitdir") which constructs a path to a
> > submodule's gitdir, located in the provided repository's "modules"
> > directory.
> 
> Makes sense.
> 
> >
> > This consolidates the logic needed to build up a path into a
> > repository's "modules" directory, abstracting away the fact that
> > submodule git directories are stored in a repository's common gitdir.
> > This makes it easier to adjust how submodules gitdir are stored in the
> > "modules" directory in a future patch.
> 
> and yet, all places that we touch were and still are broken for old-style
> submodules that have their git directory inside the working tree?
> Do we need to pay attention to those, too?

This series only tries to address the issues with submodules stored in
$GITDIR/modules/ and places in our codebase that explicitly reference
submodules stored there.

For those old-old-style submodules, wouldn't the absorb submodule
functions handle that migration?

> 
> 
> > diff --git a/git-submodule.sh b/git-submodule.sh
> > index 8b5ad59bde..053747d290 100755
> > --- a/git-submodule.sh
> > +++ b/git-submodule.sh
> 
> > @@ -577,7 +578,7 @@ cmd_update()
> >                         die "$(eval_gettext "Unable to find current \${remote_name}/\${branch} revision in submodule path '\$sm_path'")"
> >                 fi
> >
> > -               if ! $(git config -f "$(git rev-parse --git-common-dir)/modules/$name/config" core.worktree) 2>/dev/null
> > +               if ! $(git config -f "$(git submodule--helper gitdir "$name")/config" core.worktree) 2>/dev/null
> 
> This will collide with origin/sb/submodule-update-in-c specifically
> 1c866b9831d (submodule--helper: replace connect-gitdir-workingtree
> by ensure-core-worktree, 2018-08-03), but as that removes these lines,
> it should be easy to resolve the conflict.

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-08 22:33   ` [PATCH 2/2] submodule: munge paths to submodule git directories Brandon Williams
@ 2018-08-09 21:26     ` Jeff King
  2018-08-14 18:04       ` Brandon Williams
  2018-08-16  0:19     ` Aaron Schrab
  1 sibling, 1 reply; 40+ messages in thread
From: Jeff King @ 2018-08-09 21:26 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

On Wed, Aug 08, 2018 at 03:33:23PM -0700, Brandon Williams wrote:

> Commit 0383bbb901 (submodule-config: verify submodule names as paths,
> 2018-04-30) introduced some checks to ensure that submodule names don't
> include directory traversal components (e.g. "../").
> 
> This addresses the vulnerability identified in 0383bbb901 but the root
> cause is that we use submodule names to construct paths to the
> submodule's git directory.  What we really should do is munge the
> submodule name before using it to construct a path.
> 
> Teach "submodule_name_to_gitdir()" to munge a submodule's name (by url
> encoding it) before using it to build a path to the submodule's gitdir.

I like this approach very much, and I think using url encoding is much
better than an opaque hash (purely because it makes debugging and
inspection saner).

Two thoughts, though:

> +	modules_len = buf->len;
>  	strbuf_addstr(buf, submodule_name);
> +
> +	/*
> +	 * If the submodule gitdir already exists using the old-fashioned
> +	 * location (which uses the submodule name as-is, without munging it)
> +	 * then return that.
> +	 */
> +	if (!access(buf->buf, F_OK))
> +		return;

I think this backwards-compatibility is necessary to avoid pain. But
until it goes away, I don't think this is helping the vulnerability from
0383bbb901. Because there the issue was that the submodule name pointed
back into the working tree, so this access() would find the untrusted
working tree code and say "ah, an old-fashioned name!".

In theory a fresh clone could set a config option for "I only speak
use new-style modules". And there could even be a conversion program
that moves the modules as appropriate, fixes up the .git files in the
working tree, and then sets that config.

In fact, I think that config option _could_ be done by bumping
core.repositoryformatversion and then setting extensions.submodulenames
to "url" or something. Then you could never run into the confusing case
where you have a clone done by a new version of git (using new-style
names), but using an old-style version gets confused because it can't
find the module directories (instead, it would barf and say "I don't
know about that extension").

I don't know if any of that is worth it, though. We already fixed the
problem from 0383bbb901. There may be a _different_ "break out of the
modules directory" vulnerability, but since we disallow ".." it's hard
to see what it would be (the best I could come up with is maybe pointing
one module into the interior of another module, but I think you'd have
to trouble overwriting anything useful).

And while an old-style version of Git being confused might be annoying,
I suspect that bumping the repository version would be even _more_
annoying, because it would hit every command, not just ones that try to
touch those submodules.

> diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
> index 2c2c97e144..963693332c 100755
> --- a/t/t7400-submodule-basic.sh
> +++ b/t/t7400-submodule-basic.sh
> @@ -933,7 +933,7 @@ test_expect_success 'recursive relative submodules stay relative' '
>  		cd clone2 &&
>  		git submodule update --init --recursive &&
>  		echo "gitdir: ../.git/modules/sub3" >./sub3/.git_expect &&
> -		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect
> +		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect

One interesting thing about url-encoding is that it's not one-to-one.
This case could also be %2F, which is a different file (on a
case-sensitive filesystem). I think "%20" and "+" are similarly
interchangeable.

If we were decoding the filenames, that's fine. The round-trip is
lossless.

But that's not quite how the new code behaves. We encode the input and
then check to see if it matches an encoding we previously performed. So
if our urlencode routines ever change, this will subtly break.

I don't know how much it's worth caring about. We're not that likely to
change the routines ourself (though certainly a third-party
implementation would need to know our exact url-encoding decisions).

Some possible actions:

 0. Do nothing, and cross our fingers. ;)

 1. Don't use strbuf_addstr_urlencode(), but rather our own munging
    function which we know will remain stable (or alternatively, a flag
    to strbuf_addstr_urlencode to get the consistent behavior).

 2. Make sure we have tests which cover this, so at least somebody
    changing the urlencode decisions will see a breakage. Your test here
    covers the upper/lowercase one, but we might want one that covers
    "+". (There may be more ambiguous cases, but those are the ones I
    know about).

 3. Rather than check for the existence of names, decode what's actually
    in the modules/ directory to create an in-memory index of names.

    I hesitate to suggest that, because it's obviously way more
    complicated, and may perform worse if you have a lot of modules
    (since you have to readdir() and decode the whole directory just to
    look up one module).

    But I think it also gives a more elegant solution to the
    backwards-compatibility problem, since we could recognize both new
    and old-style names. There's some ambiguity (e.g., is "foo%2fbar"
    "foo/bar", or did somebody really have a name with a percent in
    it?),. but in theory you could respect either name (giving
    preference to new-style in case of a conflict).

    And I think the result would be immune to any directory-escape
    vulnerabilities, because we'd always start with what actually exists
    in $GIT_DIR/modules/, which we know _we_ will have written.

    Again, I'm not sure if it's worth the effort, but I thought I'd
    throw it out there.

-Peff

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs
  2018-08-08 22:33   ` [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs Brandon Williams
  2018-08-08 23:21     ` Stefan Beller
@ 2018-08-10 21:27     ` Junio C Hamano
  2018-08-10 21:45       ` Brandon Williams
  1 sibling, 1 reply; 40+ messages in thread
From: Junio C Hamano @ 2018-08-10 21:27 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git, Stefan Beller

Brandon Williams <bmwill@google.com> writes:

> Introduce a helper function "submodule_name_to_gitdir()" (and the
> submodule--helper subcommand "gitdir") which constructs a path to a
> submodule's gitdir, located in the provided repository's "modules"
> directory.
>
> This consolidates the logic needed to build up a path into a
> repository's "modules" directory, abstracting away the fact that
> submodule git directories are stored in a repository's common gitdir.
> This makes it easier to adjust how submodules gitdir are stored in the
> "modules" directory in a future patch.
>
> Signed-off-by: Brandon Williams <bmwill@google.com>
> ---
> ...
> @@ -2018,7 +2039,7 @@ static int connect_gitdir_workingtree(int argc, const char **argv, const char *p
>  	name = argv[1];
>  	path = argv[2];
>  
> -	strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name);
> +	submodule_name_to_gitdir(&sb, the_repository, name);
>  	sm_gitdir = absolute_pathdup(sb.buf);
>  
>  	connect_work_tree_and_git_dir(path, sm_gitdir, 0);

This function goes away with 1c866b98 ("submodule--helper: replace
connect-gitdir-workingtree by ensure-core-worktree", 2018-08-03) in
sb/submodule-update-in-c topic.  git-submodule.sh has simlar
conflicts.

I guess its replacement function does not care as deeply as its
predecessor used to about where the submodule's $GIT_DIR is, so the
correct resolution may be just to ignore the change made to this
caller to the new name-to-gitdir function.

It would have been nicer to see a bit better inter-developer
coordination, especially between two who sit practically next to
each other ;-)

Thanks.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs
  2018-08-10 21:27     ` Junio C Hamano
@ 2018-08-10 21:45       ` Brandon Williams
  0 siblings, 0 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-10 21:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Stefan Beller

On 08/10, Junio C Hamano wrote:
> Brandon Williams <bmwill@google.com> writes:
> 
> > Introduce a helper function "submodule_name_to_gitdir()" (and the
> > submodule--helper subcommand "gitdir") which constructs a path to a
> > submodule's gitdir, located in the provided repository's "modules"
> > directory.
> >
> > This consolidates the logic needed to build up a path into a
> > repository's "modules" directory, abstracting away the fact that
> > submodule git directories are stored in a repository's common gitdir.
> > This makes it easier to adjust how submodules gitdir are stored in the
> > "modules" directory in a future patch.
> >
> > Signed-off-by: Brandon Williams <bmwill@google.com>
> > ---
> > ...
> > @@ -2018,7 +2039,7 @@ static int connect_gitdir_workingtree(int argc, const char **argv, const char *p
> >  	name = argv[1];
> >  	path = argv[2];
> >  
> > -	strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name);
> > +	submodule_name_to_gitdir(&sb, the_repository, name);
> >  	sm_gitdir = absolute_pathdup(sb.buf);
> >  
> >  	connect_work_tree_and_git_dir(path, sm_gitdir, 0);
> 
> This function goes away with 1c866b98 ("submodule--helper: replace
> connect-gitdir-workingtree by ensure-core-worktree", 2018-08-03) in
> sb/submodule-update-in-c topic.  git-submodule.sh has simlar
> conflicts.
> 
> I guess its replacement function does not care as deeply as its
> predecessor used to about where the submodule's $GIT_DIR is, so the
> correct resolution may be just to ignore the change made to this
> caller to the new name-to-gitdir function.

Well that patch still cares about where the gitdir is except it
initializes a "struct repository" for the submodule and then builds a
path to the config using:

    cfg_file = xstrfmt("%s/config", subrepo.gitdir);

hmm...I didn't get a chance to look at that series but that line looks
wrong.  It probably should be more like:

  cfg_file = repo_git_path(&subrepo, "config");

I'll go comment on that series.

> 
> It would have been nicer to see a bit better inter-developer
> coordination, especially between two who sit practically next to
> each other ;-)
> 
> Thanks.

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-09 21:26     ` Jeff King
@ 2018-08-14 18:04       ` Brandon Williams
  2018-08-14 18:57         ` Jonathan Nieder
                           ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-14 18:04 UTC (permalink / raw)
  To: Jeff King; +Cc: git

On 08/09, Jeff King wrote:
> On Wed, Aug 08, 2018 at 03:33:23PM -0700, Brandon Williams wrote:
> 
> > Commit 0383bbb901 (submodule-config: verify submodule names as paths,
> > 2018-04-30) introduced some checks to ensure that submodule names don't
> > include directory traversal components (e.g. "../").
> > 
> > This addresses the vulnerability identified in 0383bbb901 but the root
> > cause is that we use submodule names to construct paths to the
> > submodule's git directory.  What we really should do is munge the
> > submodule name before using it to construct a path.
> > 
> > Teach "submodule_name_to_gitdir()" to munge a submodule's name (by url
> > encoding it) before using it to build a path to the submodule's gitdir.
> 
> I like this approach very much, and I think using url encoding is much
> better than an opaque hash (purely because it makes debugging and
> inspection saner).
> 
> Two thoughts, though:
> 
> > +	modules_len = buf->len;
> >  	strbuf_addstr(buf, submodule_name);
> > +
> > +	/*
> > +	 * If the submodule gitdir already exists using the old-fashioned
> > +	 * location (which uses the submodule name as-is, without munging it)
> > +	 * then return that.
> > +	 */
> > +	if (!access(buf->buf, F_OK))
> > +		return;
> 
> I think this backwards-compatibility is necessary to avoid pain. But
> until it goes away, I don't think this is helping the vulnerability from
> 0383bbb901. Because there the issue was that the submodule name pointed
> back into the working tree, so this access() would find the untrusted
> working tree code and say "ah, an old-fashioned name!".
> 
> In theory a fresh clone could set a config option for "I only speak
> use new-style modules". And there could even be a conversion program
> that moves the modules as appropriate, fixes up the .git files in the
> working tree, and then sets that config.
> 
> In fact, I think that config option _could_ be done by bumping
> core.repositoryformatversion and then setting extensions.submodulenames
> to "url" or something. Then you could never run into the confusing case
> where you have a clone done by a new version of git (using new-style
> names), but using an old-style version gets confused because it can't
> find the module directories (instead, it would barf and say "I don't
> know about that extension").
> 
> I don't know if any of that is worth it, though. We already fixed the
> problem from 0383bbb901. There may be a _different_ "break out of the
> modules directory" vulnerability, but since we disallow ".." it's hard
> to see what it would be (the best I could come up with is maybe pointing
> one module into the interior of another module, but I think you'd have
> to trouble overwriting anything useful).
> 
> And while an old-style version of Git being confused might be annoying,
> I suspect that bumping the repository version would be even _more_
> annoying, because it would hit every command, not just ones that try to
> touch those submodules.

Oh I know that this doesn't help with that vulnerability.  As you've
said we fix it and now disallow ".." at the submodule-config level so
really this path is simply about using what we get out of
submodule-config in a more sane manor.

> 
> > diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
> > index 2c2c97e144..963693332c 100755
> > --- a/t/t7400-submodule-basic.sh
> > +++ b/t/t7400-submodule-basic.sh
> > @@ -933,7 +933,7 @@ test_expect_success 'recursive relative submodules stay relative' '
> >  		cd clone2 &&
> >  		git submodule update --init --recursive &&
> >  		echo "gitdir: ../.git/modules/sub3" >./sub3/.git_expect &&
> > -		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect
> > +		echo "gitdir: ../../../.git/modules/sub3/modules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect
> 
> One interesting thing about url-encoding is that it's not one-to-one.
> This case could also be %2F, which is a different file (on a
> case-sensitive filesystem). I think "%20" and "+" are similarly
> interchangeable.
> 
> If we were decoding the filenames, that's fine. The round-trip is
> lossless.
> 
> But that's not quite how the new code behaves. We encode the input and
> then check to see if it matches an encoding we previously performed. So
> if our urlencode routines ever change, this will subtly break.
> 
> I don't know how much it's worth caring about. We're not that likely to
> change the routines ourself (though certainly a third-party
> implementation would need to know our exact url-encoding decisions).

This is exactly the reason why I wanted to get some opinions on what the
best thing to do here would be.  I _think_ the best thing would probably
be to write a specific routine to do the conversion, and it wouldn't
even have to be all that complex.  Basically I'm just interested in
converting '/' characters so that things no longer behave like
nested directories.

> 
> Some possible actions:
> 
>  0. Do nothing, and cross our fingers. ;)
> 
>  1. Don't use strbuf_addstr_urlencode(), but rather our own munging
>     function which we know will remain stable (or alternatively, a flag
>     to strbuf_addstr_urlencode to get the consistent behavior).
> 
>  2. Make sure we have tests which cover this, so at least somebody
>     changing the urlencode decisions will see a breakage. Your test here
>     covers the upper/lowercase one, but we might want one that covers
>     "+". (There may be more ambiguous cases, but those are the ones I
>     know about).
> 
>  3. Rather than check for the existence of names, decode what's actually
>     in the modules/ directory to create an in-memory index of names.
> 
>     I hesitate to suggest that, because it's obviously way more
>     complicated, and may perform worse if you have a lot of modules
>     (since you have to readdir() and decode the whole directory just to
>     look up one module).
> 
>     But I think it also gives a more elegant solution to the
>     backwards-compatibility problem, since we could recognize both new
>     and old-style names. There's some ambiguity (e.g., is "foo%2fbar"
>     "foo/bar", or did somebody really have a name with a percent in
>     it?),. but in theory you could respect either name (giving
>     preference to new-style in case of a conflict).
> 
>     And I think the result would be immune to any directory-escape
>     vulnerabilities, because we'd always start with what actually exists
>     in $GIT_DIR/modules/, which we know _we_ will have written.
> 
>     Again, I'm not sure if it's worth the effort, but I thought I'd
>     throw it out there.
> 
> -Peff

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 18:04       ` Brandon Williams
@ 2018-08-14 18:57         ` Jonathan Nieder
  2018-08-14 21:08           ` Stefan Beller
  2018-08-14 18:58         ` Jeff King
  2018-08-28 21:35         ` Stefan Beller
  2 siblings, 1 reply; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-14 18:57 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, git

Hi,

Brandon Williams wrote:
> On 08/09, Jeff King wrote:

>> One interesting thing about url-encoding is that it's not one-to-one.
>> This case could also be %2F, which is a different file (on a
>> case-sensitive filesystem). I think "%20" and "+" are similarly
>> interchangeable.
>>
>> If we were decoding the filenames, that's fine. The round-trip is
>> lossless.
>>
>> But that's not quite how the new code behaves. We encode the input and
>> then check to see if it matches an encoding we previously performed. So
>> if our urlencode routines ever change, this will subtly break.
>>
>> I don't know how much it's worth caring about. We're not that likely to
>> change the routines ourself (though certainly a third-party
>> implementation would need to know our exact url-encoding decisions).
>
> This is exactly the reason why I wanted to get some opinions on what the
> best thing to do here would be.  I _think_ the best thing would probably
> be to write a specific routine to do the conversion, and it wouldn't
> even have to be all that complex.  Basically I'm just interested in
> converting '/' characters so that things no longer behave like
> nested directories.

First of all, I think the behavior with this patch is already much
better than the previous status quo.  I'm using the patch now and am
very happy with it.

Second, what if we store the pathname in config?  We already store the
URL there:

	[submodule "plugins/hooks"]
		url = https://gerrit.googlesource.com/plugins/hooks

So we could (as a followup patch) do something like

	[submodule "plugins/hooks"]
		url = https://gerrit.googlesource.com/plugins/hooks
		gitdirname = plugins%2fhooks

and use that for lookups instead of regenerating the directory name.
What do you think?

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 18:04       ` Brandon Williams
  2018-08-14 18:57         ` Jonathan Nieder
@ 2018-08-14 18:58         ` Jeff King
  2018-08-28 21:35         ` Stefan Beller
  2 siblings, 0 replies; 40+ messages in thread
From: Jeff King @ 2018-08-14 18:58 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

On Tue, Aug 14, 2018 at 11:04:06AM -0700, Brandon Williams wrote:

> > I think this backwards-compatibility is necessary to avoid pain. But
> > until it goes away, I don't think this is helping the vulnerability from
> > 0383bbb901. Because there the issue was that the submodule name pointed
> > back into the working tree, so this access() would find the untrusted
> > working tree code and say "ah, an old-fashioned name!".
> [...]
> 
> Oh I know that this doesn't help with that vulnerability.  As you've
> said we fix it and now disallow ".." at the submodule-config level so
> really this path is simply about using what we get out of
> submodule-config in a more sane manor.

OK, I'm alright with that as long as we are all on the same page. I
think I mistook "this addresses the vulnerability" from your commit
message the wrong way. I took it as "this patch", but reading it again,
you simply mean "the '..' handling we already did".

I do think eventually dropping this back-compatibility could save us
from another directory-escape problem, but it's hard to justify the
real-world pain for a hypothetical benefit. Maybe in a few years we
could get rid of it in a major version bump.

> > One interesting thing about url-encoding is that it's not one-to-one.
> > This case could also be %2F, which is a different file (on a
> > case-sensitive filesystem). I think "%20" and "+" are similarly
> > interchangeable.
> > 
> > If we were decoding the filenames, that's fine. The round-trip is
> > lossless.
> > 
> > But that's not quite how the new code behaves. We encode the input and
> > then check to see if it matches an encoding we previously performed. So
> > if our urlencode routines ever change, this will subtly break.
> > 
> > I don't know how much it's worth caring about. We're not that likely to
> > change the routines ourself (though certainly a third-party
> > implementation would need to know our exact url-encoding decisions).
> 
> This is exactly the reason why I wanted to get some opinions on what the
> best thing to do here would be.  I _think_ the best thing would probably
> be to write a specific routine to do the conversion, and it wouldn't
> even have to be all that complex.  Basically I'm just interested in
> converting '/' characters so that things no longer behave like
> nested directories.

I think we benefit from catching names that would trigger filesystem
case-folding, too. If I have submodules with names "foo" and "FOO", we
would not want to confuse them (or at least we should confuse them
equally on all platforms). I doubt you can do anything malicious, but it
might simply be annoying.

That implies to me using a custom function (even if its encoded form
ends up being understandable as url-encoding).

-Peff

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 18:57         ` Jonathan Nieder
@ 2018-08-14 21:08           ` Stefan Beller
  2018-08-14 21:12             ` Jonathan Nieder
  0 siblings, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-14 21:08 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Brandon Williams, Jeff King, git

On Tue, Aug 14, 2018 at 11:57 AM Jonathan Nieder <jrnieder@gmail.com> wrote:
>
> Hi,
>
> Brandon Williams wrote:
> > On 08/09, Jeff King wrote:
>
> >> One interesting thing about url-encoding is that it's not one-to-one.
> >> This case could also be %2F, which is a different file (on a
> >> case-sensitive filesystem). I think "%20" and "+" are similarly
> >> interchangeable.
> >>
> >> If we were decoding the filenames, that's fine. The round-trip is
> >> lossless.
> >>
> >> But that's not quite how the new code behaves. We encode the input and
> >> then check to see if it matches an encoding we previously performed. So
> >> if our urlencode routines ever change, this will subtly break.
> >>
> >> I don't know how much it's worth caring about. We're not that likely to
> >> change the routines ourself (though certainly a third-party
> >> implementation would need to know our exact url-encoding decisions).
> >
> > This is exactly the reason why I wanted to get some opinions on what the
> > best thing to do here would be.  I _think_ the best thing would probably
> > be to write a specific routine to do the conversion, and it wouldn't
> > even have to be all that complex.  Basically I'm just interested in
> > converting '/' characters so that things no longer behave like
> > nested directories.
>
> First of all, I think the behavior with this patch is already much
> better than the previous status quo.  I'm using the patch now and am
> very happy with it.
>
> Second, what if we store the pathname in config?  We already store the
> URL there:
>
>         [submodule "plugins/hooks"]
>                 url = https://gerrit.googlesource.com/plugins/hooks
>
> So we could (as a followup patch) do something like
>
>         [submodule "plugins/hooks"]
>                 url = https://gerrit.googlesource.com/plugins/hooks
>                 gitdirname = plugins%2fhooks
>
> and use that for lookups instead of regenerating the directory name.
> What do you think?

As I just looked at worktree code, this sounds intriguing for the wrong
reason (again), as a user may want to point the gitdirname to a repository
that they have already on disk outside the actual superproject. They
would be reinventing worktrees in the submodule space. ;-)

This would open up the security hole that we just had, again.
So we'd have to make sure that the gitdirname (instead of the
now meaningless subsection name) is proof to ../ attacks.

I feel uneasy about this as then the user might come in
and move submodules and repoint the gitdirname...
to a not url encoded path. Exposing this knob just
asks for trouble, no?

On the other hand, the only requirement for the "name" is
now uniqueness, and that is implied with subsections,
so I guess it looks elegant.

What would happen if gitdirname is changed as part of
history? (The same problem we have now with changing
the subsection name)

The more I think about it the less appealing this is, but it looks
elegant.

Stefan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 21:08           ` Stefan Beller
@ 2018-08-14 21:12             ` Jonathan Nieder
  2018-08-14 22:34               ` Stefan Beller
  0 siblings, 1 reply; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-14 21:12 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Brandon Williams, Jeff King, git

Hi,

Stefan Beller wrote:
> On Tue, Aug 14, 2018 at 11:57 AM Jonathan Nieder <jrnieder@gmail.com> wrote:

>> Second, what if we store the pathname in config?  We already store the
>> URL there:
>>
>>         [submodule "plugins/hooks"]
>>                 url = https://gerrit.googlesource.com/plugins/hooks
>>
>> So we could (as a followup patch) do something like
>>
>>         [submodule "plugins/hooks"]
>>                 url = https://gerrit.googlesource.com/plugins/hooks
>>                 gitdirname = plugins%2fhooks
>>
>> and use that for lookups instead of regenerating the directory name.
>> What do you think?
>
> As I just looked at worktree code, this sounds intriguing for the wrong
> reason (again), as a user may want to point the gitdirname to a repository
> that they have already on disk outside the actual superproject. They
> would be reinventing worktrees in the submodule space. ;-)
>
> This would open up the security hole that we just had, again.
> So we'd have to make sure that the gitdirname (instead of the
> now meaningless subsection name) is proof to ../ attacks.
>
> I feel uneasy about this as then the user might come in
> and move submodules and repoint the gitdirname...
> to a not url encoded path. Exposing this knob just
> asks for trouble, no?

What if we forbid directory separator characters in the gitdirname?

[...]
> What would happen if gitdirname is changed as part of
> history? (The same problem we have now with changing
> the subsection name)

In this proposal, it would only be read from config, not from
.gitmodules.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 21:12             ` Jonathan Nieder
@ 2018-08-14 22:34               ` Stefan Beller
  2018-08-16  2:34                 ` Jonathan Nieder
  0 siblings, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-14 22:34 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Brandon Williams, Jeff King, git

On Tue, Aug 14, 2018 at 2:12 PM Jonathan Nieder <jrnieder@gmail.com> wrote:
>
> Hi,
>
> Stefan Beller wrote:
> > On Tue, Aug 14, 2018 at 11:57 AM Jonathan Nieder <jrnieder@gmail.com> wrote:
>
> >> Second, what if we store the pathname in config?  We already store the
> >> URL there:
> >>
> >>         [submodule "plugins/hooks"]
> >>                 url = https://gerrit.googlesource.com/plugins/hooks
> >>
> >> So we could (as a followup patch) do something like
> >>
> >>         [submodule "plugins/hooks"]
> >>                 url = https://gerrit.googlesource.com/plugins/hooks
> >>                 gitdirname = plugins%2fhooks
> >>
> >> and use that for lookups instead of regenerating the directory name.
> >> What do you think?
> >
> > As I just looked at worktree code, this sounds intriguing for the wrong
> > reason (again), as a user may want to point the gitdirname to a repository
> > that they have already on disk outside the actual superproject. They
> > would be reinventing worktrees in the submodule space. ;-)
> >
> > This would open up the security hole that we just had, again.
> > So we'd have to make sure that the gitdirname (instead of the
> > now meaningless subsection name) is proof to ../ attacks.
> >
> > I feel uneasy about this as then the user might come in
> > and move submodules and repoint the gitdirname...
> > to a not url encoded path. Exposing this knob just
> > asks for trouble, no?
>
> What if we forbid directory separator characters in the gitdirname?

Fine with me, but ideally we'd want to allow sharding the
submodules. When you have 1000 submodules
we'd want them not all inside the toplevel "modules/" ?
Up to now we could just wave hands and claim the user
(who is clearly experienced with submodules as they
use so many of them) would shard it properly.

With this scheme we loose the ability to shard.

> [...]
> > What would happen if gitdirname is changed as part of
> > history? (The same problem we have now with changing
> > the subsection name)
>
> In this proposal, it would only be read from config, not from
> .gitmodules.

Ah good point. That makes sense.

Stepping back a bit regarding the config:
When I clone gerrit (or any repo using submodules)

$ git clone --recurse-submodules \
  https://gerrit.googlesource.com/gerrit g2
[...]
$ cat g2/.git/config
[submodule]
    active = .
[submodule "plugins/codemirror-editor"]
    url = https://gerrit.googlesource.com/plugins/codemirror-editor
[... more urls to follow...]

Originally we have had the url in the config, (a) that we can change
the URLs after the "git submodule init" and "git submodule update"
step that actually clones the submodule if not present and much more
importantly (b) to know which submodule "was initialized/active".

Now that we have the submodule.active or even
submodule.<name>.active flags, we do not need (b) any more.
So the URL turns into a useless piece of cruft that just is unneeded
and might confuse the user.

So maybe I'd want to propose a patch that removes
submodule.<name>.url from the config once it is cloned.
(I just read up on "submodule sync" again, but that might not
even need special care for this new world)

And with all that said, I think if we can avoid having the submodules
gitdir in the config, the config would look much cleaner, too.

But maybe that is the wrong thing to optimize for. ;-)
It just demonstrates that we'd have a submodule specific
thing again in the config.

So my preference would be to do a similar thing as
url-encoding as that solves the issue of slashes and
potentially of case sensitivity (e.g. encode upper case A
as lower case with underscore _a)

However the transition worries me, as it transitions
within the same namespace. Back then when we
transferred from the .git dir inside the submodules
working tree to the embedded version in the superprojects
.git dir, there was no overlap, and any potential directory
in .git/modules/ that was already there, was highly
unusual, so asking the user for help is the reasonable
thing to do.
But now we might run into issues that has overlap between
old(name as is) and new (urlencoded) world.

So maybe we also want to transition from

    modules/<name>

to

    submodules/<urlencoded(<name>)>

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-08 22:33   ` [PATCH 2/2] submodule: munge paths to submodule git directories Brandon Williams
  2018-08-09 21:26     ` Jeff King
@ 2018-08-16  0:19     ` Aaron Schrab
  1 sibling, 0 replies; 40+ messages in thread
From: Aaron Schrab @ 2018-08-16  0:19 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

At 15:33 -0700 08 Aug 2018, Brandon Williams <bmwill@google.com> wrote:
>Teach "submodule_name_to_gitdir()" to munge a submodule's name (by url
>encoding it) before using it to build a path to the submodule's gitdir.

Seems like this will be a problem if it results in names that exceed 
NAME_MAX? On common systems that's 255, so it's probably not going to be 
common; but it certainly could for some repositories.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 22:34               ` Stefan Beller
@ 2018-08-16  2:34                 ` Jonathan Nieder
  2018-08-16  2:39                   ` Stefan Beller
  2018-08-16 15:07                   ` [PATCH 2/2] submodule: munge paths to submodule git directories Junio C Hamano
  0 siblings, 2 replies; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-16  2:34 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Brandon Williams, Jeff King, git

Hi again,

Stefan Beller wrote:
> On Tue, Aug 14, 2018 at 2:12 PM Jonathan Nieder <jrnieder@gmail.com> wrote:

>> What if we forbid directory separator characters in the gitdirname?
>
> Fine with me, but ideally we'd want to allow sharding the
> submodules. When you have 1000 submodules
> we'd want them not all inside the toplevel "modules/" ?

That's a good reason to permit slashes in the gitdirname.

If I understood the rest of your reply correctly, your worry was about
dangerous gitdirname values in .gitmodules.  I never had any wish to
read them from there anyway, so this worry hopefully goes away.

[...]
>> In this proposal, it would only be read from config, not from
>> .gitmodules.
>
> Ah good point. That makes sense.
>
> Stepping back a bit regarding the config:
[...]
> Now that we have the submodule.active or even
> submodule.<name>.active flags, we do not need (b) any more.
> So the URL turns into a useless piece of cruft that just is unneeded
> and might confuse the user.
>
> So maybe I'd want to propose a patch that removes
> submodule.<name>.url from the config once it is cloned.
> (I just read up on "submodule sync" again, but that might not
> even need special care for this new world)
>
> And with all that said, I think if we can avoid having the submodules
> gitdir in the config, the config would look much cleaner, too.

Yes, I understand and agree with this.

I should further spell out my motivation with this gitdirname
suggestion.  The issue that some people have mentioned in this thread
is that urlencoding might not be perfect --- it's pretty close to
perfect, but it's likely we'll come up with some unanticipated needs
later (like sharding) that it doesn't solve.  Solving those all right
now would not necessarily be wise, since the thing about unanticipated
needs is that you never know in advance what they will be. ;-)

So it would be nice, for future-proofing, if we can change the naming
scheme later.

As a bonus, that would also make interoperability with other
implementations easier.  For example, suppose we mess up in JGit and
urlencode a different set of characters than Git does.  Then a mixed
Git + JGit installation would have this subtle bug of the submodule
.git directory not being reused when I switch to and from and branch
not containing that submodule, in some circumstances.  That sounds
difficult to support.

Whereas if we have a gitdirname configuration variable, then JGit and
libgit2 and go-git do not have to match the naming scheme Git chooses.
They can try, but if one gets it subtly wrong then that is okay
because the submodule's directory name is right there and easy to look
up.

All at the cost of recording a little configuration somewhere.  If we
want to decrease the configuration, we can avoid recording it there in
the easy cases (e.g. when name == gitdirname).  That's "just" an
optimization.

And then we have the ability later to handle all the edge cases we
haven't handled yet today:

- sharding when the number of submodules is too large
- case-insensitive filesystems
- path name length limits
- different sets of filesystem-special characters

Sane?

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-16  2:34                 ` Jonathan Nieder
@ 2018-08-16  2:39                   ` Stefan Beller
  2018-08-16  2:47                     ` Jonathan Nieder
  2018-08-16 15:07                   ` [PATCH 2/2] submodule: munge paths to submodule git directories Junio C Hamano
  1 sibling, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-16  2:39 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Brandon Williams, Jeff King, git

> [...]

all good reasons; ship it :-)

> All at the cost of recording a little configuration somewhere.  If we
> want to decrease the configuration, we can avoid recording it there in
> the easy cases (e.g. when name == gitdirname).  That's "just" an
> optimization.

Sounds good, but gerrit for example would not take advantage of such
optimisation as they have slashes in their submodules. :-(
I wonder if we can optimize further and keep slashes if there is
no conflict (as then name == gitdirname, so it can be optimized).

> And then we have the ability later to handle all the edge cases we
> haven't handled yet today:
>
> - sharding when the number of submodules is too large
> - case-insensitive filesystems
> - path name length limits
> - different sets of filesystem-special characters
>
> Sane?

I'll keep thinking about it.

FYI: the reduction in configuration was just sent out.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-16  2:39                   ` Stefan Beller
@ 2018-08-16  2:47                     ` Jonathan Nieder
  2018-08-16 17:34                       ` Brandon Williams
  2018-08-16 18:19                       ` [PATCH] submodule: add config for where gitdirs are located Brandon Williams
  0 siblings, 2 replies; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-16  2:47 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Brandon Williams, Jeff King, git

Stefan Beller wrote:
> Jonathan Nieder wrote:

>> All at the cost of recording a little configuration somewhere.  If we
>> want to decrease the configuration, we can avoid recording it there in
>> the easy cases (e.g. when name == gitdirname).  That's "just" an
>> optimization.
>
> Sounds good, but gerrit for example would not take advantage of such
> optimisation as they have slashes in their submodules. :-(
> I wonder if we can optimize further and keep slashes if there is
> no conflict (as then name == gitdirname, so it can be optimized).

One possibility would be to treat gsub("/", "%2f") as another of the
easy cases.

[...]
>> And then we have the ability later to handle all the edge cases we
>> haven't handled yet today:
>>
>> - sharding when the number of submodules is too large
>> - case-insensitive filesystems
>> - path name length limits
>> - different sets of filesystem-special characters
>>
>> Sane?
>
> I'll keep thinking about it.

Thanks.

> FYI: the reduction in configuration was just sent out.

https://public-inbox.org/git/20180816023100.161626-1-sbeller@google.com/
for those following along.

Ciao,
Jonathan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-16  2:34                 ` Jonathan Nieder
  2018-08-16  2:39                   ` Stefan Beller
@ 2018-08-16 15:07                   ` Junio C Hamano
  1 sibling, 0 replies; 40+ messages in thread
From: Junio C Hamano @ 2018-08-16 15:07 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Stefan Beller, Brandon Williams, Jeff King, git

Jonathan Nieder <jrnieder@gmail.com> writes:

> So it would be nice, for future-proofing, if we can change the naming
> scheme later.
> ...
> All at the cost of recording a little configuration somewhere.  If we
> want to decrease the configuration, we can avoid recording it there in
> the easy cases (e.g. when name == gitdirname).  That's "just" an
> optimization.
>
> And then we have the ability later to handle all the edge cases we
> haven't handled yet today:
>
> - sharding when the number of submodules is too large
> - case-insensitive filesystems
> - path name length limits
> - different sets of filesystem-special characters
>
> Sane?

Yup.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-16  2:47                     ` Jonathan Nieder
@ 2018-08-16 17:34                       ` Brandon Williams
  2018-08-16 18:19                       ` [PATCH] submodule: add config for where gitdirs are located Brandon Williams
  1 sibling, 0 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-16 17:34 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Stefan Beller, Jeff King, git

On 08/15, Jonathan Nieder wrote:
> Stefan Beller wrote:
> > Jonathan Nieder wrote:
> 
> >> All at the cost of recording a little configuration somewhere.  If we
> >> want to decrease the configuration, we can avoid recording it there in
> >> the easy cases (e.g. when name == gitdirname).  That's "just" an
> >> optimization.
> >
> > Sounds good, but gerrit for example would not take advantage of such
> > optimisation as they have slashes in their submodules. :-(
> > I wonder if we can optimize further and keep slashes if there is
> > no conflict (as then name == gitdirname, so it can be optimized).
> 
> One possibility would be to treat gsub("/", "%2f") as another of the
> easy cases.
> 
> [...]
> >> And then we have the ability later to handle all the edge cases we
> >> haven't handled yet today:
> >>
> >> - sharding when the number of submodules is too large
> >> - case-insensitive filesystems
> >> - path name length limits
> >> - different sets of filesystem-special characters
> >>
> >> Sane?

Seems like a sensible thing to do. Let me work up some patches to
implement this using config primarily and these other schemes as
fallbacks.

> >
> > I'll keep thinking about it.
> 
> Thanks.
> 
> > FYI: the reduction in configuration was just sent out.
> 
> https://public-inbox.org/git/20180816023100.161626-1-sbeller@google.com/
> for those following along.
> 
> Ciao,
> Jonathan

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH] submodule: add config for where gitdirs are located
  2018-08-16  2:47                     ` Jonathan Nieder
  2018-08-16 17:34                       ` Brandon Williams
@ 2018-08-16 18:19                       ` Brandon Williams
  2018-08-20 22:03                         ` Junio C Hamano
  1 sibling, 1 reply; 40+ messages in thread
From: Brandon Williams @ 2018-08-16 18:19 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Introduce the config "submodule.<name>.gitdirpath" which is used to
indicate where a submodule's gitdir is located inside of a repository's
"modules" directory.

Signed-off-by: Brandon Williams <bmwill@google.com>
---

Maybe something like this on top?  Do you think we should disallow "../"
in this config, even though it is a repository local configuration and
not shipped in .gitmodules?

 submodule.c                | 13 ++++++++++++-
 t/t7400-submodule-basic.sh | 10 ++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/submodule.c b/submodule.c
index 4854d88ce8..0cb00a9f24 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1966,16 +1966,27 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r,
 			      const char *submodule_name)
 {
 	size_t modules_len;
+	char *key;
+	char *gitdir_path;
 
 	strbuf_git_common_path(buf, r, "modules/");
 	modules_len = buf->len;
-	strbuf_addstr(buf, submodule_name);
+
+	key = xstrfmt("submodule.%s.gitdirpath", submodule_name);
+	if (!repo_config_get_string(r, key, &gitdir_path)) {
+		strbuf_addstr(buf, gitdir_path);
+		free(key);
+		free(gitdir_path);
+		return;
+	}
+	free(key);
 
 	/*
 	 * If the submodule gitdir already exists using the old-fashioned
 	 * location (which uses the submodule name as-is, without munging it)
 	 * then return that.
 	 */
+	strbuf_addstr(buf, submodule_name);
 	if (!access(buf->buf, F_OK))
 		return;
 
diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh
index 963693332c..1555329a2f 100755
--- a/t/t7400-submodule-basic.sh
+++ b/t/t7400-submodule-basic.sh
@@ -1351,6 +1351,16 @@ test_expect_success 'resolve submodule gitdir in superprojects modules directory
 	$(git -C superproject rev-parse --git-common-dir)/modules/sub/module
 	EOF
 	git -C superproject submodule--helper gitdir "sub/module" >actual &&
+	test_cmp expect actual &&
+
+	# Test using "submodule.<name>.gitdirpath" config for where the submodules
+	# gitdir is located inside the superprojecs "modules" directory
+	mv superproject/.git/modules/sub/module superproject/.git/modules/submodule &&
+	cat >expect <<-EOF &&
+	$(git -C superproject rev-parse --git-common-dir)/modules/submodule
+	EOF
+	git -C superproject config "submodule.sub/module.gitdirpath" "submodule" &&
+	git -C superproject submodule--helper gitdir "sub/module" >actual &&
 	test_cmp expect actual
 '
 
-- 
2.18.0.865.gffc8e1a3cd6-goog


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] submodule: add config for where gitdirs are located
  2018-08-16 18:19                       ` [PATCH] submodule: add config for where gitdirs are located Brandon Williams
@ 2018-08-20 22:03                         ` Junio C Hamano
  0 siblings, 0 replies; 40+ messages in thread
From: Junio C Hamano @ 2018-08-20 22:03 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

> Introduce the config "submodule.<name>.gitdirpath" which is used to
> indicate where a submodule's gitdir is located inside of a repository's
> "modules" directory.
>
> Signed-off-by: Brandon Williams <bmwill@google.com>
> ---
>
> Maybe something like this on top?  Do you think we should disallow
> "../" in this config, even though it is a repository local
> configuration and not shipped in .gitmodules?

Sounds sensible to start strict and loosen later if/when necessary.

If we disallow "../", shouldn't we also reject an absolute path
(meaning, you can only specify a subdirectory of $GIT_DIR/modules/
in the super-project)?

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-14 18:04       ` Brandon Williams
  2018-08-14 18:57         ` Jonathan Nieder
  2018-08-14 18:58         ` Jeff King
@ 2018-08-28 21:35         ` Stefan Beller
  2018-08-29  5:25           ` Jeff King
  2 siblings, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-28 21:35 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, git

> > > -           echo "gitdir: ../../../.git/modules/sub3/modules/dirdir/subsub" >./sub3/dirdir/subsub/.git_expect
> > > +           echo "gitdir: ../../../.git/modules/sub3/modules/dirdir%2fsubsub" >./sub3/dirdir/subsub/.git_expect
> >
> > One interesting thing about url-encoding is that it's not one-to-one.
> > This case could also be %2F, which is a different file (on a
> > case-sensitive filesystem). I think "%20" and "+" are similarly
> > interchangeable.
> >
> > If we were decoding the filenames, that's fine. The round-trip is
> > lossless.
> >
> > But that's not quite how the new code behaves. We encode the input and
> > then check to see if it matches an encoding we previously performed. So
> > if our urlencode routines ever change, this will subtly break.

And this is the problem:
a) we have a 'complicated' encoding here, which must never change
b) the "encode and check if it matches", will produce ugly code going forward,
    as it tries to differentiate between submodules named "url_encoded(a)"
    and "a" (e.g. "a%20b" and "a b" would conflict and we have to resolve
    the conflict, although those two names are perfectly fine as they do not
    have the original problem of having slashes)

Hence I would propose a simpler encoding:

1)    / -> _ ( replace a slash by an underscore)
2)    _ -> __ (replace any underscore by 2 underscores, this is just the
          escaping mechanism to differentiate a/b and a_b)

3) (optional) instead of putting it all in modules/, use another
directory gitmodules/
    for example. this will make sure we can tell if a repository has
been converted
    or is stuck with a setup of a current git.

> This is exactly the reason why I wanted to get some opinions on what the
> best thing to do here would be.  I _think_ the best thing would probably
> be to write a specific routine to do the conversion, and it wouldn't
> even have to be all that complex.  Basically I'm just interested in
> converting '/' characters so that things no longer behave like
> nested directories.

Yeah, then let's just convert '/' with as little overhead as possible.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-28 21:35         ` Stefan Beller
@ 2018-08-29  5:25           ` Jeff King
  2018-08-29 18:10             ` Stefan Beller
  2018-08-29 21:09             ` Jonathan Nieder
  0 siblings, 2 replies; 40+ messages in thread
From: Jeff King @ 2018-08-29  5:25 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Brandon Williams, git

On Tue, Aug 28, 2018 at 02:35:25PM -0700, Stefan Beller wrote:

> 3) (optional) instead of putting it all in modules/, use another
>    directory gitmodules/ for example. this will make sure we can tell
>    if a repository has been converted or is stuck with a setup of a
>    current git.

I actually kind of like that idea, as it makes the interaction between
old and new names much simpler to reason about.

And since old code won't know about the new names anyway, there's in
theory no downside. In practice, of course, the encoding may often be a
noop, and lazy scripts would continue to work most of the time if you
didn't change out the prefix directory. I'm not sure if that is an
argument for the scheme (because it will suss out broken scripts more
consistently) or against it (because 99% of the time those old scripts
would just happen to work).

> > This is exactly the reason why I wanted to get some opinions on what the
> > best thing to do here would be.  I _think_ the best thing would probably
> > be to write a specific routine to do the conversion, and it wouldn't
> > even have to be all that complex.  Basically I'm just interested in
> > converting '/' characters so that things no longer behave like
> > nested directories.
> 
> Yeah, then let's just convert '/' with as little overhead as possible.

Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
colliding)?

I'm OK if the answer is "no", but if you do want to deal with it, the
time is probably now.

-Peff

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29  5:25           ` Jeff King
@ 2018-08-29 18:10             ` Stefan Beller
  2018-08-29 21:03               ` Jeff King
  2018-08-29 21:09             ` Jonathan Nieder
  1 sibling, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-29 18:10 UTC (permalink / raw)
  To: Jeff King; +Cc: Brandon Williams, git

On Tue, Aug 28, 2018 at 10:25 PM Jeff King <peff@peff.net> wrote:
>
> On Tue, Aug 28, 2018 at 02:35:25PM -0700, Stefan Beller wrote:
>
> > 3) (optional) instead of putting it all in modules/, use another
> >    directory gitmodules/ for example. this will make sure we can tell
> >    if a repository has been converted or is stuck with a setup of a
> >    current git.
>
> I actually kind of like that idea, as it makes the interaction between
> old and new names much simpler to reason about.
>
> And since old code won't know about the new names anyway, there's in
> theory no downside. In practice, of course, the encoding may often be a
> noop, and lazy scripts would continue to work most of the time if you
> didn't change out the prefix directory. I'm not sure if that is an
> argument for the scheme (because it will suss out broken scripts more
> consistently) or against it (because 99% of the time those old scripts
> would just happen to work).
>
> > > This is exactly the reason why I wanted to get some opinions on what the
> > > best thing to do here would be.  I _think_ the best thing would probably
> > > be to write a specific routine to do the conversion, and it wouldn't
> > > even have to be all that complex.  Basically I'm just interested in
> > > converting '/' characters so that things no longer behave like
> > > nested directories.
> >
> > Yeah, then let's just convert '/' with as little overhead as possible.
>
> Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
> colliding)?

I do. :(

2d84f13dcb6 (config: fix case sensitive subsection names on writing, 2018-08-08)
explains the latest episode of case folding with submodules involved.

> I'm OK if the answer is "no", but if you do want to deal with it, the
> time is probably now.

Good point. But as soon as we start discussing case sensitivity, we
are drawn down the rabbit hole of funny file names. (Try naming
a submodule "CON1" and obtain it on Windows for example)
So we would need to have a file system specific encoding function for
submodule names, which sounds like a maintenance night mare.

The CON1 example shows that URL encoding may not be enough
on Windows and we'd have to extend the encoding if we care about
FS issues.

Another example would be "a" and "a\b" which would be a mess
in Windows as the '\' would work as a dir separator whereas these
two names were ok on linux. This would be fixed with url encoding.

URL encoding would not fix the case-folding issue that you
mentioned above.

So if I was thinking in the scheme presented above, we could just
have another rule that is

  [A-Z]  -> _[a-z]

(lowercase capital letters and escape them with an underscore)

But with that rule added, we are inventing a really complicated
encoding scheme already.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 18:10             ` Stefan Beller
@ 2018-08-29 21:03               ` Jeff King
  2018-08-29 21:10                 ` Stefan Beller
  0 siblings, 1 reply; 40+ messages in thread
From: Jeff King @ 2018-08-29 21:03 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Brandon Williams, git

On Wed, Aug 29, 2018 at 11:10:51AM -0700, Stefan Beller wrote:

> > Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
> > colliding)?
> 
> I do. :(
> 
> 2d84f13dcb6 (config: fix case sensitive subsection names on writing, 2018-08-08)
> explains the latest episode of case folding with submodules involved.
> 
> > I'm OK if the answer is "no", but if you do want to deal with it, the
> > time is probably now.
> 
> Good point. But as soon as we start discussing case sensitivity, we
> are drawn down the rabbit hole of funny file names. (Try naming
> a submodule "CON1" and obtain it on Windows for example)
> So we would need to have a file system specific encoding function for
> submodule names, which sounds like a maintenance night mare.

Hmph. I'd hoped that simply escaping metacharacters and doing some
obvious case-folding would be enough. And I think that would cover most
accidental cases. But yeah, Windows reserved names are basically
indistinguishable from reasonable names. They'd probably need
special-cased.

OTOH, I'm not sure how we handle those for entries in the actual tree.
Poking around git-for-windows/git, I think it uses the magic "\\?"
marker to tell the OS to interpret the name literally.

So I wonder if it might be sufficient to just deal with the more obvious
folding issues. Or as you noted, if we just choose lowercase names as
the normalized form, that might also be enough. :)

> So if I was thinking in the scheme presented above, we could just
> have another rule that is
> 
>   [A-Z]  -> _[a-z]
> 
> (lowercase capital letters and escape them with an underscore)

Yes, that makes even the capitalized "CON" issues go away. It's not a
one-to-one mapping, though ("foo-" and "foo_" map to the same entity).

If we want that, too, I think something like url-encoding is fine, with
the caveat that we simply urlencode _more_ things (i.e., anything not in
[a-z_]).

-Peff

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29  5:25           ` Jeff King
  2018-08-29 18:10             ` Stefan Beller
@ 2018-08-29 21:09             ` Jonathan Nieder
  2018-08-29 21:14               ` Stefan Beller
  2018-08-29 21:32               ` Jeff King
  1 sibling, 2 replies; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-29 21:09 UTC (permalink / raw)
  To: Jeff King; +Cc: Stefan Beller, Brandon Williams, git

Jeff King wrote:
> On Tue, Aug 28, 2018 at 02:35:25PM -0700, Stefan Beller wrote:

>> Yeah, then let's just convert '/' with as little overhead as possible.
>
> Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
> colliding)?
>
> I'm OK if the answer is "no", but if you do want to deal with it, the
> time is probably now.

Have we rejected the config approach?  I really liked the attribute of
not having to solve everything right away.  I'm getting scared that
we've forgotten that goal.

It mixes well with Stefan's idea of setting up a new .git/submodules/
directory.  We could require that everything in .git/submodules/ have
configuration (or that everything in that directory either have
configuration or be the result of a "very simple" transformation) and
that way, all ambiguity goes away.

Part of the definition of "very simple" could be that the submodule
name must consist of some whitelisted list of characters (including no
uppercase), for example.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:03               ` Jeff King
@ 2018-08-29 21:10                 ` Stefan Beller
  2018-08-29 21:18                   ` Jonathan Nieder
  2018-08-29 21:30                   ` Jeff King
  0 siblings, 2 replies; 40+ messages in thread
From: Stefan Beller @ 2018-08-29 21:10 UTC (permalink / raw)
  To: Jeff King; +Cc: Brandon Williams, git

> Yes, that makes even the capitalized "CON" issues go away. It's not a
> one-to-one mapping, though ("foo-" and "foo_" map to the same entity).

foo_ would map to foo__, and foo- would map to something else.
(foo- as we do not rewrite dashes, yet?)

>
> If we want that, too, I think something like url-encoding is fine, with
> the caveat that we simply urlencode _more_ things (i.e., anything not in
> [a-z_]).

Yeah I think we need more than url encoding now.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:09             ` Jonathan Nieder
@ 2018-08-29 21:14               ` Stefan Beller
  2018-08-29 21:25                 ` Brandon Williams
  2018-08-29 21:32               ` Jeff King
  1 sibling, 1 reply; 40+ messages in thread
From: Stefan Beller @ 2018-08-29 21:14 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Jeff King, Brandon Williams, git

On Wed, Aug 29, 2018 at 2:09 PM Jonathan Nieder <jrnieder@gmail.com> wrote:
>
> Jeff King wrote:
> > On Tue, Aug 28, 2018 at 02:35:25PM -0700, Stefan Beller wrote:
>
> >> Yeah, then let's just convert '/' with as little overhead as possible.
> >
> > Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
> > colliding)?
> >
> > I'm OK if the answer is "no", but if you do want to deal with it, the
> > time is probably now.
>
> Have we rejected the config approach?

I did not reject that approach, but am rather waiting for patches. ;-)

> I really liked the attribute of
> not having to solve everything right away.  I'm getting scared that
> we've forgotten that goal.

Eh, sorry for side tracking this issue.

I am just under the impression that the URL encoding is not particularly
good for our use case as it solves just one out of many things, whereas
the one thing (having no slashes) can also be solved in an easier way.

> It mixes well with Stefan's idea of setting up a new .git/submodules/
> directory.  We could require that everything in .git/submodules/ have
> configuration (or that everything in that directory either have
> configuration or be the result of a "very simple" transformation) and
> that way, all ambiguity goes away.

I would not want to have a world where we require that config, but I
would agree to the latter, hence we would need to discuss "very simple".
I guess that are 2 or 3 rules at most.

> Part of the definition of "very simple" could be that the submodule
> name must consist of some whitelisted list of characters (including no
> uppercase), for example.

Good catch!

Thanks for chiming in,
Stefan

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:10                 ` Stefan Beller
@ 2018-08-29 21:18                   ` Jonathan Nieder
  2018-08-29 21:27                     ` Stefan Beller
  2018-08-29 21:30                   ` Jeff King
  1 sibling, 1 reply; 40+ messages in thread
From: Jonathan Nieder @ 2018-08-29 21:18 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jeff King, Brandon Williams, git

Hi,

Stefan Beller wrote:

>> Yes, that makes even the capitalized "CON" issues go away. It's not a
>> one-to-one mapping, though ("foo-" and "foo_" map to the same entity).
>
> foo_ would map to foo__, and foo- would map to something else.
> (foo- as we do not rewrite dashes, yet?)
>
>> If we want that, too, I think something like url-encoding is fine, with
>> the caveat that we simply urlencode _more_ things (i.e., anything not in
>> [a-z_]).
>
> Yeah I think we need more than url encoding now.

Can you say more?  Perhaps my expectations have been poisoned by tools
like dpkg-buildpackage that use urlencode.  As far as I can tell, it
works fine.

Moreover, urlencode has some attributes that make it a good potential
fit: it's intuitive, it's unambiguous (yes, it's one-to-many, but at
least it's not many-to-many), and people know how to deal with it from
their lives using browsers.  Can you spell out for me what problem
we're solving with something more custom?

Stepping back, I am very worried about any design that doesn't give us
the ability to tweak things later.  See [1] and [2] for more on that
subject.

Thanks,
Jonathan

[1] https://public-inbox.org/git/20180816023446.GA127655@aiede.svl.corp.google.com/
[2] https://public-inbox.org/git/20180829210913.GF7547@aiede.svl.corp.google.com/

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:14               ` Stefan Beller
@ 2018-08-29 21:25                 ` Brandon Williams
  0 siblings, 0 replies; 40+ messages in thread
From: Brandon Williams @ 2018-08-29 21:25 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Jonathan Nieder, Jeff King, git

On 08/29, Stefan Beller wrote:
> On Wed, Aug 29, 2018 at 2:09 PM Jonathan Nieder <jrnieder@gmail.com> wrote:
> >
> > Jeff King wrote:
> > > On Tue, Aug 28, 2018 at 02:35:25PM -0700, Stefan Beller wrote:
> >
> > >> Yeah, then let's just convert '/' with as little overhead as possible.
> > >
> > > Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
> > > colliding)?
> > >
> > > I'm OK if the answer is "no", but if you do want to deal with it, the
> > > time is probably now.
> >
> > Have we rejected the config approach?
> 
> I did not reject that approach, but am rather waiting for patches. ;-)

Note I did send out a patch using this approach, so no need to wait any
longer! :D

https://public-inbox.org/git/20180816181940.46114-1-bmwill@google.com/

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:18                   ` Jonathan Nieder
@ 2018-08-29 21:27                     ` Stefan Beller
  0 siblings, 0 replies; 40+ messages in thread
From: Stefan Beller @ 2018-08-29 21:27 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Jeff King, Brandon Williams, git

On Wed, Aug 29, 2018 at 2:18 PM Jonathan Nieder <jrnieder@gmail.com> wrote:
>
> Hi,
>
> Stefan Beller wrote:
>
> >> Yes, that makes even the capitalized "CON" issues go away. It's not a
> >> one-to-one mapping, though ("foo-" and "foo_" map to the same entity).
> >
> > foo_ would map to foo__, and foo- would map to something else.
> > (foo- as we do not rewrite dashes, yet?)
> >
> >> If we want that, too, I think something like url-encoding is fine, with
> >> the caveat that we simply urlencode _more_ things (i.e., anything not in
> >> [a-z_]).
> >
> > Yeah I think we need more than url encoding now.
>
> Can you say more?

https://public-inbox.org/git/CAGZ79kZv4BjRq=kq_1UeT2Kn38OZwYFgnMsTe6X_WP41=hBtSQ@mail.gmail.com/

> Can you spell out for me what problem we're solving with something more custom?

case sensitivity for example.

> the ability to tweak things later.

That is unrelated to the choice of encoding, but more related to
https://public-inbox.org/git/20180816181940.46114-1-bmwill@google.com/

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:10                 ` Stefan Beller
  2018-08-29 21:18                   ` Jonathan Nieder
@ 2018-08-29 21:30                   ` Jeff King
  1 sibling, 0 replies; 40+ messages in thread
From: Jeff King @ 2018-08-29 21:30 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Brandon Williams, git

On Wed, Aug 29, 2018 at 02:10:37PM -0700, Stefan Beller wrote:

> > Yes, that makes even the capitalized "CON" issues go away. It's not a
> > one-to-one mapping, though ("foo-" and "foo_" map to the same entity).
> 
> foo_ would map to foo__, and foo- would map to something else.
> (foo- as we do not rewrite dashes, yet?)

Ah, OK, I took your:

>   [A-Z]  -> _[a-z]

to mean "A-Z becomes a-z, and everything else becomes underscore".

If you mean a real one-to-one mapping that allows a-z and only a few
safe metacharacters, then yeah, that's what I was thinking, too.

> > If we want that, too, I think something like url-encoding is fine, with
> > the caveat that we simply urlencode _more_ things (i.e., anything not in
> > [a-z_]).
> 
> Yeah I think we need more than url encoding now.

If you take "url encoding" to only be the mechanical transformation of
quoting, not the set of _what_ gets quoting, we can still stick with it.
We don't need to, but it's probably no worse than inventing our own
set of quoting rules.

-Peff

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] submodule: munge paths to submodule git directories
  2018-08-29 21:09             ` Jonathan Nieder
  2018-08-29 21:14               ` Stefan Beller
@ 2018-08-29 21:32               ` Jeff King
  1 sibling, 0 replies; 40+ messages in thread
From: Jeff King @ 2018-08-29 21:32 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Stefan Beller, Brandon Williams, git

On Wed, Aug 29, 2018 at 02:09:13PM -0700, Jonathan Nieder wrote:

> Jeff King wrote:
> > On Tue, Aug 28, 2018 at 02:35:25PM -0700, Stefan Beller wrote:
> 
> >> Yeah, then let's just convert '/' with as little overhead as possible.
> >
> > Do you care about case-folding issues (e.g., submodules "FOO" and "foo"
> > colliding)?
> >
> > I'm OK if the answer is "no", but if you do want to deal with it, the
> > time is probably now.
> 
> Have we rejected the config approach?  I really liked the attribute of
> not having to solve everything right away.  I'm getting scared that
> we've forgotten that goal.

I personally have no problem with that approach, but I also haven't
thought that hard about it (I was mostly ignoring the discussion since
it seemed like submodule-interested folks, but I happened to see what
looked like a potentially bad idea cc'd to me ;) ).

-Peff

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC] submodule: munge paths to submodule git directories
  2018-08-07 23:06 [RFC] submodule: munge paths to submodule git directories Brandon Williams
                   ` (2 preceding siblings ...)
  2018-08-08 22:33 ` [PATCH 0/2] munge submodule names Brandon Williams
@ 2019-01-15  1:25 ` " Jonathan Nieder
  2019-01-17 17:32   ` Jeff King
  3 siblings, 1 reply; 40+ messages in thread
From: Jonathan Nieder @ 2019-01-15  1:25 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Brandon Williams, Stefan Beller, Aaron Schrab

Hi,

In August, 2018, Brandon Williams wrote:

> Commit 0383bbb901 (submodule-config: verify submodule names as paths,
> 2018-04-30) introduced some checks to ensure that submodule names don't
> include directory traversal components (e.g. "../").
>
> This addresses the vulnerability identified in 0383bbb901 but the root
> cause is that we use submodule names to construct paths to the
> submodule's git directory.  What we really should do is munge the
> submodule name before using it to construct a path.

Thanks again for this.  I liked the proposal enough to run Git with
patches implementing it for a while.  That said, there were some
unaddressed comments in the review.

I've put a summary in https://crbug.com/git/28 to make this easier to
pick up where we left off.  Summary from there of the upstream review:

1. Using urlencoding to escape the slashes is fine, but what if we
   want to escape some other character (for example to handle
   case-insensitive filesystems)?

   Proposal: Store the escaping mapping in config[1] so it can be
   modified it in the future:

	[submodule "plugin/hooks"]
		gitdirname = plugins%2fhooks

2. The urlencoded name could conflict with a submodule that has % in
   its name in an existing clone created by an older version of Git.

   Proposal: Put submodules in a new .git/submodules/ directory
   instead of .git/modules/.

3. These gitdirname settings can clutter up .git/config.

   Proposal: For the "easy" cases (e.g. submodule name consisting of
   [a-z]*), allow omitting the gitdirname setting.

Is that a fair summary?  Are there concerns from the review that I
forgot, or would a new version of the series that addresses those
three problems put us in good shape?

Thanks,
Jonathan

[1] https://public-inbox.org/git/20180816181940.46114-1-bmwill@google.com/

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC] submodule: munge paths to submodule git directories
  2019-01-15  1:25 ` [RFC] " Jonathan Nieder
@ 2019-01-17 17:32   ` Jeff King
  2019-01-17 17:57     ` Stefan Beller
  0 siblings, 1 reply; 40+ messages in thread
From: Jeff King @ 2019-01-17 17:32 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: git, Brandon Williams, Stefan Beller, Aaron Schrab

On Mon, Jan 14, 2019 at 05:25:07PM -0800, Jonathan Nieder wrote:

> I've put a summary in https://crbug.com/git/28 to make this easier to
> pick up where we left off.  Summary from there of the upstream review:
> 
> 1. Using urlencoding to escape the slashes is fine, but what if we
>    want to escape some other character (for example to handle
>    case-insensitive filesystems)?
> 
>    Proposal: Store the escaping mapping in config[1] so it can be
>    modified it in the future:
> 
> 	[submodule "plugin/hooks"]
> 		gitdirname = plugins%2fhooks

I think it might be worth dealing with case-sensitivity _now_, since we
know it's a problem. That doesn't make the problem of "what if we want
to change the mapping later" go away, but it does make it a lot less
likely to come up.

> 2. The urlencoded name could conflict with a submodule that has % in
>    its name in an existing clone created by an older version of Git.
> 
>    Proposal: Put submodules in a new .git/submodules/ directory
>    instead of .git/modules/.

This proposal is orthogonal to (1), right? I.e., if we store the mapping
then that is what tells us we're using the mapped name.

> 3. These gitdirname settings can clutter up .git/config.
> 
>    Proposal: For the "easy" cases (e.g. submodule name consisting of
>    [a-z]*), allow omitting the gitdirname setting.

Not having thought about it too hard, I suspect that may open back up
corner cases with respect to backwards compatibility and ambiguity.

Are you worried about human-readable clutter? I.e., that .git/config
becomes hard to read? If so, then:

  - I doubt this is any worse than the existing tracking-branch config.

  - it might be reasonable to store it in .git/submodule-config, and
    make sure that .git/config contains a single "[include]path =
    submodule-config" line. I've been tempted to do that for
    tracking-branch config.

Or are you worried about the cost of parsing those entries? Basically
every git command parses config linearly at least once; this normally
isn't noticeable, but at some size it becomes a problem. I have no idea
what that size is.

If so, then I think we'd want submodule config in its own file but
_without_ an include from the normal config file. That would break
compatibility with anything that tries to use "git config
submodule.foo.path", etc.

That's all just musing. I'm actually not really convinced it's a
problem.

> Is that a fair summary?  Are there concerns from the review that I
> forgot, or would a new version of the series that addresses those
> three problems put us in good shape?

I don't really have a strong opinion either way. I still think the
one-way transformation that the patch uses is less elegant than a real
encode/decode round-trip (i.e., what I discussed in [1]). But I admit to
not having thought through all of the details of the encode/decode
thing, and certainly have not written the code.

[1] http://public-inbox.org/git/20180809212602.GA11342@sigill.intra.peff.net/

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [RFC] submodule: munge paths to submodule git directories
  2019-01-17 17:32   ` Jeff King
@ 2019-01-17 17:57     ` Stefan Beller
  0 siblings, 0 replies; 40+ messages in thread
From: Stefan Beller @ 2019-01-17 17:57 UTC (permalink / raw)
  To: Jeff King; +Cc: Jonathan Nieder, git, Brandon Williams, Aaron Schrab

On Thu, Jan 17, 2019 at 9:32 AM Jeff King <peff@peff.net> wrote:
>
> On Mon, Jan 14, 2019 at 05:25:07PM -0800, Jonathan Nieder wrote:
>
> > I've put a summary in https://crbug.com/git/28 to make this easier to
> > pick up where we left off.  Summary from there of the upstream review:
> >
> > 1. Using urlencoding to escape the slashes is fine, but what if we
> >    want to escape some other character (for example to handle
> >    case-insensitive filesystems)?
> >
> >    Proposal: Store the escaping mapping in config[1] so it can be
> >    modified it in the future:
> >
> >       [submodule "plugin/hooks"]
> >               gitdirname = plugins%2fhooks
>
> I think it might be worth dealing with case-sensitivity _now_, since we
> know it's a problem. That doesn't make the problem of "what if we want
> to change the mapping later" go away, but it does make it a lot less
> likely to come up.

Makes sense.

>
> > 2. The urlencoded name could conflict with a submodule that has % in
> >    its name in an existing clone created by an older version of Git.
> >
> >    Proposal: Put submodules in a new .git/submodules/ directory
> >    instead of .git/modules/.
>
> This proposal is orthogonal to (1), right? I.e., if we store the mapping
> then that is what tells us we're using the mapped name.

Technically true, but it allows for easier implementation:
now we have 2 distinct namespaces, such that we can avoid
double booking easier:

    Consider 2 submodules "a b" and "a%20b".

Without (2), (1) is hard to explain as the first might have been encoded
to a%20b or there might have been the second put in place from a
current (old) version of Git. So we'd have to reason about these corner cases.

With (2) in place, we'd only ever have the second in a place "a%2520b"
(if I am to trust https://www.urlencoder.org/)

> > 3. These gitdirname settings can clutter up .git/config.
> >
> >    Proposal: For the "easy" cases (e.g. submodule name consisting of
> >    [a-z]*), allow omitting the gitdirname setting.
>
> Not having thought about it too hard, I suspect that may open back up
> corner cases with respect to backwards compatibility and ambiguity.
>
> Are you worried about human-readable clutter? I.e., that .git/config
> becomes hard to read? If so, then:
>
>   - I doubt this is any worse than the existing tracking-branch config.
>
>   - it might be reasonable to store it in .git/submodule-config, and
>     make sure that .git/config contains a single "[include]path =
>     submodule-config" line. I've been tempted to do that for
>     tracking-branch config.
>
> Or are you worried about the cost of parsing those entries? Basically
> every git command parses config linearly at least once; this normally
> isn't noticeable, but at some size it becomes a problem. I have no idea
> what that size is.
>
> If so, then I think we'd want submodule config in its own file but
> _without_ an include from the normal config file. That would break
> compatibility with anything that tries to use "git config
> submodule.foo.path", etc.
>
> That's all just musing. I'm actually not really convinced it's a
> problem.

ok, we can deal with the problem once it arises.

>
> > Is that a fair summary?  Are there concerns from the review that I
> > forgot, or would a new version of the series that addresses those
> > three problems put us in good shape?
>
> I don't really have a strong opinion either way. I still think the
> one-way transformation that the patch uses is less elegant than a real
> encode/decode round-trip (i.e., what I discussed in [1]). But I admit to
> not having thought through all of the details of the encode/decode
> thing, and certainly have not written the code.

The suggestion of adding at least a test for url encoding (2. from your mail)
is sensible.

Stefan

>
> [1] http://public-inbox.org/git/20180809212602.GA11342@sigill.intra.peff.net/

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, back to index

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-07 23:06 [RFC] submodule: munge paths to submodule git directories Brandon Williams
2018-08-07 23:25 ` Jonathan Nieder
2018-08-08  0:14 ` Junio C Hamano
2018-08-08 22:33 ` [PATCH 0/2] munge submodule names Brandon Williams
2018-08-08 22:33   ` [PATCH 1/2] submodule: create helper to build paths to submodule gitdirs Brandon Williams
2018-08-08 23:21     ` Stefan Beller
2018-08-09  0:45       ` Brandon Williams
2018-08-10 21:27     ` Junio C Hamano
2018-08-10 21:45       ` Brandon Williams
2018-08-08 22:33   ` [PATCH 2/2] submodule: munge paths to submodule git directories Brandon Williams
2018-08-09 21:26     ` Jeff King
2018-08-14 18:04       ` Brandon Williams
2018-08-14 18:57         ` Jonathan Nieder
2018-08-14 21:08           ` Stefan Beller
2018-08-14 21:12             ` Jonathan Nieder
2018-08-14 22:34               ` Stefan Beller
2018-08-16  2:34                 ` Jonathan Nieder
2018-08-16  2:39                   ` Stefan Beller
2018-08-16  2:47                     ` Jonathan Nieder
2018-08-16 17:34                       ` Brandon Williams
2018-08-16 18:19                       ` [PATCH] submodule: add config for where gitdirs are located Brandon Williams
2018-08-20 22:03                         ` Junio C Hamano
2018-08-16 15:07                   ` [PATCH 2/2] submodule: munge paths to submodule git directories Junio C Hamano
2018-08-14 18:58         ` Jeff King
2018-08-28 21:35         ` Stefan Beller
2018-08-29  5:25           ` Jeff King
2018-08-29 18:10             ` Stefan Beller
2018-08-29 21:03               ` Jeff King
2018-08-29 21:10                 ` Stefan Beller
2018-08-29 21:18                   ` Jonathan Nieder
2018-08-29 21:27                     ` Stefan Beller
2018-08-29 21:30                   ` Jeff King
2018-08-29 21:09             ` Jonathan Nieder
2018-08-29 21:14               ` Stefan Beller
2018-08-29 21:25                 ` Brandon Williams
2018-08-29 21:32               ` Jeff King
2018-08-16  0:19     ` Aaron Schrab
2019-01-15  1:25 ` [RFC] " Jonathan Nieder
2019-01-17 17:32   ` Jeff King
2019-01-17 17:57     ` Stefan Beller

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox