git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/2] ls-files: adding support for submodules
@ 2016-09-21 22:04 Brandon Williams
  2016-09-21 22:04 ` [PATCH 2/2] ls-files: add pathspec matching " Brandon Williams
  2016-09-21 22:08 ` [PATCH 1/2] ls-files: adding support " Brandon Williams
  0 siblings, 2 replies; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 22:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Allow ls-files to recognize submodules in order to retrieve a list of
files from a repository's submodules.  This is done by forking off a
process to recursively call ls-files on all submodules. Also added a
submodule-prefix command in order to prepend paths to child processes.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 Documentation/git-ls-files.txt         | 11 +++-
 builtin/ls-files.c                     | 61 +++++++++++++++++++++
 t/t3007-ls-files-recurse-submodules.sh | 99 ++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100755 t/t3007-ls-files-recurse-submodules.sh

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0d933ac..09e4449 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -18,7 +18,9 @@ SYNOPSIS
 		[--exclude-per-directory=<file>]
 		[--exclude-standard]
 		[--error-unmatch] [--with-tree=<tree-ish>]
-		[--full-name] [--abbrev] [--] [<file>...]
+		[--full-name] [--recurse-submodules]
+		[--submodule-prefix=<path>]
+		[--abbrev] [--] [<file>...]
 
 DESCRIPTION
 -----------
@@ -137,6 +139,13 @@ a space) at the start of each line:
 	option forces paths to be output relative to the project
 	top directory.
 
+--recurse-submodules::
+	Recursively calls ls-files on each submodule in the repository.
+	Currently there is only support for the --cached mode.
+
+--submodule-prefix=<path>::
+	Prepend the provided path to the output of each file
+
 --abbrev[=<n>]::
 	Instead of showing the full 40-byte hexadecimal object
 	lines, show only a partial prefix.
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 00ea91a..ffd9ea6 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "string-list.h"
 #include "pathspec.h"
+#include "run-command.h"
 
 static int abbrev;
 static int show_deleted;
@@ -28,6 +29,8 @@ static int show_valid_bit;
 static int line_terminator = '\n';
 static int debug_mode;
 static int show_eol;
+static const char *submodule_prefix;
+static int recurse_submodules;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -68,6 +71,21 @@ static void write_eolinfo(const struct cache_entry *ce, const char *path)
 static void write_name(const char *name)
 {
 	/*
+	 * NEEDSWORK: To make this thread-safe, full_name would have to be owned
+	 * by the caller.
+	 *
+	 * full_name get reused across output lines to minimize the allocation
+	 * churn.
+	 */
+	static struct strbuf full_name = STRBUF_INIT;
+	if (submodule_prefix && *submodule_prefix) {
+		strbuf_reset(&full_name);
+		strbuf_addstr(&full_name, submodule_prefix);
+		strbuf_addstr(&full_name, name);
+		name = full_name.buf;
+	}
+
+	/*
 	 * With "--full-name", prefix_len=0; this caller needs to pass
 	 * an empty string in that case (a NULL is good for "").
 	 */
@@ -152,6 +170,26 @@ static void show_killed_files(struct dir_struct *dir)
 	}
 }
 
+/**
+ * Recursively call ls-files on a submodule
+ */
+static void show_gitlink(const struct cache_entry *ce)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	int status;
+
+	argv_array_push(&cp.args, "ls-files");
+	argv_array_push(&cp.args, "--recurse-submodules");
+	argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
+			 submodule_prefix ? submodule_prefix : "",
+			 ce->name);
+	cp.git_cmd = 1;
+	cp.dir = ce->name;
+	status = run_command(&cp);
+	if (status)
+		exit(status);
+}
+
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
 	int len = max_prefix_len;
@@ -163,6 +201,10 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 			    len, ps_matched,
 			    S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
 		return;
+	if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+		show_gitlink(ce);
+		return;
+	}
 
 	if (tag && *tag && show_valid_bit &&
 	    (ce->ce_flags & CE_VALID)) {
@@ -468,6 +510,10 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 		{ OPTION_SET_INT, 0, "full-name", &prefix_len, NULL,
 			N_("make the output relative to the project top directory"),
 			PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL },
+		OPT_STRING(0, "submodule-prefix", &submodule_prefix,
+			N_("path"), N_("prepend <path> to each file")),
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("recurse through submodules")),
 		OPT_BOOL(0, "error-unmatch", &error_unmatch,
 			N_("if any <file> is not in the index, treat this as an error")),
 		OPT_STRING(0, "with-tree", &with_tree, N_("tree-ish"),
@@ -519,6 +565,21 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 	if (require_work_tree && !is_inside_work_tree())
 		setup_work_tree();
 
+	if (recurse_submodules &&
+	    (show_stage || show_deleted || show_others || show_unmerged ||
+	     show_killed || show_modified || show_resolve_undo ||
+	     show_valid_bit || show_tag || show_eol))
+		die("ls-files --recurse-submodules can only be used in "
+		    "--cached mode");
+
+	if (recurse_submodules && error_unmatch)
+		die("ls-files --recurse-submodules does not support "
+		    "--error-unmatch");
+
+	if (recurse_submodules && argc)
+		die("ls-files --recurse-submodules does not support path "
+		    "arguments");
+
 	parse_pathspec(&pathspec, 0,
 		       PATHSPEC_PREFER_CWD |
 		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
diff --git a/t/t3007-ls-files-recurse-submodules.sh b/t/t3007-ls-files-recurse-submodules.sh
new file mode 100755
index 0000000..caf3815
--- /dev/null
+++ b/t/t3007-ls-files-recurse-submodules.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+test_description='Test ls-files recurse-submodules feature
+
+This test verifies the recurse-submodules feature correctly lists files from
+submodules.
+'
+
+. ./test-lib.sh
+
+test_expect_success 'setup directory structure and submodules' '
+	echo a >a &&
+	mkdir b &&
+	echo b >b/b &&
+	git add a b &&
+	git commit -m "add a and b" &&
+	git init submodule &&
+	echo c >submodule/c &&
+	git -C submodule add c &&
+	git -C submodule commit -m "add c" &&
+	git submodule add ./submodule &&
+	git commit -m "added submodule"
+'
+
+test_expect_success 'ls-files correctly outputs files in submodule' '
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	submodule/c
+	EOF
+
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'ls-files does not output files not added to a repo' '
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	submodule/c
+	EOF
+
+	echo a >not_added &&
+	echo b >b/not_added &&
+	echo c >submodule/not_added &&
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'ls-files recurses more than 1 level' '
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	submodule/.gitmodules
+	submodule/c
+	submodule/subsub/d
+	EOF
+
+	git init submodule/subsub &&
+	echo d >submodule/subsub/d &&
+	git -C submodule/subsub add d &&
+	git -C submodule/subsub commit -m "add d" &&
+	git -C submodule submodule add ./subsub &&
+	git -C submodule commit -m "added subsub" &&
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules does not support using path arguments' '
+	test_must_fail git ls-files --recurse-submodules b 2>actual &&
+	test_i18ngrep "does not support path arguments" actual
+'
+
+test_expect_success '--recurse-submodules does not support --error-unmatch' '
+	test_must_fail git ls-files --recurse-submodules --error-unmatch 2>actual &&
+	test_i18ngrep "does not support --error-unmatch" actual
+'
+
+test_incompatible_with_recurse_submodules () {
+	test_expect_success "--recurse-submodules and $1 are incompatible" "
+		test_must_fail git ls-files --recurse-submodules $1 2>actual &&
+		test_i18ngrep 'can only be used in --cached mode' actual
+	"
+}
+
+test_incompatible_with_recurse_submodules -v
+test_incompatible_with_recurse_submodules -t
+test_incompatible_with_recurse_submodules --deleted
+test_incompatible_with_recurse_submodules --modified
+test_incompatible_with_recurse_submodules --others
+test_incompatible_with_recurse_submodules --stage
+test_incompatible_with_recurse_submodules --killed
+test_incompatible_with_recurse_submodules --unmerged
+test_incompatible_with_recurse_submodules --eol
+
+test_done
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/2] ls-files: add pathspec matching for submodules
  2016-09-21 22:04 [PATCH 1/2] ls-files: adding support for submodules Brandon Williams
@ 2016-09-21 22:04 ` Brandon Williams
  2016-09-21 22:53   ` Junio C Hamano
  2016-09-21 22:08 ` [PATCH 1/2] ls-files: adding support " Brandon Williams
  1 sibling, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 22:04 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Pathspecs can be a bit tricky when trying to apply them to submodules.
The main challenge is that the pathspecs will be with respect to the
super module and not with respect to paths in the submodule.  The
approach this patch takes is to pass in the identical pathspec from the
super module to the submodule in addition to the submodule-prefix, which
is the path from the root of the super module to the submodule, and then
we can compare an entry in the submodule prepended with the
submodule-prefix to the pathspec in order to determine if there is a
match.

This patch also permits the pathspec logic to perform a prefix match against
submodules since a pathspec could refer to a file inside of a submodule.
Due to limitations in the wildmatch logic, a prefix match is only done
literally.  If any wildcard character is encountered we'll simply punt
and produce a false positive match.  More accurate matching will be done
once inside the submodule.  This is due to the super module not knowing
what files could exist in the submodule.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 builtin/ls-files.c                     | 132 ++++++++++++++++++++-------------
 dir.c                                  |  43 ++++++++++-
 dir.h                                  |   4 +
 t/t3007-ls-files-recurse-submodules.sh | 114 ++++++++++++++++++++++++++--
 4 files changed, 231 insertions(+), 62 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index ffd9ea6..fa4029e 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -177,12 +177,34 @@ static void show_gitlink(const struct cache_entry *ce)
 {
 	struct child_process cp = CHILD_PROCESS_INIT;
 	int status;
+	int i;
 
 	argv_array_push(&cp.args, "ls-files");
 	argv_array_push(&cp.args, "--recurse-submodules");
 	argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
 			 submodule_prefix ? submodule_prefix : "",
 			 ce->name);
+	/* add options */
+	if (show_eol)
+		argv_array_push(&cp.args, "--eol");
+	if (show_valid_bit)
+		argv_array_push(&cp.args, "-v");
+	if (show_stage)
+		argv_array_push(&cp.args, "--stage");
+	if (show_cached)
+		argv_array_push(&cp.args, "--cached");
+	if (debug_mode)
+		argv_array_push(&cp.args, "--debug");
+
+	/*
+	 * Pass in the original pathspec args.  The submodule will be
+	 * responsible for prepending the 'submodule_prefix' prior to comparing
+	 * against the pathspec for matches.
+	 */
+	argv_array_push(&cp.args, "--");
+	for (i = 0; i < pathspec.nr; i++)
+		argv_array_push(&cp.args, pathspec.items[i].original);
+
 	cp.git_cmd = 1;
 	cp.dir = ce->name;
 	status = run_command(&cp);
@@ -192,57 +214,62 @@ static void show_gitlink(const struct cache_entry *ce)
 
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
+	struct strbuf name = STRBUF_INIT;
 	int len = max_prefix_len;
+	if (submodule_prefix)
+		strbuf_addstr(&name, submodule_prefix);
+	strbuf_addstr(&name, ce->name);
 
 	if (len >= ce_namelen(ce))
 		die("git ls-files: internal error - cache entry not superset of prefix");
 
-	if (!match_pathspec(&pathspec, ce->name, ce_namelen(ce),
-			    len, ps_matched,
-			    S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
-		return;
-	if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+	if (recurse_submodules && S_ISGITLINK(ce->ce_mode) &&
+	    submodule_path_match(&pathspec, name.buf, ps_matched)) {
 		show_gitlink(ce);
-		return;
-	}
+	} else if (match_pathspec(&pathspec, name.buf, name.len,
+				  len, ps_matched,
+				  S_ISDIR(ce->ce_mode) ||
+				  S_ISGITLINK(ce->ce_mode))) {
+		if (tag && *tag && show_valid_bit &&
+		    (ce->ce_flags & CE_VALID)) {
+			static char alttag[4];
+			memcpy(alttag, tag, 3);
+			if (isalpha(tag[0]))
+				alttag[0] = tolower(tag[0]);
+			else if (tag[0] == '?')
+				alttag[0] = '!';
+			else {
+				alttag[0] = 'v';
+				alttag[1] = tag[0];
+				alttag[2] = ' ';
+				alttag[3] = 0;
+			}
+			tag = alttag;
+		}
 
-	if (tag && *tag && show_valid_bit &&
-	    (ce->ce_flags & CE_VALID)) {
-		static char alttag[4];
-		memcpy(alttag, tag, 3);
-		if (isalpha(tag[0]))
-			alttag[0] = tolower(tag[0]);
-		else if (tag[0] == '?')
-			alttag[0] = '!';
-		else {
-			alttag[0] = 'v';
-			alttag[1] = tag[0];
-			alttag[2] = ' ';
-			alttag[3] = 0;
+		if (!show_stage) {
+			fputs(tag, stdout);
+		} else {
+			printf("%s%06o %s %d\t",
+			       tag,
+			       ce->ce_mode,
+			       find_unique_abbrev(ce->sha1,abbrev),
+			       ce_stage(ce));
+		}
+		write_eolinfo(ce, ce->name);
+		write_name(ce->name);
+		if (debug_mode) {
+			const struct stat_data *sd = &ce->ce_stat_data;
+
+			printf("  ctime: %d:%d\n", sd->sd_ctime.sec, sd->sd_ctime.nsec);
+			printf("  mtime: %d:%d\n", sd->sd_mtime.sec, sd->sd_mtime.nsec);
+			printf("  dev: %d\tino: %d\n", sd->sd_dev, sd->sd_ino);
+			printf("  uid: %d\tgid: %d\n", sd->sd_uid, sd->sd_gid);
+			printf("  size: %d\tflags: %x\n", sd->sd_size, ce->ce_flags);
 		}
-		tag = alttag;
 	}
 
-	if (!show_stage) {
-		fputs(tag, stdout);
-	} else {
-		printf("%s%06o %s %d\t",
-		       tag,
-		       ce->ce_mode,
-		       find_unique_abbrev(ce->sha1,abbrev),
-		       ce_stage(ce));
-	}
-	write_eolinfo(ce, ce->name);
-	write_name(ce->name);
-	if (debug_mode) {
-		const struct stat_data *sd = &ce->ce_stat_data;
-
-		printf("  ctime: %d:%d\n", sd->sd_ctime.sec, sd->sd_ctime.nsec);
-		printf("  mtime: %d:%d\n", sd->sd_mtime.sec, sd->sd_mtime.nsec);
-		printf("  dev: %d\tino: %d\n", sd->sd_dev, sd->sd_ino);
-		printf("  uid: %d\tgid: %d\n", sd->sd_uid, sd->sd_gid);
-		printf("  size: %d\tflags: %x\n", sd->sd_size, ce->ce_flags);
-	}
+	strbuf_release(&name);
 }
 
 static void show_ru_info(void)
@@ -566,27 +593,28 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 		setup_work_tree();
 
 	if (recurse_submodules &&
-	    (show_stage || show_deleted || show_others || show_unmerged ||
-	     show_killed || show_modified || show_resolve_undo ||
-	     show_valid_bit || show_tag || show_eol))
-		die("ls-files --recurse-submodules can only be used in "
-		    "--cached mode");
+	    (show_deleted || show_others || show_unmerged ||
+	     show_killed || show_modified || show_resolve_undo))
+		die("ls-files --recurse-submodules unsupported mode");
 
 	if (recurse_submodules && error_unmatch)
 		die("ls-files --recurse-submodules does not support "
 		    "--error-unmatch");
 
-	if (recurse_submodules && argc)
-		die("ls-files --recurse-submodules does not support path "
-		    "arguments");
-
 	parse_pathspec(&pathspec, 0,
 		       PATHSPEC_PREFER_CWD |
 		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
 		       prefix, argv);
 
-	/* Find common prefix for all pathspec's */
-	max_prefix = common_prefix(&pathspec);
+	/*
+	 * Find common prefix for all pathspec's
+	 * This is used as a performance optimization which unfortunately cannot
+	 * be done when recursing into submodules
+	 */
+	if (recurse_submodules)
+		max_prefix = NULL;
+	else
+		max_prefix = common_prefix(&pathspec);
 	max_prefix_len = max_prefix ? strlen(max_prefix) : 0;
 
 	/* Treat unmatching pathspec elements as errors */
diff --git a/dir.c b/dir.c
index 0ea235f..9df6d36 100644
--- a/dir.c
+++ b/dir.c
@@ -207,8 +207,9 @@ int within_depth(const char *name, int namelen,
 	return 1;
 }
 
-#define DO_MATCH_EXCLUDE   1
-#define DO_MATCH_DIRECTORY 2
+#define DO_MATCH_EXCLUDE   (1<<0)
+#define DO_MATCH_DIRECTORY (1<<1)
+#define DO_MATCH_SUBMODULE (1<<2)
 
 /*
  * Does 'match' match the given name?
@@ -283,6 +284,29 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 			 item->nowildcard_len - prefix))
 		return MATCHED_FNMATCH;
 
+	/* Perform checks to see if "name" is a super set of the pathspec */
+	if (flags & DO_MATCH_SUBMODULE) {
+		/* Check if the name is a literal prefix of the pathspec */
+		if ((item->match[namelen] == '/') &&
+		    !ps_strncmp(item, match, name, namelen))
+			return MATCHED_RECURSIVELY;
+
+		/*
+		 * Here is where we would perform a wildmatch to check if
+		 * "name" can be matched as a directory (or a prefix) against
+		 * the pathspec.  Since wildmatch doesn't have this capability
+		 * at the present we have to punt and say that it is a match,
+		 * esentially returning a false positive (as long as "name"
+		 * matches upto the first wild character).
+		 * The submodules themselves will be able to perform more
+		 * accurate matching to determine if the pathspec matches.
+		 */
+		if (item->nowildcard_len < item->len &&
+		    !ps_strncmp(item, match, name,
+				item->nowildcard_len - prefix))
+			return MATCHED_RECURSIVELY;
+	}
+
 	return 0;
 }
 
@@ -386,6 +410,21 @@ int match_pathspec(const struct pathspec *ps,
 	return negative ? 0 : positive;
 }
 
+/**
+ * Check if a submodule is a superset of the pathspec
+ */
+int submodule_path_match(const struct pathspec *ps,
+			 const char *submodule_name,
+			 char *seen)
+{
+	int matched = do_match_pathspec(ps, submodule_name,
+					strlen(submodule_name),
+					0, seen,
+					DO_MATCH_DIRECTORY |
+					DO_MATCH_SUBMODULE);
+	return matched;
+}
+
 int report_path_error(const char *ps_matched,
 		      const struct pathspec *pathspec,
 		      const char *prefix)
diff --git a/dir.h b/dir.h
index da1a858..97c83bb 100644
--- a/dir.h
+++ b/dir.h
@@ -304,6 +304,10 @@ extern int git_fnmatch(const struct pathspec_item *item,
 		       const char *pattern, const char *string,
 		       int prefix);
 
+extern int submodule_path_match(const struct pathspec *ps,
+				const char *submodule_name,
+				char *seen);
+
 static inline int ce_path_match(const struct cache_entry *ce,
 				const struct pathspec *pathspec,
 				char *seen)
diff --git a/t/t3007-ls-files-recurse-submodules.sh b/t/t3007-ls-files-recurse-submodules.sh
index caf3815..ca79fda 100755
--- a/t/t3007-ls-files-recurse-submodules.sh
+++ b/t/t3007-ls-files-recurse-submodules.sh
@@ -69,9 +69,111 @@ test_expect_success 'ls-files recurses more than 1 level' '
 	test_cmp expect actual
 '
 
-test_expect_success '--recurse-submodules does not support using path arguments' '
-	test_must_fail git ls-files --recurse-submodules b 2>actual &&
-	test_i18ngrep "does not support path arguments" actual
+test_expect_success '--recurse-submodules and pathspecs setup' '
+	echo e >submodule/subsub/e.txt &&
+	git -C submodule/subsub add e.txt &&
+	git -C submodule/subsub commit -m "adding e.txt" &&
+	echo f >submodule/f.TXT &&
+	echo g >submodule/g.txt &&
+	git -C submodule add f.TXT g.txt &&
+	git -C submodule commit -m "add f and g" &&
+	echo h >h.txt &&
+	mkdir sib &&
+	echo sib >sib/file &&
+	git add h.txt sib/file &&
+	git commit -m "add h and sib/file" &&
+	git init sub &&
+	echo sub >sub/file &&
+	git -C sub add file &&
+	git -C sub commit -m "add file" &&
+	git submodule add ./sub &&
+	git commit -m "added sub" &&
+
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	h.txt
+	sib/file
+	sub/file
+	submodule/.gitmodules
+	submodule/c
+	submodule/f.TXT
+	submodule/g.txt
+	submodule/subsub/d
+	submodule/subsub/e.txt
+	EOF
+
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual &&
+	cat actual &&
+	git ls-files --recurse-submodules "*" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	h.txt
+	submodule/g.txt
+	submodule/subsub/e.txt
+	EOF
+
+	git ls-files --recurse-submodules "*.txt" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	h.txt
+	submodule/f.TXT
+	submodule/g.txt
+	submodule/subsub/e.txt
+	EOF
+
+	git ls-files --recurse-submodules ":(icase)*.txt" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	h.txt
+	submodule/f.TXT
+	submodule/g.txt
+	EOF
+
+	git ls-files --recurse-submodules ":(icase)*.txt" ":(exclude)submodule/subsub/*" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	sub/file
+	EOF
+
+	git ls-files --recurse-submodules "sub" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "sub/" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "sub/file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "su*/file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "su?/file" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	sib/file
+	sub/file
+	EOF
+
+	git ls-files --recurse-submodules "s??/file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "s???file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "s*file" >actual &&
+	test_cmp expect actual
 '
 
 test_expect_success '--recurse-submodules does not support --error-unmatch' '
@@ -82,18 +184,14 @@ test_expect_success '--recurse-submodules does not support --error-unmatch' '
 test_incompatible_with_recurse_submodules () {
 	test_expect_success "--recurse-submodules and $1 are incompatible" "
 		test_must_fail git ls-files --recurse-submodules $1 2>actual &&
-		test_i18ngrep 'can only be used in --cached mode' actual
+		test_i18ngrep 'unsupported mode' actual
 	"
 }
 
-test_incompatible_with_recurse_submodules -v
-test_incompatible_with_recurse_submodules -t
 test_incompatible_with_recurse_submodules --deleted
 test_incompatible_with_recurse_submodules --modified
 test_incompatible_with_recurse_submodules --others
-test_incompatible_with_recurse_submodules --stage
 test_incompatible_with_recurse_submodules --killed
 test_incompatible_with_recurse_submodules --unmerged
-test_incompatible_with_recurse_submodules --eol
 
 test_done
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-21 22:04 [PATCH 1/2] ls-files: adding support for submodules Brandon Williams
  2016-09-21 22:04 ` [PATCH 2/2] ls-files: add pathspec matching " Brandon Williams
@ 2016-09-21 22:08 ` Brandon Williams
  2016-09-21 22:28   ` Junio C Hamano
  1 sibling, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 22:08 UTC (permalink / raw)
  To: git, Junio C Hamano; +Cc: Brandon Williams

This is another version of the first ls-files patch i sent out in
order.  In this version I fixed the option
 name to be '--submodule-prefix'.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-21 22:08 ` [PATCH 1/2] ls-files: adding support " Brandon Williams
@ 2016-09-21 22:28   ` Junio C Hamano
  2016-09-21 22:38     ` Brandon Williams
  0 siblings, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2016-09-21 22:28 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

> This is another version of the first ls-files patch i sent out in
> order.  In this version I fixed the option
>  name to be '--submodule-prefix'.

I understand that many internal changes in your work environment are
titled like "DOing X", but our convention around here is to label
them "DO X", as if you are giving an order to somebody else, either
telling the codebase "to be like so", or telling the patch-monkey
maintainer "to make it so".  So I'd suggest retitling it to

	ls-files: optionally recurse into submodules

or something like that.  It is an added advantage of being a lot
more descriptive than "adding support", which does not say what kind
of support it is adding.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-21 22:28   ` Junio C Hamano
@ 2016-09-21 22:38     ` Brandon Williams
  2016-09-21 22:42       ` [PATCH 1/2] ls-files: optionally recurse into submodules Brandon Williams
  2016-09-21 23:13       ` [PATCH 1/2] ls-files: adding support for submodules Junio C Hamano
  0 siblings, 2 replies; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 22:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

yes you mentioned this and I meant to change that before sending it out.
Looks like it slipped through have slipped through.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/2] ls-files: optionally recurse into submodules
  2016-09-21 22:38     ` Brandon Williams
@ 2016-09-21 22:42       ` Brandon Williams
  2016-09-22  6:20         ` Jeff King
  2016-09-21 23:13       ` [PATCH 1/2] ls-files: adding support for submodules Junio C Hamano
  1 sibling, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 22:42 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Allow ls-files to recognize submodules in order to retrieve a list of
files from a repository's submodules.  This is done by forking off a
process to recursively call ls-files on all submodules. Also added a
submodule-prefix command in order to prepend paths to child processes.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 Documentation/git-ls-files.txt         | 11 +++-
 builtin/ls-files.c                     | 61 +++++++++++++++++++++
 t/t3007-ls-files-recurse-submodules.sh | 99 ++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100755 t/t3007-ls-files-recurse-submodules.sh

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0d933ac..09e4449 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -18,7 +18,9 @@ SYNOPSIS
 		[--exclude-per-directory=<file>]
 		[--exclude-standard]
 		[--error-unmatch] [--with-tree=<tree-ish>]
-		[--full-name] [--abbrev] [--] [<file>...]
+		[--full-name] [--recurse-submodules]
+		[--submodule-prefix=<path>]
+		[--abbrev] [--] [<file>...]
 
 DESCRIPTION
 -----------
@@ -137,6 +139,13 @@ a space) at the start of each line:
 	option forces paths to be output relative to the project
 	top directory.
 
+--recurse-submodules::
+	Recursively calls ls-files on each submodule in the repository.
+	Currently there is only support for the --cached mode.
+
+--submodule-prefix=<path>::
+	Prepend the provided path to the output of each file
+
 --abbrev[=<n>]::
 	Instead of showing the full 40-byte hexadecimal object
 	lines, show only a partial prefix.
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 00ea91a..ffd9ea6 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "string-list.h"
 #include "pathspec.h"
+#include "run-command.h"
 
 static int abbrev;
 static int show_deleted;
@@ -28,6 +29,8 @@ static int show_valid_bit;
 static int line_terminator = '\n';
 static int debug_mode;
 static int show_eol;
+static const char *submodule_prefix;
+static int recurse_submodules;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -68,6 +71,21 @@ static void write_eolinfo(const struct cache_entry *ce, const char *path)
 static void write_name(const char *name)
 {
 	/*
+	 * NEEDSWORK: To make this thread-safe, full_name would have to be owned
+	 * by the caller.
+	 *
+	 * full_name get reused across output lines to minimize the allocation
+	 * churn.
+	 */
+	static struct strbuf full_name = STRBUF_INIT;
+	if (submodule_prefix && *submodule_prefix) {
+		strbuf_reset(&full_name);
+		strbuf_addstr(&full_name, submodule_prefix);
+		strbuf_addstr(&full_name, name);
+		name = full_name.buf;
+	}
+
+	/*
 	 * With "--full-name", prefix_len=0; this caller needs to pass
 	 * an empty string in that case (a NULL is good for "").
 	 */
@@ -152,6 +170,26 @@ static void show_killed_files(struct dir_struct *dir)
 	}
 }
 
+/**
+ * Recursively call ls-files on a submodule
+ */
+static void show_gitlink(const struct cache_entry *ce)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	int status;
+
+	argv_array_push(&cp.args, "ls-files");
+	argv_array_push(&cp.args, "--recurse-submodules");
+	argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
+			 submodule_prefix ? submodule_prefix : "",
+			 ce->name);
+	cp.git_cmd = 1;
+	cp.dir = ce->name;
+	status = run_command(&cp);
+	if (status)
+		exit(status);
+}
+
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
 	int len = max_prefix_len;
@@ -163,6 +201,10 @@ static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 			    len, ps_matched,
 			    S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
 		return;
+	if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+		show_gitlink(ce);
+		return;
+	}
 
 	if (tag && *tag && show_valid_bit &&
 	    (ce->ce_flags & CE_VALID)) {
@@ -468,6 +510,10 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 		{ OPTION_SET_INT, 0, "full-name", &prefix_len, NULL,
 			N_("make the output relative to the project top directory"),
 			PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL },
+		OPT_STRING(0, "submodule-prefix", &submodule_prefix,
+			N_("path"), N_("prepend <path> to each file")),
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("recurse through submodules")),
 		OPT_BOOL(0, "error-unmatch", &error_unmatch,
 			N_("if any <file> is not in the index, treat this as an error")),
 		OPT_STRING(0, "with-tree", &with_tree, N_("tree-ish"),
@@ -519,6 +565,21 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 	if (require_work_tree && !is_inside_work_tree())
 		setup_work_tree();
 
+	if (recurse_submodules &&
+	    (show_stage || show_deleted || show_others || show_unmerged ||
+	     show_killed || show_modified || show_resolve_undo ||
+	     show_valid_bit || show_tag || show_eol))
+		die("ls-files --recurse-submodules can only be used in "
+		    "--cached mode");
+
+	if (recurse_submodules && error_unmatch)
+		die("ls-files --recurse-submodules does not support "
+		    "--error-unmatch");
+
+	if (recurse_submodules && argc)
+		die("ls-files --recurse-submodules does not support path "
+		    "arguments");
+
 	parse_pathspec(&pathspec, 0,
 		       PATHSPEC_PREFER_CWD |
 		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
diff --git a/t/t3007-ls-files-recurse-submodules.sh b/t/t3007-ls-files-recurse-submodules.sh
new file mode 100755
index 0000000..caf3815
--- /dev/null
+++ b/t/t3007-ls-files-recurse-submodules.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+test_description='Test ls-files recurse-submodules feature
+
+This test verifies the recurse-submodules feature correctly lists files from
+submodules.
+'
+
+. ./test-lib.sh
+
+test_expect_success 'setup directory structure and submodules' '
+	echo a >a &&
+	mkdir b &&
+	echo b >b/b &&
+	git add a b &&
+	git commit -m "add a and b" &&
+	git init submodule &&
+	echo c >submodule/c &&
+	git -C submodule add c &&
+	git -C submodule commit -m "add c" &&
+	git submodule add ./submodule &&
+	git commit -m "added submodule"
+'
+
+test_expect_success 'ls-files correctly outputs files in submodule' '
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	submodule/c
+	EOF
+
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'ls-files does not output files not added to a repo' '
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	submodule/c
+	EOF
+
+	echo a >not_added &&
+	echo b >b/not_added &&
+	echo c >submodule/not_added &&
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'ls-files recurses more than 1 level' '
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	submodule/.gitmodules
+	submodule/c
+	submodule/subsub/d
+	EOF
+
+	git init submodule/subsub &&
+	echo d >submodule/subsub/d &&
+	git -C submodule/subsub add d &&
+	git -C submodule/subsub commit -m "add d" &&
+	git -C submodule submodule add ./subsub &&
+	git -C submodule commit -m "added subsub" &&
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules does not support using path arguments' '
+	test_must_fail git ls-files --recurse-submodules b 2>actual &&
+	test_i18ngrep "does not support path arguments" actual
+'
+
+test_expect_success '--recurse-submodules does not support --error-unmatch' '
+	test_must_fail git ls-files --recurse-submodules --error-unmatch 2>actual &&
+	test_i18ngrep "does not support --error-unmatch" actual
+'
+
+test_incompatible_with_recurse_submodules () {
+	test_expect_success "--recurse-submodules and $1 are incompatible" "
+		test_must_fail git ls-files --recurse-submodules $1 2>actual &&
+		test_i18ngrep 'can only be used in --cached mode' actual
+	"
+}
+
+test_incompatible_with_recurse_submodules -v
+test_incompatible_with_recurse_submodules -t
+test_incompatible_with_recurse_submodules --deleted
+test_incompatible_with_recurse_submodules --modified
+test_incompatible_with_recurse_submodules --others
+test_incompatible_with_recurse_submodules --stage
+test_incompatible_with_recurse_submodules --killed
+test_incompatible_with_recurse_submodules --unmerged
+test_incompatible_with_recurse_submodules --eol
+
+test_done
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/2] ls-files: add pathspec matching for submodules
  2016-09-21 22:04 ` [PATCH 2/2] ls-files: add pathspec matching " Brandon Williams
@ 2016-09-21 22:53   ` Junio C Hamano
  2016-09-21 23:23     ` Brandon Williams
  0 siblings, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2016-09-21 22:53 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

> Pathspecs can be a bit tricky when trying to apply them to submodules.
> The main challenge is that the pathspecs will be with respect to the
> super module and not with respect to paths in the submodule.  The
> approach this patch takes is to pass in the identical pathspec from the
> super module to the submodule in addition to the submodule-prefix, which
> is the path from the root of the super module to the submodule, and then
> we can compare an entry in the submodule prepended with the
> submodule-prefix to the pathspec in order to determine if there is a
> match.
>
> This patch also permits the pathspec logic to perform a prefix match against
> submodules since a pathspec could refer to a file inside of a submodule.
> Due to limitations in the wildmatch logic, a prefix match is only done
> literally.  If any wildcard character is encountered we'll simply punt
> and produce a false positive match.  More accurate matching will be done
> once inside the submodule.  This is due to the super module not knowing
> what files could exist in the submodule.

Sounds sensible.  Just a minor nit in terminology, but I think we
fairly consistently say "a superproject contains submodules" (run
"git grep -E 'super *(module|project)'").

I'd suggest s/super module/superproject/ for consistency.

> diff --git a/dir.c b/dir.c
> index 0ea235f..9df6d36 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -207,8 +207,9 @@ int within_depth(const char *name, int namelen,
>  	return 1;
>  }
>  
> -#define DO_MATCH_EXCLUDE   1
> -#define DO_MATCH_DIRECTORY 2
> +#define DO_MATCH_EXCLUDE   (1<<0)
> +#define DO_MATCH_DIRECTORY (1<<1)
> +#define DO_MATCH_SUBMODULE (1<<2)
>  
>  /*
>   * Does 'match' match the given name?
> @@ -283,6 +284,29 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
>  			 item->nowildcard_len - prefix))
>  		return MATCHED_FNMATCH;
>  
> +	/* Perform checks to see if "name" is a super set of the pathspec */
> +	if (flags & DO_MATCH_SUBMODULE) {
> +		/* Check if the name is a literal prefix of the pathspec */
> +		if ((item->match[namelen] == '/') &&
> +		    !ps_strncmp(item, match, name, namelen))
> +			return MATCHED_RECURSIVELY;

An example of this test would be to match pathspec "sub/file" with
submodule path "sub"?

item->match[namelen] is accessed without checking if item->match[]
is long enough here; shouldn't item->len be checked before doing
that?

> +		/*
> +		 * Here is where we would perform a wildmatch to check if
> +		 * "name" can be matched as a directory (or a prefix) against
> +		 * the pathspec.  Since wildmatch doesn't have this capability
> +		 * at the present we have to punt and say that it is a match,
> +		 * esentially returning a false positive (as long as "name"
> +		 * matches upto the first wild character).
> +		 * The submodules themselves will be able to perform more
> +		 * accurate matching to determine if the pathspec matches.
> +		 */
> +		if (item->nowildcard_len < item->len &&
> +		    !ps_strncmp(item, match, name,
> +				item->nowildcard_len - prefix))
> +			return MATCHED_RECURSIVELY;

An example of this test would be to match pathspec "su?/file" with
submodule path "sub", where the substring up to nowildcard_len is
the leading literal string "su" that must match with the path (in
other words, a path "sib" will not match "su?/file").

> +	}
> +

Hmph, isn't this the one that is allowed produce false positive but
cannot afford to give any false negative?  It feels a bit strange
that the code checks two cases where we can positively say that it
is worth descending into, and falling through would give "no this
will never match".  That sounds like invitation for false negatives.

IOW, I would have expected

        if (flags & DO_MATCH_SUBMODULE) {
                if (may match in this case)
                        return MATCHED_RECURSIVE;
                if (may match in this other case)
                        return MATCHED_RECURSIVE;
                ...
                if (obviously cannot match in this case)
                        return 0;
                if (obviously cannot match in this other case)
                        return 0;
                /* otherwise we cannot say */
                return MATCHED_RECURSIVELY;
        }

as the general code structure.

Fully spelled out,

        if (flags & DO_MATCH_SUBMODULE) {
                /* Check if the name is a literal prefix of the pathspec */
                if (namelen < item->len &&
                    item->match[namelen] == '/' &&
                    !ps_strncmp(item, match, name, namelen))
                        return MATCHED_RECURSIVE;

                /* Does the literal leading part have chance of matching? */
                if (item->nowildcard_len < item->len &&
                    namelen <= item->nowildcard_len &&
                    ps_strncmp(item, match, name, namelen))
                        return 0; /* no way "su?/file" can match "sib" */

                /* Otherwise we cannot say */
                return MATCHED_RECURSIVELY;
        }

or something like that.  There may be other "obviously cannot match"
cases we may want to add further.

Thanks.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-21 22:38     ` Brandon Williams
  2016-09-21 22:42       ` [PATCH 1/2] ls-files: optionally recurse into submodules Brandon Williams
@ 2016-09-21 23:13       ` Junio C Hamano
  2016-09-22  4:18         ` Jeff King
  1 sibling, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2016-09-21 23:13 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

> yes you mentioned this and I meant to change that before sending it out.
> Looks like it slipped through have slipped through.

I already fixed it up locally when I sent the reply, but thanks for
resending (which assures me that your local copy is up-to-date and I
do not have to worry about having to repeat me in the future, if
this ever needs further rerolling ;-).

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/2] ls-files: add pathspec matching for submodules
  2016-09-21 22:53   ` Junio C Hamano
@ 2016-09-21 23:23     ` Brandon Williams
  2016-09-21 23:28       ` [PATCH 2/2 v2] " Brandon Williams
  0 siblings, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 23:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Wed, Sep 21, 2016 at 3:53 PM, Junio C Hamano <gitster@pobox.com> wrote:
>
> Sounds sensible.  Just a minor nit in terminology, but I think we
> fairly consistently say "a superproject contains submodules" (run
> "git grep -E 'super *(module|project)'").
>
> I'd suggest s/super module/superproject/ for consistency.

Will do.

> An example of this test would be to match pathspec "sub/file" with
> submodule path "sub"?

Yep,  I believe there's a test for that case

> item->match[namelen] is accessed without checking if item->match[]
> is long enough here; shouldn't item->len be checked before doing
> that?

Oh right!  Good catch.

>
> Hmph, isn't this the one that is allowed produce false positive but
> cannot afford to give any false negative?  It feels a bit strange
> that the code checks two cases where we can positively say that it
> is worth descending into, and falling through would give "no this
> will never match".  That sounds like invitation for false negatives.
>
> IOW, I would have expected
>
>         if (flags & DO_MATCH_SUBMODULE) {
>                 if (may match in this case)
>                         return MATCHED_RECURSIVE;
>                 if (may match in this other case)
>                         return MATCHED_RECURSIVE;
>                 ...
>                 if (obviously cannot match in this case)
>                         return 0;
>                 if (obviously cannot match in this other case)
>                         return 0;
>                 /* otherwise we cannot say */
>                 return MATCHED_RECURSIVELY;
>         }
>
> as the general code structure.
>
> Fully spelled out,
>
>         if (flags & DO_MATCH_SUBMODULE) {
>                 /* Check if the name is a literal prefix of the pathspec */
>                 if (namelen < item->len &&
>                     item->match[namelen] == '/' &&
>                     !ps_strncmp(item, match, name, namelen))
>                         return MATCHED_RECURSIVE;
>
>                 /* Does the literal leading part have chance of matching? */
>                 if (item->nowildcard_len < item->len &&
>                     namelen <= item->nowildcard_len &&
>                     ps_strncmp(item, match, name, namelen))
>                         return 0; /* no way "su?/file" can match "sib" */
>
>                 /* Otherwise we cannot say */
>                 return MATCHED_RECURSIVELY;
>         }
>
> or something like that.  There may be other "obviously cannot match"
> cases we may want to add further.
>
> Thanks.

You're right it should be structured the other way.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 2/2 v2] ls-files: add pathspec matching for submodules
  2016-09-21 23:23     ` Brandon Williams
@ 2016-09-21 23:28       ` Brandon Williams
  2016-09-23 18:48         ` Junio C Hamano
  2016-09-23 19:20         ` Junio C Hamano
  0 siblings, 2 replies; 29+ messages in thread
From: Brandon Williams @ 2016-09-21 23:28 UTC (permalink / raw)
  To: git; +Cc: Brandon Williams

Pathspecs can be a bit tricky when trying to apply them to submodules.
The main challenge is that the pathspecs will be with respect to the
superproject and not with respect to paths in the submodule.  The
approach this patch takes is to pass in the identical pathspec from the
superproject to the submodule in addition to the submodule-prefix, which
is the path from the root of the superproject to the submodule, and then
we can compare an entry in the submodule prepended with the
submodule-prefix to the pathspec in order to determine if there is a
match.

This patch also permits the pathspec logic to perform a prefix match against
submodules since a pathspec could refer to a file inside of a submodule.
Due to limitations in the wildmatch logic, a prefix match is only done
literally.  If any wildcard character is encountered we'll simply punt
and produce a false positive match.  More accurate matching will be done
once inside the submodule.  This is due to the superproject not knowing
what files could exist in the submodule.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 builtin/ls-files.c                     | 132 ++++++++++++++++++++-------------
 dir.c                                  |  46 +++++++++++-
 dir.h                                  |   4 +
 t/t3007-ls-files-recurse-submodules.sh | 114 ++++++++++++++++++++++++++--
 4 files changed, 234 insertions(+), 62 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index ffd9ea6..fa4029e 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -177,12 +177,34 @@ static void show_gitlink(const struct cache_entry *ce)
 {
 	struct child_process cp = CHILD_PROCESS_INIT;
 	int status;
+	int i;
 
 	argv_array_push(&cp.args, "ls-files");
 	argv_array_push(&cp.args, "--recurse-submodules");
 	argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
 			 submodule_prefix ? submodule_prefix : "",
 			 ce->name);
+	/* add options */
+	if (show_eol)
+		argv_array_push(&cp.args, "--eol");
+	if (show_valid_bit)
+		argv_array_push(&cp.args, "-v");
+	if (show_stage)
+		argv_array_push(&cp.args, "--stage");
+	if (show_cached)
+		argv_array_push(&cp.args, "--cached");
+	if (debug_mode)
+		argv_array_push(&cp.args, "--debug");
+
+	/*
+	 * Pass in the original pathspec args.  The submodule will be
+	 * responsible for prepending the 'submodule_prefix' prior to comparing
+	 * against the pathspec for matches.
+	 */
+	argv_array_push(&cp.args, "--");
+	for (i = 0; i < pathspec.nr; i++)
+		argv_array_push(&cp.args, pathspec.items[i].original);
+
 	cp.git_cmd = 1;
 	cp.dir = ce->name;
 	status = run_command(&cp);
@@ -192,57 +214,62 @@ static void show_gitlink(const struct cache_entry *ce)
 
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
+	struct strbuf name = STRBUF_INIT;
 	int len = max_prefix_len;
+	if (submodule_prefix)
+		strbuf_addstr(&name, submodule_prefix);
+	strbuf_addstr(&name, ce->name);
 
 	if (len >= ce_namelen(ce))
 		die("git ls-files: internal error - cache entry not superset of prefix");
 
-	if (!match_pathspec(&pathspec, ce->name, ce_namelen(ce),
-			    len, ps_matched,
-			    S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
-		return;
-	if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+	if (recurse_submodules && S_ISGITLINK(ce->ce_mode) &&
+	    submodule_path_match(&pathspec, name.buf, ps_matched)) {
 		show_gitlink(ce);
-		return;
-	}
+	} else if (match_pathspec(&pathspec, name.buf, name.len,
+				  len, ps_matched,
+				  S_ISDIR(ce->ce_mode) ||
+				  S_ISGITLINK(ce->ce_mode))) {
+		if (tag && *tag && show_valid_bit &&
+		    (ce->ce_flags & CE_VALID)) {
+			static char alttag[4];
+			memcpy(alttag, tag, 3);
+			if (isalpha(tag[0]))
+				alttag[0] = tolower(tag[0]);
+			else if (tag[0] == '?')
+				alttag[0] = '!';
+			else {
+				alttag[0] = 'v';
+				alttag[1] = tag[0];
+				alttag[2] = ' ';
+				alttag[3] = 0;
+			}
+			tag = alttag;
+		}
 
-	if (tag && *tag && show_valid_bit &&
-	    (ce->ce_flags & CE_VALID)) {
-		static char alttag[4];
-		memcpy(alttag, tag, 3);
-		if (isalpha(tag[0]))
-			alttag[0] = tolower(tag[0]);
-		else if (tag[0] == '?')
-			alttag[0] = '!';
-		else {
-			alttag[0] = 'v';
-			alttag[1] = tag[0];
-			alttag[2] = ' ';
-			alttag[3] = 0;
+		if (!show_stage) {
+			fputs(tag, stdout);
+		} else {
+			printf("%s%06o %s %d\t",
+			       tag,
+			       ce->ce_mode,
+			       find_unique_abbrev(ce->sha1,abbrev),
+			       ce_stage(ce));
+		}
+		write_eolinfo(ce, ce->name);
+		write_name(ce->name);
+		if (debug_mode) {
+			const struct stat_data *sd = &ce->ce_stat_data;
+
+			printf("  ctime: %d:%d\n", sd->sd_ctime.sec, sd->sd_ctime.nsec);
+			printf("  mtime: %d:%d\n", sd->sd_mtime.sec, sd->sd_mtime.nsec);
+			printf("  dev: %d\tino: %d\n", sd->sd_dev, sd->sd_ino);
+			printf("  uid: %d\tgid: %d\n", sd->sd_uid, sd->sd_gid);
+			printf("  size: %d\tflags: %x\n", sd->sd_size, ce->ce_flags);
 		}
-		tag = alttag;
 	}
 
-	if (!show_stage) {
-		fputs(tag, stdout);
-	} else {
-		printf("%s%06o %s %d\t",
-		       tag,
-		       ce->ce_mode,
-		       find_unique_abbrev(ce->sha1,abbrev),
-		       ce_stage(ce));
-	}
-	write_eolinfo(ce, ce->name);
-	write_name(ce->name);
-	if (debug_mode) {
-		const struct stat_data *sd = &ce->ce_stat_data;
-
-		printf("  ctime: %d:%d\n", sd->sd_ctime.sec, sd->sd_ctime.nsec);
-		printf("  mtime: %d:%d\n", sd->sd_mtime.sec, sd->sd_mtime.nsec);
-		printf("  dev: %d\tino: %d\n", sd->sd_dev, sd->sd_ino);
-		printf("  uid: %d\tgid: %d\n", sd->sd_uid, sd->sd_gid);
-		printf("  size: %d\tflags: %x\n", sd->sd_size, ce->ce_flags);
-	}
+	strbuf_release(&name);
 }
 
 static void show_ru_info(void)
@@ -566,27 +593,28 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 		setup_work_tree();
 
 	if (recurse_submodules &&
-	    (show_stage || show_deleted || show_others || show_unmerged ||
-	     show_killed || show_modified || show_resolve_undo ||
-	     show_valid_bit || show_tag || show_eol))
-		die("ls-files --recurse-submodules can only be used in "
-		    "--cached mode");
+	    (show_deleted || show_others || show_unmerged ||
+	     show_killed || show_modified || show_resolve_undo))
+		die("ls-files --recurse-submodules unsupported mode");
 
 	if (recurse_submodules && error_unmatch)
 		die("ls-files --recurse-submodules does not support "
 		    "--error-unmatch");
 
-	if (recurse_submodules && argc)
-		die("ls-files --recurse-submodules does not support path "
-		    "arguments");
-
 	parse_pathspec(&pathspec, 0,
 		       PATHSPEC_PREFER_CWD |
 		       PATHSPEC_STRIP_SUBMODULE_SLASH_CHEAP,
 		       prefix, argv);
 
-	/* Find common prefix for all pathspec's */
-	max_prefix = common_prefix(&pathspec);
+	/*
+	 * Find common prefix for all pathspec's
+	 * This is used as a performance optimization which unfortunately cannot
+	 * be done when recursing into submodules
+	 */
+	if (recurse_submodules)
+		max_prefix = NULL;
+	else
+		max_prefix = common_prefix(&pathspec);
 	max_prefix_len = max_prefix ? strlen(max_prefix) : 0;
 
 	/* Treat unmatching pathspec elements as errors */
diff --git a/dir.c b/dir.c
index 0ea235f..28e9736 100644
--- a/dir.c
+++ b/dir.c
@@ -207,8 +207,9 @@ int within_depth(const char *name, int namelen,
 	return 1;
 }
 
-#define DO_MATCH_EXCLUDE   1
-#define DO_MATCH_DIRECTORY 2
+#define DO_MATCH_EXCLUDE   (1<<0)
+#define DO_MATCH_DIRECTORY (1<<1)
+#define DO_MATCH_SUBMODULE (1<<2)
 
 /*
  * Does 'match' match the given name?
@@ -283,6 +284,32 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 			 item->nowildcard_len - prefix))
 		return MATCHED_FNMATCH;
 
+	/* Perform checks to see if "name" is a super set of the pathspec */
+	if (flags & DO_MATCH_SUBMODULE) {
+		/* name is a literal prefix of the pathspec */
+		if ((namelen < matchlen) &&
+		    (match[namelen] == '/') &&
+		    !ps_strncmp(item, match, name, namelen))
+			return MATCHED_RECURSIVELY;
+
+		/* name" doesn't match up to the first wild character */
+		if (item->nowildcard_len < item->len &&
+		    ps_strncmp(item, match, name,
+			       item->nowildcard_len - prefix))
+			return 0;
+
+		/*
+		 * Here is where we would perform a wildmatch to check if
+		 * "name" can be matched as a directory (or a prefix) against
+		 * the pathspec.  Since wildmatch doesn't have this capability
+		 * at the present we have to punt and say that it is a match,
+		 * potentially returning a false positive
+		 * The submodules themselves will be able to perform more
+		 * accurate matching to determine if the pathspec matches.
+		 */
+		return MATCHED_RECURSIVELY;
+	}
+
 	return 0;
 }
 
@@ -386,6 +413,21 @@ int match_pathspec(const struct pathspec *ps,
 	return negative ? 0 : positive;
 }
 
+/**
+ * Check if a submodule is a superset of the pathspec
+ */
+int submodule_path_match(const struct pathspec *ps,
+			 const char *submodule_name,
+			 char *seen)
+{
+	int matched = do_match_pathspec(ps, submodule_name,
+					strlen(submodule_name),
+					0, seen,
+					DO_MATCH_DIRECTORY |
+					DO_MATCH_SUBMODULE);
+	return matched;
+}
+
 int report_path_error(const char *ps_matched,
 		      const struct pathspec *pathspec,
 		      const char *prefix)
diff --git a/dir.h b/dir.h
index da1a858..97c83bb 100644
--- a/dir.h
+++ b/dir.h
@@ -304,6 +304,10 @@ extern int git_fnmatch(const struct pathspec_item *item,
 		       const char *pattern, const char *string,
 		       int prefix);
 
+extern int submodule_path_match(const struct pathspec *ps,
+				const char *submodule_name,
+				char *seen);
+
 static inline int ce_path_match(const struct cache_entry *ce,
 				const struct pathspec *pathspec,
 				char *seen)
diff --git a/t/t3007-ls-files-recurse-submodules.sh b/t/t3007-ls-files-recurse-submodules.sh
index caf3815..ca79fda 100755
--- a/t/t3007-ls-files-recurse-submodules.sh
+++ b/t/t3007-ls-files-recurse-submodules.sh
@@ -69,9 +69,111 @@ test_expect_success 'ls-files recurses more than 1 level' '
 	test_cmp expect actual
 '
 
-test_expect_success '--recurse-submodules does not support using path arguments' '
-	test_must_fail git ls-files --recurse-submodules b 2>actual &&
-	test_i18ngrep "does not support path arguments" actual
+test_expect_success '--recurse-submodules and pathspecs setup' '
+	echo e >submodule/subsub/e.txt &&
+	git -C submodule/subsub add e.txt &&
+	git -C submodule/subsub commit -m "adding e.txt" &&
+	echo f >submodule/f.TXT &&
+	echo g >submodule/g.txt &&
+	git -C submodule add f.TXT g.txt &&
+	git -C submodule commit -m "add f and g" &&
+	echo h >h.txt &&
+	mkdir sib &&
+	echo sib >sib/file &&
+	git add h.txt sib/file &&
+	git commit -m "add h and sib/file" &&
+	git init sub &&
+	echo sub >sub/file &&
+	git -C sub add file &&
+	git -C sub commit -m "add file" &&
+	git submodule add ./sub &&
+	git commit -m "added sub" &&
+
+	cat >expect <<-\EOF &&
+	.gitmodules
+	a
+	b/b
+	h.txt
+	sib/file
+	sub/file
+	submodule/.gitmodules
+	submodule/c
+	submodule/f.TXT
+	submodule/g.txt
+	submodule/subsub/d
+	submodule/subsub/e.txt
+	EOF
+
+	git ls-files --recurse-submodules >actual &&
+	test_cmp expect actual &&
+	cat actual &&
+	git ls-files --recurse-submodules "*" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	h.txt
+	submodule/g.txt
+	submodule/subsub/e.txt
+	EOF
+
+	git ls-files --recurse-submodules "*.txt" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	h.txt
+	submodule/f.TXT
+	submodule/g.txt
+	submodule/subsub/e.txt
+	EOF
+
+	git ls-files --recurse-submodules ":(icase)*.txt" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	h.txt
+	submodule/f.TXT
+	submodule/g.txt
+	EOF
+
+	git ls-files --recurse-submodules ":(icase)*.txt" ":(exclude)submodule/subsub/*" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	sub/file
+	EOF
+
+	git ls-files --recurse-submodules "sub" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "sub/" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "sub/file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "su*/file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "su?/file" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--recurse-submodules and pathspecs' '
+	cat >expect <<-\EOF &&
+	sib/file
+	sub/file
+	EOF
+
+	git ls-files --recurse-submodules "s??/file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "s???file" >actual &&
+	test_cmp expect actual &&
+	git ls-files --recurse-submodules "s*file" >actual &&
+	test_cmp expect actual
 '
 
 test_expect_success '--recurse-submodules does not support --error-unmatch' '
@@ -82,18 +184,14 @@ test_expect_success '--recurse-submodules does not support --error-unmatch' '
 test_incompatible_with_recurse_submodules () {
 	test_expect_success "--recurse-submodules and $1 are incompatible" "
 		test_must_fail git ls-files --recurse-submodules $1 2>actual &&
-		test_i18ngrep 'can only be used in --cached mode' actual
+		test_i18ngrep 'unsupported mode' actual
 	"
 }
 
-test_incompatible_with_recurse_submodules -v
-test_incompatible_with_recurse_submodules -t
 test_incompatible_with_recurse_submodules --deleted
 test_incompatible_with_recurse_submodules --modified
 test_incompatible_with_recurse_submodules --others
-test_incompatible_with_recurse_submodules --stage
 test_incompatible_with_recurse_submodules --killed
 test_incompatible_with_recurse_submodules --unmerged
-test_incompatible_with_recurse_submodules --eol
 
 test_done
-- 
2.8.0.rc3.226.g39d4020


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-21 23:13       ` [PATCH 1/2] ls-files: adding support for submodules Junio C Hamano
@ 2016-09-22  4:18         ` Jeff King
  2016-09-22 16:04           ` Stefan Beller
  2016-09-22 18:13           ` Junio C Hamano
  0 siblings, 2 replies; 29+ messages in thread
From: Jeff King @ 2016-09-22  4:18 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Brandon Williams, git

On Wed, Sep 21, 2016 at 04:13:22PM -0700, Junio C Hamano wrote:

> Brandon Williams <bmwill@google.com> writes:
> 
> > yes you mentioned this and I meant to change that before sending it out.
> > Looks like it slipped through have slipped through.
> 
> I already fixed it up locally when I sent the reply, but thanks for
> resending (which assures me that your local copy is up-to-date and I
> do not have to worry about having to repeat me in the future, if
> this ever needs further rerolling ;-).

While we are on the subject, the commit message also uses some past
tense:

  Allow ls-files to recognize submodules in order to retrieve a list of
  files from a repository's submodules.  This is done by forking off a
  process to recursively call ls-files on all submodules. Also added a
  submodule-prefix command in order to prepend paths to child processes.

The final sentence should be "Also add...".

Since this final bit of logic was sufficiently non-obvious that it only
came about in v2, maybe it is worth describing a little more fully:

  Also add a submodule-prefix option, which instructs the child
  processes to prepend the prefix to each path they output. This makes
  the output paths match what is on the filesystem (i.e., as if the
  submodule boundaries were not there at all).

Should this option just be "--prefix", or maybe "--output-prefix"?
Submodules are the obvious use case here, but I could see somebody
adapting this for other uses (alternatively, if we _do_ want to keep it
just as an implementation detail for submodules, we should probably
discourage people in the documentation from using it themselves).

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: optionally recurse into submodules
  2016-09-21 22:42       ` [PATCH 1/2] ls-files: optionally recurse into submodules Brandon Williams
@ 2016-09-22  6:20         ` Jeff King
  2016-09-23 23:31           ` Brandon Williams
  0 siblings, 1 reply; 29+ messages in thread
From: Jeff King @ 2016-09-22  6:20 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

On Wed, Sep 21, 2016 at 03:42:52PM -0700, Brandon Williams wrote:

> @@ -68,6 +71,21 @@ static void write_eolinfo(const struct cache_entry *ce, const char *path)
>  static void write_name(const char *name)
>  {
>  	/*
> +	 * NEEDSWORK: To make this thread-safe, full_name would have to be owned
> +	 * by the caller.

I'm not sure if that is quite true. You could simply drop the "static"
here, and pay the malloc cost each time. But if we want to amortize the
allocation, then yeah, somebody else on the stack needs to own it.

That being said, I don't know if it is worth pointing this out as a
NEEDSWORK or not. Most of git is not thread-safe, so I think that's
generally the norm.

> +	 *
> +	 * full_name get reused across output lines to minimize the allocation
> +	 * churn.

Likewise, this kind of strbuf-reuse trickery is common in git. I don't
know if it's worth a comment or not (though I mind this one much less,
because it's a bit more subtle, being across multiple calls rather than
in a single loop).

(To be clear, I actually don't mind either _that_ much; usually the
problem is under-commenting, not over-commenting. I am just trying to
give style pointers since you are new to the project).

> +	static struct strbuf full_name = STRBUF_INIT;
> +	if (submodule_prefix && *submodule_prefix) {
> +		strbuf_reset(&full_name);
> +		strbuf_addstr(&full_name, submodule_prefix);
> +		strbuf_addstr(&full_name, name);
> +		name = full_name.buf;
> +	}

I actually wonder if it would be more obvious if we hoisted out the
prefix copy, too. IOW, have a global:

  /* used to assemble full names in place */
  static struct strbuf prefixed_name = STRBUF_INIT;
  static size_t prefixed_name_base;

then parse --submodule-prefix as:

  strbuf_reset(&prefixed_name);
  strbuf_addstr(&prefixed_name, arg);
  prefixed_name_base = prefixed_name.len;

and then here do:

  if (prefixed_name_base) {
	strbuf_addstr(&prefixed_name, name);
	name = prefixed_name.buf;
  }
  ... do stuff with name ...
  if (prefixed_name_base)
	strbuf_setlen(&prefixed_name, prefixed_name_base);

I dunno. It is slightly more efficient (we do not memcpy the prefix over
and over), but I doubt that matters much in practice. I just wonder if
it makes the buffer reuse a bit more obvious.

> +/**
> + * Recursively call ls-files on a submodule
> + */
> +static void show_gitlink(const struct cache_entry *ce)
> +{
> +	struct child_process cp = CHILD_PROCESS_INIT;
> +	int status;
> +
> +	argv_array_push(&cp.args, "ls-files");
> +	argv_array_push(&cp.args, "--recurse-submodules");
> +	argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
> +			 submodule_prefix ? submodule_prefix : "",
> +			 ce->name);
> +	cp.git_cmd = 1;
> +	cp.dir = ce->name;
> +	status = run_command(&cp);
> +	if (status)
> +		exit(status);
> +}

This doesn't propagate the parent argv at all. So if I run:

  git ls-files -z --recurse-submodules

then the paths are all NUL-terminated in the parent, but
newline-terminated in the submodules. Oops.

I see in the second patch you make an effort to pass in some specific
options explicitly. In some ways that is safer (we do not accidentally
pass an argument that needs to be munged). But in some ways it is less
safe (as shown by "-z" above, anything we fail to pass in show_gitlink()
must be explicitly blocked in cmd_ls_files(), or we produce garbage
output). It also means that we have a bunch of boilerplate forwarding
the options along (e.g., your second patch lets through --stage, but why
not other ones like "--deleted"?).

So I dunno. I would have done it more like:

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 03c283e..2a3d65c 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -31,6 +31,7 @@ static int debug_mode;
 static int show_eol;
 static const char *submodule_prefix;
 static int recurse_submodules;
+static struct argv_array recurse_argv = ARGV_ARRAY_INIT;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -170,6 +171,32 @@ static void show_killed_files(struct dir_struct *dir)
 	}
 }
 
+/*
+ * Copy all of the parent arguments, but omit --submodule-prefix, as
+ * we will be adding our own. Other arguments are assumed to behave
+ * reasonably in the submodule, or to be blocked explicitly in
+ * cmd_ls_files().
+ *
+ * We have to make a full copy here, because parse_options will munge our
+ * original.
+ */
+static void store_recurse_argv(const char **argv)
+{
+	for (; *argv; argv++) {
+		const char *arg = *argv;
+
+		/* Yikes, we're reinventing option parsing here. */
+		if (starts_with(arg, "--submodule-prefix="))
+			continue;
+		if (!strcmp(arg, "--submodule-prefix")) {
+			if (argv[1])
+				argv++;
+			continue;
+		}
+		argv_array_push(&recurse_argv, arg);
+	}
+}
+
 /**
  * Recursively call ls-files on a submodule
  */
@@ -177,9 +204,10 @@ static void show_gitlink(const struct cache_entry *ce)
 {
 	struct child_process cp = CHILD_PROCESS_INIT;
 	int status;
+	int i;
 
-	argv_array_push(&cp.args, "ls-files");
-	argv_array_push(&cp.args, "--recurse-submodules");
+	for (i = 0; i < recurse_argv.argc; i++)
+		argv_array_push(&cp.args, recurse_argv.argv[i]);
 	argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
 			 submodule_prefix ? submodule_prefix : "",
 			 ce->name);
@@ -535,6 +563,8 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 	if (read_cache() < 0)
 		die("index file corrupt");
 
+	store_recurse_argv(argv);
+
 	argc = parse_options(argc, argv, prefix, builtin_ls_files_options,
 			ls_files_usage, 0);
 	el = add_exclude_list(&dir, EXC_CMDL, "--exclude option");
@@ -565,13 +595,6 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix)
 	if (require_work_tree && !is_inside_work_tree())
 		setup_work_tree();
 
-	if (recurse_submodules &&
-	    (show_stage || show_deleted || show_others || show_unmerged ||
-	     show_killed || show_modified || show_resolve_undo ||
-	     show_valid_bit || show_tag || show_eol))
-		die("ls-files --recurse-submodules can only be used in "
-		    "--cached mode");
-
 	if (recurse_submodules && error_unmatch)
 		die("ls-files --recurse-submodules does not support "
 		    "--error-unmatch");

which I admit is somewhat ugly, due to the "yikes" comment above. For
example, I think parse_options() will actually allow a unique prefix
like "--submodule-pre=foo/", which we would not catch. Or worse, we
would accidentally munge the pathspec in:

  git ls-files --recurse-submodules -- --submodule-prefix=foo

(or any case where a non-option looked like "--submodule-prefix=".
Obviously those are pathological, but it's still nasty.

I think we _could_ actually avoid picking out the --submodule-prefix
argument entirely, and instead just add our own at the end to override
it.

I don't think it actually matters that much here.  You _could_ get away
with even leaving the old --submodule-prefix in place, and just adding
our new one at the end to override it. That works, though it does mean
that your argv gets continually longer as you recurse modules. Or I
suppose another way would be to make the prefix option additive, and
then just literally add "--prefix-add=$name" at each level. The option
parser would then build up the real prefix from multiple prefix-add
options.

The final thing I could think of is that we could teach parse_options()
to record a canonicalized copy of all of the options in an argv_array.
So it would normalize "--submodule-pre foo" to "--submodule-prefix=foo",
and that makes our after-the-fact parsing much easier.

But like I said, I dunno. I'm on the fence on which approach is the
least ugly (including your original, to just forward specific options).
They all have their warts. :)

-Peff

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-22  4:18         ` Jeff King
@ 2016-09-22 16:04           ` Stefan Beller
  2016-09-22 18:13           ` Junio C Hamano
  1 sibling, 0 replies; 29+ messages in thread
From: Stefan Beller @ 2016-09-22 16:04 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Brandon Williams, git@vger.kernel.org

On Wed, Sep 21, 2016 at 9:18 PM, Jeff King <peff@peff.net> wrote:
> On Wed, Sep 21, 2016 at 04:13:22PM -0700, Junio C Hamano wrote:
>
>> Brandon Williams <bmwill@google.com> writes:
>>
>> > yes you mentioned this and I meant to change that before sending it out.
>> > Looks like it slipped through have slipped through.
>>
>> I already fixed it up locally when I sent the reply, but thanks for
>> resending (which assures me that your local copy is up-to-date and I
>> do not have to worry about having to repeat me in the future, if
>> this ever needs further rerolling ;-).
>
> While we are on the subject, the commit message also uses some past
> tense:
>
>   Allow ls-files to recognize submodules in order to retrieve a list of
>   files from a repository's submodules.  This is done by forking off a
>   process to recursively call ls-files on all submodules. Also added a
>   submodule-prefix command in order to prepend paths to child processes.
>
> The final sentence should be "Also add...".
>
> Since this final bit of logic was sufficiently non-obvious that it only
> came about in v2, maybe it is worth describing a little more fully:
>
>   Also add a submodule-prefix option, which instructs the child
>   processes to prepend the prefix to each path they output. This makes
>   the output paths match what is on the filesystem (i.e., as if the
>   submodule boundaries were not there at all).
>
> Should this option just be "--prefix", or maybe "--output-prefix"?

I think --prefix can easily be confused with the internal prefix that we hand
down to each command. --output-prefix works for the ls-files case, but as
soon as we do more than just printing out file names, we'd use that prefix for
more than just output, e.g. in grep it becomes part of the pathspec IIUC.
I agree however that we probably don't want to keep it submodule specific.

> Submodules are the obvious use case here, but I could see somebody
> adapting this for other uses (alternatively, if we _do_ want to keep it
> just as an implementation detail for submodules, we should probably
> discourage people in the documentation from using it themselves).
>
> -Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-22  4:18         ` Jeff King
  2016-09-22 16:04           ` Stefan Beller
@ 2016-09-22 18:13           ` Junio C Hamano
  2016-09-23  3:41             ` Jeff King
  2016-09-27 21:38             ` Junio C Hamano
  1 sibling, 2 replies; 29+ messages in thread
From: Junio C Hamano @ 2016-09-22 18:13 UTC (permalink / raw)
  To: Jeff King; +Cc: Brandon Williams, git

Jeff King <peff@peff.net> writes:

> Should this option just be "--prefix", or maybe "--output-prefix"?
> Submodules are the obvious use case here, but I could see somebody
> adapting this for other uses (alternatively, if we _do_ want to keep it
> just as an implementation detail for submodules, we should probably
> discourage people in the documentation from using it themselves).

I agree that this is not specific to submodules; this is closely
related to what we internally call "prefix", but is different.

In any case, I would strongly recommend against exposing this (or
anything for that matter) "--prefix" to the end-user, especially
because this feature is likely to be applicable to many subcommands,
and some subcommands would want different sort of prefixing made to
different things.  Think of "git diff" that has a way to customize
the "a/" and "b/" part in "git --diff a/$path b/$path", and has
learned another way to prepend an additional prefix to every line of
output via "--line-prefix".  We want to give reasonably specific
names to things so that readers can tell what it affects from the
name of an option.

What we internally call "prefix" and "--submodule-prefix" is closely
related in that they both interact with pathspecs.  "prefix" gets
prepended to elements of an end-user supplied pathspec before a
full-path-in-the-repository (i.e. a path in the index and a path
relative from the top of the working tree) is matched against them.
This new thing on the other hand allows the leading part of pathspec
elements to be above the full-path-in-the-repository.  For example,
my primary Git working area is at ~/w/git.git and I could say

    $ cd ~
    $ git -C w/git.git/ ls-files \
        --submodule-prefix=w/git.git -- 'w/git.git/D\*' |
      xargs ls -1 -l

The command starts (eh, at least "pretends to start") in the top of
my working tree, which means that the "prefix" is NULL.  But the
shell that spawns the "git" command is still sitting in my home
directory, and viewed from there, the Documentation subdirectory is
at w/git.git/Documentation/, which is what I gave the command as the
sole element of the pathspec.  From "ls-files"'s point of view, each
path it discovers in the index (and in the working tree) is first
prefixed with --submodule-prefix before it gets matched against this
pathspec and the result is shown with this prefix, so that the
output gets relative to the calling shell and xargs would find them
at expected places.  The --submodule-prefix acts to cancel out the
fact that the caller sits a few levels above the working tree and
gave a pathspec with elements relative to that higher level in the
directory hierarchy.

Obviously, the above example is *not* using submodules at all, and
demonstrates that the mechanism does not have to be used solely to
implement recurse-into-submodules behaviour, so in that sense, the
name of the option is not quite accurate.  It however is a good
demonstration why it is *not* "output prefix".  It makes me wonder
if pathspec-prefix is a better name, but that may give an incorrect
impression that this would be used only for matching and would not
affect the output.  Do we need separate options to specify what gets
prefixed to the in-repository path before paths are matched against
a pathspec (i.e. "--pathspec-prefix") and what gets prefixed to the
resulting paths in the output (i.e. "--output-path-prefix")?

A few further random thoughts.

 * As Stefan alluded to (much) earlier, it might be a better idea
   to have these 'prefix' as the global option to "git" potty, not
   to each subcommand that happens to support them;

 * It is unclear how this should interact with commands that are run
   in a subdirectory of the working tree.  E.g. what should the
   prefix and the pathspec look like if the command in the above
   example is started in w/git.git/Documentation subdirectory, i.e.

    $ cd ~
    $ git -C w/git.git/Documentation ls-files \
        --submodule-prefix=??????? -- '???????' |
      xargs ls -1 -l

   Should we error out if we are not at the top of the working tree
   when --submodule-prefix is given?

 * It is further unclear how this should interact with commands that
   can give "relative" paths output if we allowed them to start from
   a subdirectory of the working tree.  E.g. "git grep" gives its
   output relative to where it was started from:

    $ cd ~/w/git.git/Documentation
    $ git grep -c 'ERROR.*frotz' -- '*.txt'
    git-checkout.txt:1

   How should it interact with the --submodule-prefix option and
   what value should the caller give to the option and how should
   the caller adjust the pathspec if it wants to get paths relative
   to my home directory?  I do not think the following is correct,
   but I am not sure what the correct values of them should be:

    $ cd ~
    $ git -C w/git.git/Documentation \
      --submodule-prefix=w/git.git/ \
      grep -c 'ERROR.*frotz' -- 'w/git.git/Documentation/*.txt'

   This becomes a non-issue if we forbid use of this new prefix when
   "git" is not started at the top of the working tree.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-22 18:13           ` Junio C Hamano
@ 2016-09-23  3:41             ` Jeff King
  2016-09-23  5:47               ` Stefan Beller
  2016-09-27 21:38             ` Junio C Hamano
  1 sibling, 1 reply; 29+ messages in thread
From: Jeff King @ 2016-09-23  3:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Brandon Williams, git

On Thu, Sep 22, 2016 at 11:13:13AM -0700, Junio C Hamano wrote:

> In any case, I would strongly recommend against exposing this (or
> anything for that matter) "--prefix" to the end-user, especially
> because this feature is likely to be applicable to many subcommands,
> and some subcommands would want different sort of prefixing made to
> different things.

Fair enough. I was thinking that this was similar to other options like
"read-tree --prefix" or "archive --prefix". But if nobody really wants
it for anything non-internal, then certainly keeping it as an internal
feature is an easy way to avoid being stuck with a bad public interface
in the long term.

> What we internally call "prefix" and "--submodule-prefix" is closely
> related in that they both interact with pathspecs.

Yeah, I didn't think about pathspecs at all (since they are totally
disabled in patch 1, and I hadn't really read through patch 2 carefully
yet).

>  * As Stefan alluded to (much) earlier, it might be a better idea
>    to have these 'prefix' as the global option to "git" potty, not
>    to each subcommand that happens to support them;

That seems like it would be nice, but there's going to be an interim
period where some commands do not respect the global "--prefix" at all
(in the worst case, consider a third party command).

>  * It is unclear how this should interact with commands that are run
>    in a subdirectory of the working tree.  E.g. what should the
>    prefix and the pathspec look like if the command in the above
>    example is started in w/git.git/Documentation subdirectory, i.e.
> 
>     $ cd ~
>     $ git -C w/git.git/Documentation ls-files \
>         --submodule-prefix=??????? -- '???????' |
>       xargs ls -1 -l
> 
>    Should we error out if we are not at the top of the working tree
>    when --submodule-prefix is given?

Without thinking too hard on it, it seems like the submodule prefix
just needs to come after the normal "prefix" that we add when moving to
the top-level of a tree. So:

  cd foo
  git ls-files --submodule-prefix=bar

should show "foo/bar". Or another way of thinking about it is that the
submodule prefix is always relative to the current directory. Recursion
into submodule would always happen at their top-level, and so would do
the right thing.

But again, that's without thinking hard on it. There may be some corner
cases.

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-23  3:41             ` Jeff King
@ 2016-09-23  5:47               ` Stefan Beller
  2016-09-23  6:06                 ` Jeff King
  0 siblings, 1 reply; 29+ messages in thread
From: Stefan Beller @ 2016-09-23  5:47 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Brandon Williams, git@vger.kernel.org

On Thu, Sep 22, 2016 at 8:41 PM, Jeff King <peff@peff.net> wrote:

>>  * As Stefan alluded to (much) earlier, it might be a better idea
>>    to have these 'prefix' as the global option to "git" potty, not
>>    to each subcommand that happens to support them;
>
> That seems like it would be nice, but there's going to be an interim
> period where some commands do not respect the global "--prefix" at all
> (in the worst case, consider a third party command).

My current line of thinking is to have a new flag in command struct in
git.c to enable the global --prefix, (c.f. RUN_SETUP | NEED_WORK_TREE)
so we'd have a ALLOW_OUTSIDE_PREFIX flag which can be used to enable
this feature. In case that flag is not set, but a user tries a
--prefix=<somewhere>
we can still

    die("nope, we don't do that");

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-23  5:47               ` Stefan Beller
@ 2016-09-23  6:06                 ` Jeff King
  2016-09-23 16:16                   ` Brandon Williams
  0 siblings, 1 reply; 29+ messages in thread
From: Jeff King @ 2016-09-23  6:06 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, Brandon Williams, git@vger.kernel.org

On Thu, Sep 22, 2016 at 10:47:17PM -0700, Stefan Beller wrote:

> On Thu, Sep 22, 2016 at 8:41 PM, Jeff King <peff@peff.net> wrote:
> 
> >>  * As Stefan alluded to (much) earlier, it might be a better idea
> >>    to have these 'prefix' as the global option to "git" potty, not
> >>    to each subcommand that happens to support them;
> >
> > That seems like it would be nice, but there's going to be an interim
> > period where some commands do not respect the global "--prefix" at all
> > (in the worst case, consider a third party command).
> 
> My current line of thinking is to have a new flag in command struct in
> git.c to enable the global --prefix, (c.f. RUN_SETUP | NEED_WORK_TREE)
> so we'd have a ALLOW_OUTSIDE_PREFIX flag which can be used to enable
> this feature. In case that flag is not set, but a user tries a
> --prefix=<somewhere>
> we can still
> 
>     die("nope, we don't do that");

Yeah, a positive "I support this" flag would at least let us correctly
flag errors, which is the best we can do. That won't work for
non-builtins, but perhaps it is good enough in practice.

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-23  6:06                 ` Jeff King
@ 2016-09-23 16:16                   ` Brandon Williams
  2016-09-23 16:34                     ` Stefan Beller
  0 siblings, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-23 16:16 UTC (permalink / raw)
  To: Jeff King; +Cc: Stefan Beller, Junio C Hamano, git@vger.kernel.org

> Yeah, a positive "I support this" flag would at least let us correctly
> flag errors, which is the best we can do. That won't work for
> non-builtins, but perhaps it is good enough in practice.
>
> -Peff


So it sounds like we agree that this prefix option should be pushed to
the top level.
The question is have we come to a consensus on what we should be
calling the option?
Leave it as submodule-prefix or do we need to come up with a different name?

-Brandon

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-23 16:16                   ` Brandon Williams
@ 2016-09-23 16:34                     ` Stefan Beller
  2016-09-25 11:03                       ` Nazri Ramliy
  0 siblings, 1 reply; 29+ messages in thread
From: Stefan Beller @ 2016-09-23 16:34 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, Junio C Hamano, git@vger.kernel.org

On Fri, Sep 23, 2016 at 9:16 AM, Brandon Williams <bmwill@google.com> wrote:
>> Yeah, a positive "I support this" flag would at least let us correctly
>> flag errors, which is the best we can do. That won't work for
>> non-builtins, but perhaps it is good enough in practice.
>>
>> -Peff
>
>
> So it sounds like we agree that this prefix option should be pushed to
> the top level.
> The question is have we come to a consensus on what we should be
> calling the option?

The option itself is very similar to -C, which changes the directory to the
given argument before executing the git command.
e.g. in git:

    git -C builtin ls-files
    add.c
    ...

So for the submodule case we'd want that plus keeping around that prefix,
which makes me wonder if we could just store the argument of -C into a global
and use that when --keep-prefix is given, so you'd do a

    git -C path/to/sub --keep-prefix ls-files
    path/to/sub/file1
    ...

maybe --[keep|use]-[path|prefix] ?

You could of course go with a fully independent option, but how
would that work together with -C ?
(first change the dir and then change again while remembering the prefix?
or the other way round?)

> Leave it as submodule-prefix or do we need to come up with a different name?
>
> -Brandon

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/2 v2] ls-files: add pathspec matching for submodules
  2016-09-21 23:28       ` [PATCH 2/2 v2] " Brandon Williams
@ 2016-09-23 18:48         ` Junio C Hamano
  2016-09-23 19:20         ` Junio C Hamano
  1 sibling, 0 replies; 29+ messages in thread
From: Junio C Hamano @ 2016-09-23 18:48 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

> -	/* Find common prefix for all pathspec's */
> -	max_prefix = common_prefix(&pathspec);
> +	/*
> +	 * Find common prefix for all pathspec's
> +	 * This is used as a performance optimization which unfortunately cannot
> +	 * be done when recursing into submodules
> +	 */
> +	if (recurse_submodules)
> +		max_prefix = NULL;
> +	else
> +		max_prefix = common_prefix(&pathspec);
>  	max_prefix_len = max_prefix ? strlen(max_prefix) : 0;

This is OK for now, but for a future enhancement, I think we could
do better than this.  In a superproject with a submodule at "sub/",
the current implementation of the common_prefix() helper would yield
"sub/a/" when given "sub/a/x" and "sub/a/y" (a pathspec with two
elements), which we want to avoid.

But somebody should be able to notice, before "sub/a/" is given to
max_prefix here, that "sub/" is the leaf level in our repository and
reduce the max_prefix to it.  dir.c::common_prefix_len() might be 
a place we could do so, but I didn't think about the ramifications
of doing so for other callers of common_prefix() or when we are not
recursing into submodules.  Doing it in the caller here, i.e.

	max_prefix = common_prefix(&pathspec);
        if (recurse_submodules)
        	max_prefix = chomp_at_submodule_boundary(max_prefix);

is certainly safer.

If the superproject has submodules at "a/b/{sub1,sub2,...}", this
matters more.  We do want to notice that we won't have to scan
outside "a/b/" of the index given "a/b/sub1" and "a/b/sub2" as a
pathspec.

The common_prefix_len() function also looks beyond symbolic links,
which is another thing that we may want to think about.  In a
repository with a symbolic link "link" pointing somewhere else, when
you give "link/a/x" and "link/a/y" (a pathspec with two elements),
we would get "link/a/" as a common prefix, but we won't find
anything underneath "link" in our index.  In such a case, leaving
the common prefix to "link/a/" _might_ allow us to notice that no
pathspec elements can ever match, so not noticing that the common
prefix points beyond a symbolic link might be a feature.  I dunno.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/2 v2] ls-files: add pathspec matching for submodules
  2016-09-21 23:28       ` [PATCH 2/2 v2] " Brandon Williams
  2016-09-23 18:48         ` Junio C Hamano
@ 2016-09-23 19:20         ` Junio C Hamano
  2016-09-23 20:49           ` Brandon Williams
  1 sibling, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2016-09-23 19:20 UTC (permalink / raw)
  To: Brandon Williams; +Cc: git

Brandon Williams <bmwill@google.com> writes:

>  static void show_ce_entry(const char *tag, const struct cache_entry *ce)
>  {
> +	struct strbuf name = STRBUF_INIT;
>  	int len = max_prefix_len;
> +	if (submodule_prefix)
> +		strbuf_addstr(&name, submodule_prefix);
> +	strbuf_addstr(&name, ce->name);
> ...  
> +	} else if (match_pathspec(&pathspec, name.buf, name.len,
> +				  len, ps_matched,
> +				  S_ISDIR(ce->ce_mode) ||
> +				  S_ISGITLINK(ce->ce_mode))) {

There is an interesting observation around this code.  Note that it
is just something to keep in mind, even though I think we are in no
position to solve this within the scope of this series, or in fact I
am not sure if there is anything to "fix".

The expectation here is that the leading part of pathspec elements
contain path components above and outside the current working tree,
e.g. in a superproject with a submodule at "sub/", the end-user may
have said from the top of the superproject

    git ls-files --recurse-submodules -- sub/file

and the recursing "ls-files" is spawned as

    git -C sub ls-files -- sub/file

relaying the pathspec literally.

This does not correctly work if the path to the submodule has
wildcard in it.  Imagine that the submodule were at "s*b/".  The
recursing invocation would look like:

    git -C "s*b" ls-files -- "s*b/file"

Further imagine that the index in the submodule at "s*b" has two
paths in it, i.e.

	file
        oob/file

The prefix is prepended to them, to turn them into

	s*b/file
        s*b/oob/file

and I suspect that the pathspec element "s*b/file" would match both
of them.

The pathspec machinery has a provision to prevent a similar gotcha
happening for the "prefix" we internally use.  In a sample
repository created like so:

    $ git init
    $ mkdir -p 's*b/oob' sib
    $ >sib/file
    $ cd 's*b'
    $ >file
    $ >oob/file
    $ git add .
    $ git ls-files -- file

the "ls-files" in the last step gets 's*b/' as the "prefix", and the
pathspec is formed by concatenating "file" to it, but in a special
way.  The part that come from the "prefix" is marked not to honor
any wildcard in it, so 's*b/' even though it has an asterisk, it is
forced to match literally, giving only 's*b/file'.

A saving grace is that "s*b/file" in this case is what the end-user
is giving us, not something we internally generated.  So we can
simply blame the end user, saying "what --recurse-submodules does is
to (conceptually) flatten the indices of submodules into the index
of the superproject and show the entries that match your pathspec.
Because you gave us 's*b/file', which does match 's*b/oob/file',
that is what you get."

;-)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/2 v2] ls-files: add pathspec matching for submodules
  2016-09-23 19:20         ` Junio C Hamano
@ 2016-09-23 20:49           ` Brandon Williams
  0 siblings, 0 replies; 29+ messages in thread
From: Brandon Williams @ 2016-09-23 20:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Fri, Sep 23, 2016 at 12:20 PM, Junio C Hamano <gitster@pobox.com> wrote:
> There is an interesting observation around this code.  Note that it
> is just something to keep in mind, even though I think we are in no
> position to solve this within the scope of this series, or in fact I
> am not sure if there is anything to "fix".
>
> The expectation here is that the leading part of pathspec elements
> contain path components above and outside the current working tree,
> e.g. in a superproject with a submodule at "sub/", the end-user may
> have said from the top of the superproject
>
> A saving grace is that "s*b/file" in this case is what the end-user
> is giving us, not something we internally generated.  So we can
> simply blame the end user, saying "what --recurse-submodules does is
> to (conceptually) flatten the indices of submodules into the index
> of the superproject and show the entries that match your pathspec.
> Because you gave us 's*b/file', which does match 's*b/oob/file',
> that is what you get."
>
> ;-)

Yeah I've been thinking a bit about that as well.  To me, it is
incredibly silly to
have a wildcard character in a filename (its unfortunate that its
allowed).  We can
easily do as you suggest and simply blame the user and if they do have wildcard
characters in their filenames they would just need to force the
pathspec code to
do checks literally (using the appropriate pathspec magic).  This
would just limit their
ability to use actual wildcards in their pathspecs, ie they have to
pick wildcards in their
filenames or the ability to do wildmatching.

-Brandon

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: optionally recurse into submodules
  2016-09-22  6:20         ` Jeff King
@ 2016-09-23 23:31           ` Brandon Williams
  0 siblings, 0 replies; 29+ messages in thread
From: Brandon Williams @ 2016-09-23 23:31 UTC (permalink / raw)
  To: Jeff King; +Cc: git

On Wed, Sep 21, 2016 at 11:20 PM, Jeff King <peff@peff.net> wrote:
>> +/**
>> + * Recursively call ls-files on a submodule
>> + */
>> +static void show_gitlink(const struct cache_entry *ce)
>> +{
>> +     struct child_process cp = CHILD_PROCESS_INIT;
>> +     int status;
>> +
>> +     argv_array_push(&cp.args, "ls-files");
>> +     argv_array_push(&cp.args, "--recurse-submodules");
>> +     argv_array_pushf(&cp.args, "--submodule-prefix=%s%s/",
>> +                      submodule_prefix ? submodule_prefix : "",
>> +                      ce->name);
>> +     cp.git_cmd = 1;
>> +     cp.dir = ce->name;
>> +     status = run_command(&cp);
>> +     if (status)
>> +             exit(status);
>> +}
>
> This doesn't propagate the parent argv at all. So if I run:
>
>   git ls-files -z --recurse-submodules
>
> then the paths are all NUL-terminated in the parent, but
> newline-terminated in the submodules. Oops.

Yep definitely missed that.  I can fix that.  I think the main reason
for not blindly
copying the argv array is that there may be some things we don't want to pass
to the child. While not in the context of ls-files, I was working on
recursive grep
earlier and with that you can pass a rev to grep.  You can't blindly
copy that because
 the rev is meaningless to the the child and may produce broken
output.  Instead we
would need to pass the actual rev of what the parent has checked out
in that particular
rev.  I haven't thought it completely through yet but it did
discourage me from blindly
copying the args across.

-Brandon

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-23 16:34                     ` Stefan Beller
@ 2016-09-25 11:03                       ` Nazri Ramliy
  0 siblings, 0 replies; 29+ messages in thread
From: Nazri Ramliy @ 2016-09-25 11:03 UTC (permalink / raw)
  To: Stefan Beller
  Cc: Brandon Williams, Jeff King, Junio C Hamano, git@vger.kernel.org

On Sat, Sep 24, 2016 at 12:34 AM, Stefan Beller <sbeller@google.com> wrote:
>     git -C path/to/sub --keep-prefix ls-files

Note that the above can also be written like so (and works
identically), due to the fact that -C can be given multiple times:

    git -C path -C to -C sub --keep-prefix ls-files

nazri

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-22 18:13           ` Junio C Hamano
  2016-09-23  3:41             ` Jeff King
@ 2016-09-27 21:38             ` Junio C Hamano
  2016-09-27 21:48               ` Brandon Williams
  1 sibling, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2016-09-27 21:38 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, git

As this is relevant to what to call the prefix thing that is passed
down to an internal re-invocation of ls-files and how to explain it
to end-users...

Junio C Hamano <gitster@pobox.com> writes:

> I agree that this is not specific to submodules; this is closely
> related to what we internally call "prefix", but is different.
>
> In any case, I would strongly recommend against exposing this (or
> anything for that matter) "--prefix" to the end-user,...

I have a slight suspicion that this was what made you say earlier
that I was against exposing this at all to end users, but what I
meant was a plain boring "prefix" is a bad name and nothing more
than that.

> What we internally call "prefix" and "--submodule-prefix" is closely
> related in that they both interact with pathspecs.  "prefix" gets
> prepended to elements of an end-user supplied pathspec before a
> full-path-in-the-repository (i.e. a path in the index and a path
> relative from the top of the working tree) is matched against them.

In a sense, this new thing is a superset of the existing GIT_PREFIX,
which gives the current working directory from the end user's point
of view relative to the actual current working directory.  The "git"
wrapper, when running a "git thing" from a subdirectory of a working
tree, chdir(2)s up to the top-level before spawning the "thing"
subcommand that is not built-in or a third-party "git-thing" binary
on user's $PATH; so "git-thing" binary will be told via $GIT_PREFIX
relative to what directory path-like things the end user supplied to
the command (e.g. "-o outfile" argument and pathspecs) need to be
interpreted.  The new thing can override the $GIT_PREFIX to allow
path-like things to be interpreted relative to somewhere _above_ the
top-level of the working tree.

I am tempted to suggest GIT_SUPER_PREFIX, 50% because I cannot think
of any better word, 40% because I think it actually makes sense (in
the sense that this comes _above_ it, hence "super", and also in the
sense that this is something your "super" project would give you),
and 10% because I hope that being ridiculous would nudge people to
come up with a better alternative ;-)

And from that point of view ...

>  * It is unclear how this should interact with commands that are run
>    in a subdirectory of the working tree.  E.g. what should the
>    prefix and the pathspec look like if the command in the above
>    example is started in w/git.git/Documentation subdirectory, i.e.
>
>     $ cd ~
>     $ git -C w/git.git/Documentation ls-files \
>         --submodule-prefix=??????? -- '???????' |
>       xargs ls -1 -l
>
>    Should we error out if we are not at the top of the working tree
>    when --submodule-prefix is given?

... the answer to this question becomes clear.  It is not possible
to _be_ in a subdirectory "Documentation" of this working tree and
in a directory "~" above this working tree at the same time, so we
simply should forbid running the command from anywhere other than
the top of the working tree (i.e. the internal "prefix" and
GIT_PREFIX must be empty) when the super-prefix is set by erroring
it out.  When we realize that "prefix" adds to the paths that are
supplied by the user (e.g. when the user says Makefile while in t/
subdirectory, i.e. GIT_PREFIX=t/, s/he means t/Makefile), but this
new thing subtracts from the paths given by the user (e.g. when the
user gives a pathspec 'w/git.git/D*' while setting the super thing
as w/git.git, because s/he is at ~/, the pathspec matcher
conceptually subtracts w/git.git/ from the pathspec before matching
them against the paths it finds in the index), it becomes clear that
giving both at the same time is awkward and not very useful.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-27 21:38             ` Junio C Hamano
@ 2016-09-27 21:48               ` Brandon Williams
  2016-09-27 22:01                 ` Junio C Hamano
  0 siblings, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-27 21:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, git

On 09/27, Junio C Hamano wrote:
> > What we internally call "prefix" and "--submodule-prefix" is closely
> > related in that they both interact with pathspecs.  "prefix" gets
> > prepended to elements of an end-user supplied pathspec before a
> > full-path-in-the-repository (i.e. a path in the index and a path
> > relative from the top of the working tree) is matched against them.
> 
> In a sense, this new thing is a superset of the existing GIT_PREFIX,
> which gives the current working directory from the end user's point
> of view relative to the actual current working directory.  The "git"
> wrapper, when running a "git thing" from a subdirectory of a working
> tree, chdir(2)s up to the top-level before spawning the "thing"
> subcommand that is not built-in or a third-party "git-thing" binary
> on user's $PATH; so "git-thing" binary will be told via $GIT_PREFIX
> relative to what directory path-like things the end user supplied to
> the command (e.g. "-o outfile" argument and pathspecs) need to be
> interpreted.  The new thing can override the $GIT_PREFIX to allow
> path-like things to be interpreted relative to somewhere _above_ the
> top-level of the working tree.
> 
> I am tempted to suggest GIT_SUPER_PREFIX, 50% because I cannot think
> of any better word, 40% because I think it actually makes sense (in
> the sense that this comes _above_ it, hence "super", and also in the
> sense that this is something your "super" project would give you),
> and 10% because I hope that being ridiculous would nudge people to
> come up with a better alternative ;-)

haha, yeah it seems difficult to come up with a name that is going to
mean the same thing in 5 years from now.  At least GIT_SUPER_PREFIX
makes senese in that it is a path from the superproject down to the
submodule.

> >  * It is unclear how this should interact with commands that are run
> >    in a subdirectory of the working tree.  E.g. what should the
> >    prefix and the pathspec look like if the command in the above
> >    example is started in w/git.git/Documentation subdirectory, i.e.
> >
> >     $ cd ~
> >     $ git -C w/git.git/Documentation ls-files \
> >         --submodule-prefix=??????? -- '???????' |
> >       xargs ls -1 -l
> >
> >    Should we error out if we are not at the top of the working tree
> >    when --submodule-prefix is given?
> 
> ... the answer to this question becomes clear.  It is not possible
> to _be_ in a subdirectory "Documentation" of this working tree and
> in a directory "~" above this working tree at the same time, so we
> simply should forbid running the command from anywhere other than
> the top of the working tree (i.e. the internal "prefix" and
> GIT_PREFIX must be empty) when the super-prefix is set by erroring
> it out.  When we realize that "prefix" adds to the paths that are
> supplied by the user (e.g. when the user says Makefile while in t/
> subdirectory, i.e. GIT_PREFIX=t/, s/he means t/Makefile), but this
> new thing subtracts from the paths given by the user (e.g. when the
> user gives a pathspec 'w/git.git/D*' while setting the super thing
> as w/git.git, because s/he is at ~/, the pathspec matcher
> conceptually subtracts w/git.git/ from the pathspec before matching
> them against the paths it finds in the index), it becomes clear that
> giving both at the same time is awkward and not very useful.

Well maybe...I don't really know much about how the prefix interacts in
every scenario but would what you describe still work if we are in a sub
dir of the superproject (which contains other directorys and perhaps a
submodule) and execute a --recurse-submodules command in the
subdirectory?  I suspect we don't want to force users to be in the root
directory of the project in order to use --recurse-submodules.

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-27 21:48               ` Brandon Williams
@ 2016-09-27 22:01                 ` Junio C Hamano
  2016-09-27 22:09                   ` Brandon Williams
  0 siblings, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2016-09-27 22:01 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, git

Brandon Williams <bmwill@google.com> writes:

> Well maybe...I don't really know much about how the prefix interacts in
> every scenario but would what you describe still work if we are in a sub
> dir of the superproject (which contains other directorys and perhaps a
> submodule) and execute a --recurse-submodules command in the
> subdirectory?  I suspect we don't want to force users to be in the root
> directory of the project in order to use --recurse-submodules.

You need to remember "must be at the top" is relevant only to the
command that is invoked with --super-prefix, not the recursive one
that drives such a process.

Suppose your superproject is organized like so:

    - file-at-top
      a/file-in-A
      a/b (submodule)
      a/b/file-at-top-of-B
      c/  (submodule)
      c/file-at-top-of-C

If you are in a subdirectory of your superproject, say, a/,

    cd a && git ls-files --recurse-submodules -- "b*"

I would expect we would recurse into the submodule at "a/b" and find
"b/file-at-top-of-B".  What does the internal invocation to do so
would look like?  I would think "git -C b --super=b ls-files" that
is run from "a".

Your code would is already prepared to find "file-at-top-of-B" in
the index of the submodule, prepend "b/" to it and report the result
as "b/file-at-top-of-B" when such a call is made, I think.

Now, can you refer to c/ and c/file-at-top-of-C while sitting at a/?

    cd a && git ls-files --recurse-submodules -- "../c*"

would be the top-level invocation.  This would iterate over the
index of the superproject, trying to find what matches "c*" (or,
"../c*" relative to "a" i.e. where you are), find that 'c' that is a
submodule, and invoke "git -C ../c --super=../c ls-files"
internally, I would imagine.  I think your code is prepared to
accept this case as well.

In any case, the "must be at the top" does not come into the picture
at all for the end-user interaction, i.e. invocation of the command
that is told to recurse into submodules, so we'd be OK.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-27 22:01                 ` Junio C Hamano
@ 2016-09-27 22:09                   ` Brandon Williams
  2016-09-27 22:23                     ` Junio C Hamano
  0 siblings, 1 reply; 29+ messages in thread
From: Brandon Williams @ 2016-09-27 22:09 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, git

On 09/27, Junio C Hamano wrote:
> Brandon Williams <bmwill@google.com> writes:
> 
> > Well maybe...I don't really know much about how the prefix interacts in
> > every scenario but would what you describe still work if we are in a sub
> > dir of the superproject (which contains other directorys and perhaps a
> > submodule) and execute a --recurse-submodules command in the
> > subdirectory?  I suspect we don't want to force users to be in the root
> > directory of the project in order to use --recurse-submodules.
> 
> You need to remember "must be at the top" is relevant only to the
> command that is invoked with --super-prefix, not the recursive one
> that drives such a process.
> 
> Suppose your superproject is organized like so:
> 
>     - file-at-top
>       a/file-in-A
>       a/b (submodule)
>       a/b/file-at-top-of-B
>       c/  (submodule)
>       c/file-at-top-of-C
> 
> If you are in a subdirectory of your superproject, say, a/,
> 
>     cd a && git ls-files --recurse-submodules -- "b*"
> 
> I would expect we would recurse into the submodule at "a/b" and find
> "b/file-at-top-of-B".  What does the internal invocation to do so
> would look like?  I would think "git -C b --super=b ls-files" that
> is run from "a".
> 
> Your code would is already prepared to find "file-at-top-of-B" in
> the index of the submodule, prepend "b/" to it and report the result
> as "b/file-at-top-of-B" when such a call is made, I think.
> 
> Now, can you refer to c/ and c/file-at-top-of-C while sitting at a/?
> 
>     cd a && git ls-files --recurse-submodules -- "../c*"
> 
> would be the top-level invocation.  This would iterate over the
> index of the superproject, trying to find what matches "c*" (or,
> "../c*" relative to "a" i.e. where you are), find that 'c' that is a
> submodule, and invoke "git -C ../c --super=../c ls-files"
> internally, I would imagine.  I think your code is prepared to
> accept this case as well.
> 
> In any case, the "must be at the top" does not come into the picture
> at all for the end-user interaction, i.e. invocation of the command
> that is told to recurse into submodules, so we'd be OK.

Thanks for the clear explination that makes sense.

Also, --super-prefix as a name is growing on me :)

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 1/2] ls-files: adding support for submodules
  2016-09-27 22:09                   ` Brandon Williams
@ 2016-09-27 22:23                     ` Junio C Hamano
  0 siblings, 0 replies; 29+ messages in thread
From: Junio C Hamano @ 2016-09-27 22:23 UTC (permalink / raw)
  To: Brandon Williams; +Cc: Jeff King, git

Brandon Williams <bmwill@google.com> writes:

>> If you are in a subdirectory of your superproject, say, a/,
>> 
>>     cd a && git ls-files --recurse-submodules -- "b*"
>> 
>> I would expect we would recurse into the submodule at "a/b" and find
>> "b/file-at-top-of-B".  What does the internal invocation to do so
>> would look like?  I would think "git -C b --super=b ls-files" that
>> is run from "a".

Actually, the internal invocation may have to be

	$ git --super=a/b ls-files -- "a/b*"

if the desired overall output needs to be in the "--full-name" mode.
That is, the top-level recursive one may be

    cd a && git ls-files --recurse-submodules --full-name -- "b*"

This top-level "ls-files" will have "prefix" set to "a/".  Because
it is run in the "--full-name" mode, after finding that the
submodule at "a/b" matches the given pathspec and deciding to
recurse into it, it needs to arrange that paths stored in the index
of the submodule are prefixed with "a/b/", not with "b/", when
shown.


^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2016-09-27 22:23 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-21 22:04 [PATCH 1/2] ls-files: adding support for submodules Brandon Williams
2016-09-21 22:04 ` [PATCH 2/2] ls-files: add pathspec matching " Brandon Williams
2016-09-21 22:53   ` Junio C Hamano
2016-09-21 23:23     ` Brandon Williams
2016-09-21 23:28       ` [PATCH 2/2 v2] " Brandon Williams
2016-09-23 18:48         ` Junio C Hamano
2016-09-23 19:20         ` Junio C Hamano
2016-09-23 20:49           ` Brandon Williams
2016-09-21 22:08 ` [PATCH 1/2] ls-files: adding support " Brandon Williams
2016-09-21 22:28   ` Junio C Hamano
2016-09-21 22:38     ` Brandon Williams
2016-09-21 22:42       ` [PATCH 1/2] ls-files: optionally recurse into submodules Brandon Williams
2016-09-22  6:20         ` Jeff King
2016-09-23 23:31           ` Brandon Williams
2016-09-21 23:13       ` [PATCH 1/2] ls-files: adding support for submodules Junio C Hamano
2016-09-22  4:18         ` Jeff King
2016-09-22 16:04           ` Stefan Beller
2016-09-22 18:13           ` Junio C Hamano
2016-09-23  3:41             ` Jeff King
2016-09-23  5:47               ` Stefan Beller
2016-09-23  6:06                 ` Jeff King
2016-09-23 16:16                   ` Brandon Williams
2016-09-23 16:34                     ` Stefan Beller
2016-09-25 11:03                       ` Nazri Ramliy
2016-09-27 21:38             ` Junio C Hamano
2016-09-27 21:48               ` Brandon Williams
2016-09-27 22:01                 ` Junio C Hamano
2016-09-27 22:09                   ` Brandon Williams
2016-09-27 22:23                     ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).