git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCHv8 0/5] pathspec magic extension to search for attributes
@ 2016-05-19  1:09 Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 1/5] string list: improve comment Stefan Beller
                   ` (5 more replies)
  0 siblings, 6 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19  1:09 UTC (permalink / raw)
  To: gitster, pclouds; +Cc: git, Stefan Beller

This goes on top of origin/jc/attr, (396bf756f95, attr: expose validity check
for attribute names)

Patches 1 and 2 are small fixes, which could go independently as well.
Patches 3 and 4 are refactoring pathspec.c a little.
These did not change since v7


diff to v7:
diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
index 866e8d8..aa9f220 100644
--- a/Documentation/glossary-content.txt
+++ b/Documentation/glossary-content.txt
@@ -394,11 +394,9 @@ you can query each attribute for certain states. The "`[mode]`" is a special
 character to indicate which attribute states are looked for. The following
 modes are available:

- - "`+`" the attribute must be set
+ - an empty "`[mode]`" matches if the attribute is set
  - "`-`" the attribute must be unset
- - "`~`" the attribute must be unspecified
- - "`?`" the attribute must not be unspecified, i.e. set, unset or value matches
- - an empty "`[mode]`" matches if the attribute is set or has a value
+ - "`!`" the attribute must be unspecified
  - an empty "`[mode]`" combined with "`[=value]`" matches if the attribute has
    the given value.
 +
diff --git a/dir.c b/dir.c
index 3141a5a..f60a503 100644
--- a/dir.c
+++ b/dir.c
@@ -219,42 +219,31 @@ int within_depth(const char *name, int namelen,
 static int match_attrs(const char *name, int namelen,
 		       const struct pathspec_item *item)
 {
-	char *path;
 	int i;

-	path = xmemdupz(name, namelen);
-	git_check_attr(path, item->attr_check);
-
+	git_check_attr_counted(name, namelen, item->attr_check);
 	for (i = 0; i < item->attr_match_nr; i++) {
 		const char *value;
 		int matched;
 		enum attr_match_mode match_mode;

 		value = item->attr_check->check[i].value;
-
 		match_mode = item->attr_match[i].match_mode;

-		if (ATTR_TRUE(value)) {
-			matched = match_mode == MATCH_SET ||
-				  match_mode == MATCH_SET_OR_VALUE ||
-				  match_mode == MATCH_NOT_UNSPECIFIED;
-		} else if (ATTR_FALSE(value)) {
-			matched = match_mode == MATCH_UNSET ||
-				  match_mode == MATCH_NOT_UNSPECIFIED;
-		} else if (ATTR_UNSET(value)) {
+		if (ATTR_TRUE(value))
+			matched = match_mode == MATCH_SET;
+		else if (ATTR_FALSE(value))
+			matched = match_mode == MATCH_UNSET;
+		else if (ATTR_UNSET(value))
 			matched = match_mode == MATCH_UNSPECIFIED;
-		} else {
-			matched = match_mode == MATCH_NOT_UNSPECIFIED ||
-				  match_mode == MATCH_SET_OR_VALUE ||
-				  (match_mode == MATCH_VALUE &&
+		else
+			matched = (match_mode == MATCH_VALUE &&
 				   !strcmp(item->attr_match[i].value, value));
-		}
+
 		if (!matched)
 			return 0;
 	}

-	free(path);
-
 	return 1;
 }

diff --git a/pathspec.c b/pathspec.c
index 32fb6a8..b795a9c 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -96,66 +96,58 @@ static void parse_pathspec_attr_match(struct pathspec_item *item, const char *va


 	if (!value || !strlen(value))
-		goto err;
+		die(_("attr spec must not be empty"));

 	string_list_split(&list, value, ' ', -1);
 	string_list_remove_empty_items(&list, 0);

 	if (!item->attr_check)
 		item->attr_check = git_attr_check_alloc();
+	else
+		die(_("Only one 'attr:' specification is allowed."));

 	ALLOC_GROW(item->attr_match, item->attr_match_nr + list.nr, item->attr_match_alloc);

 	for_each_string_list_item(si, &list) {
-		size_t val_len;
+		size_t attr_len;

 		int j = item->attr_match_nr++;
-		const char *val = si->string;
+		const char *attr = si->string;
 		struct attr_match *am = &item->attr_match[j];

-		if (val[0] == '?')
-			am->match_mode = MATCH_NOT_UNSPECIFIED;
-		else if (val[0] == '~')
+		if (attr[0] == '!')
 			am->match_mode = MATCH_UNSPECIFIED;
-		else if (val[0] == '+')
-			am->match_mode = MATCH_SET;
-		else if (val[0] == '-')
+		else if (attr[0] == '-')
 			am->match_mode = MATCH_UNSET;
 		else
-			am->match_mode = MATCH_SET_OR_VALUE;
+			am->match_mode = MATCH_SET;

-		if (am->match_mode != MATCH_SET_OR_VALUE)
+		if (am->match_mode != MATCH_SET)
 			/* skip first character */
-			val++;
+			attr++;

-		val_len = strcspn(val, "=,)");
-		if (val[val_len] == '=') {
+		attr_len = strcspn(attr, "=");
+		if (attr[attr_len] == '=') {
 			am->match_mode = MATCH_VALUE;
-			am->value = xstrdup(&val[val_len + 1]);
-			/*
-			 * NEEDSWORK:
-			 * Do we want to allow escaped commas to search
-			 * for comma separated values?
-			 */
+			am->value = xstrdup(&attr[attr_len + 1]);
 			if (strchr(am->value, '\\'))
 				die(_("attr spec values must not contain backslashes"));
 		} else
 			am->value = NULL;

-		if (invalid_attr_name(val, val_len)) {
+		if (!attr_name_valid(attr, attr_len)) {
+			struct strbuf sb = STRBUF_INIT;
 			am->match_mode = INVALID_ATTR;
-			goto err;
+			invalid_attr_name_message(&sb, attr, attr_len);
+			die(_("invalid attribute in '%s': '%s'"), value, sb.buf);
 		}

-		am->attr = git_attr(xmemdupz(val, val_len));
+		am->attr = git_attr_counted(attr, attr_len);
 		git_attr_check_append(item->attr_check, am->attr);
 	}

 	string_list_clear(&list, 0);
 	return;
-err:
-	die(_("attr spec '%s': attrs must not start with '-' and "
-	      "be composed of [-A-Za-z0-9_.]."), value);
 }

 static void eat_long_magic(struct pathspec_item *item, const char *elt,
@@ -188,9 +180,9 @@ static void eat_long_magic(struct pathspec_item *item, const char *elt,
 		}

 		if (skip_prefix(copyfrom, "attr:", &body)) {
-			char *pass = xmemdupz(body, len - strlen("attr:"));
-			parse_pathspec_attr_match(item, pass);
-			free(pass);
+			char *attr_body = xmemdupz(body, len - strlen("attr:"));
+			parse_pathspec_attr_match(item, attr_body);
+			free(attr_body);
 			continue;
 		}

@@ -591,6 +583,8 @@ void free_pathspec(struct pathspec *pathspec)
 {
 	int i, j;
 	for (i = 0; i < pathspec->nr; i++) {
+		if (!pathspec->items[i].attr_match_nr)
+			continue;
 		for (j = 0; j < pathspec->items[j].attr_match_nr; j++)
 			free(pathspec->items[i].attr_match[j].value);
 		free(pathspec->items[i].attr_match);
diff --git a/t/t6134-pathspec-with-labels.sh b/t/t6134-pathspec-with-labels.sh
index 35b3ab2..c0d8cda 100755
--- a/t/t6134-pathspec-with-labels.sh
+++ b/t/t6134-pathspec-with-labels.sh
@@ -74,7 +74,7 @@ test_expect_success 'check specific set attr' '
 fileSetLabel
 sub/fileSetLabel
 EOF
-	git ls-files ":(attr:+label)" >actual &&
+	git ls-files ":(attr:label)" >actual &&
 	test_cmp expect actual
 '

@@ -98,52 +98,41 @@ EOF
 	test_must_be_empty actual
 '

-test_expect_success 'check set or value attr' '
-	cat <<EOF >expect &&
-fileSetLabel
-fileValue
-sub/fileSetLabel
-sub/fileValue
-EOF
-	git ls-files ":(attr:label)" >actual &&
-	test_cmp expect actual
-'
-
 test_expect_success 'check unspecified attr' '
 	cat <<EOF >expect &&
 .gitattributes
+fileA
+fileAB
+fileAC
+fileB
+fileBC
 fileC
 fileNoLabel
 fileWrongLabel
+sub/fileA
+sub/fileAB
+sub/fileAC
+sub/fileB
+sub/fileBC
 sub/fileC
 sub/fileNoLabel
 sub/fileWrongLabel
 EOF
-	git ls-files ":(attr:~label,attr:~labelA,attr:~labelB)" >actual &&
+	git ls-files :\(attr:\!label\) >actual &&
 	test_cmp expect actual
 '

-test_expect_success 'check not unspecified attr' '
+test_expect_success 'check multiple unspecified attr' '
 	cat <<EOF >expect &&
-fileSetLabel
-fileUnsetLabel
-fileValue
-sub/fileSetLabel
-sub/fileUnsetLabel
-sub/fileValue
-EOF
-	git ls-files ":(attr:?label)" >actual &&
-	test_cmp expect actual
-'
-
-test_expect_success 'check label with 2 labels' '
-	cat <<EOF >expect &&
-fileAB
-sub/fileAB
+.gitattributes
+fileC
+fileNoLabel
+fileWrongLabel
+sub/fileC
+sub/fileNoLabel
+sub/fileWrongLabel
 EOF
-	git ls-files ":(attr:labelA labelB)" >actual &&
-	test_cmp expect actual &&
-	git ls-files ":(attr:labelA,attr:labelB)" >actual &&
+	git ls-files :\(attr:\!labelB\ \!labelA\ \!label\) >actual &&
 	test_cmp expect actual
 '

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCHv8 1/5] string list: improve comment
  2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
@ 2016-05-19  1:09 ` Stefan Beller
  2016-05-19 18:08   ` Junio C Hamano
  2016-05-19  1:09 ` [PATCHv8 2/5] Documentation: fix a typo Stefan Beller
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 22+ messages in thread
From: Stefan Beller @ 2016-05-19  1:09 UTC (permalink / raw)
  To: gitster, pclouds; +Cc: git, Stefan Beller

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 string-list.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/string-list.h b/string-list.h
index d3809a1..465a1f0 100644
--- a/string-list.h
+++ b/string-list.h
@@ -106,7 +106,7 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
  * list->strdup_strings must be set, as new memory needs to be
  * allocated to hold the substrings.  If maxsplit is non-negative,
  * then split at most maxsplit times.  Return the number of substrings
- * appended to list.
+ * appended to list. The list may be non-empty already.
  *
  * Examples:
  *   string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"]
-- 
2.8.2.123.g3bde101

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCHv8 2/5] Documentation: fix a typo
  2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 1/5] string list: improve comment Stefan Beller
@ 2016-05-19  1:09 ` Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 3/5] pathspec: move long magic parsing out of prefix_pathspec Stefan Beller
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19  1:09 UTC (permalink / raw)
  To: gitster, pclouds; +Cc: git, Stefan Beller

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/gitattributes.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index e3b1de8..af2c682 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -86,7 +86,7 @@ is either not set or empty, $HOME/.config/git/attributes is used instead.
 Attributes for all users on a system should be placed in the
 `$(prefix)/etc/gitattributes` file.
 
-Sometimes you would need to override an setting of an attribute
+Sometimes you would need to override a setting of an attribute
 for a path to `Unspecified` state.  This can be done by listing
 the name of the attribute prefixed with an exclamation point `!`.
 
-- 
2.8.2.123.g3bde101

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCHv8 3/5] pathspec: move long magic parsing out of prefix_pathspec
  2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 1/5] string list: improve comment Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 2/5] Documentation: fix a typo Stefan Beller
@ 2016-05-19  1:09 ` Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 4/5] pathspec: move prefix check out of the inner loop Stefan Beller
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19  1:09 UTC (permalink / raw)
  To: gitster, pclouds; +Cc: git, Stefan Beller

`prefix_pathspec` is quite a lengthy function and we plan on adding more.
Split it up for better readability. As we want to add code into the
inner loop of the long magic parsing, we also benefit from lower
indentation.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 pathspec.c | 84 +++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 47 insertions(+), 37 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index c9e9b6c..eba37c2 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -88,6 +88,52 @@ static void prefix_short_magic(struct strbuf *sb, int prefixlen,
 	strbuf_addf(sb, ",prefix:%d)", prefixlen);
 }
 
+static void eat_long_magic(struct pathspec_item *item, const char *elt,
+		unsigned *magic, int *pathspec_prefix,
+		const char **copyfrom_, const char **long_magic_end)
+{
+	int i;
+	const char *copyfrom = *copyfrom_;
+	/* longhand */
+	const char *nextat;
+	for (copyfrom = elt + 2;
+	     *copyfrom && *copyfrom != ')';
+	     copyfrom = nextat) {
+		size_t len = strcspn(copyfrom, ",)");
+		if (copyfrom[len] == ',')
+			nextat = copyfrom + len + 1;
+		else
+			/* handle ')' and '\0' */
+			nextat = copyfrom + len;
+		if (!len)
+			continue;
+		for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
+			if (strlen(pathspec_magic[i].name) == len &&
+			    !strncmp(pathspec_magic[i].name, copyfrom, len)) {
+				*magic |= pathspec_magic[i].bit;
+				break;
+			}
+			if (starts_with(copyfrom, "prefix:")) {
+				char *endptr;
+				*pathspec_prefix = strtol(copyfrom + 7,
+							  &endptr, 10);
+				if (endptr - copyfrom != len)
+					die(_("invalid parameter for pathspec magic 'prefix'"));
+				/* "i" would be wrong, but it does not matter */
+				break;
+			}
+		}
+		if (ARRAY_SIZE(pathspec_magic) <= i)
+			die(_("Invalid pathspec magic '%.*s' in '%s'"),
+			    (int) len, copyfrom, elt);
+	}
+	if (*copyfrom != ')')
+		die(_("Missing ')' at the end of pathspec magic in '%s'"), elt);
+	*long_magic_end = copyfrom;
+	copyfrom++;
+	*copyfrom_ = copyfrom;
+}
+
 /*
  * Take an element of a pathspec and check for magic signatures.
  * Append the result to the prefix. Return the magic bitmap.
@@ -150,43 +196,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item,
 	    (flags & PATHSPEC_LITERAL_PATH)) {
 		; /* nothing to do */
 	} else if (elt[1] == '(') {
-		/* longhand */
-		const char *nextat;
-		for (copyfrom = elt + 2;
-		     *copyfrom && *copyfrom != ')';
-		     copyfrom = nextat) {
-			size_t len = strcspn(copyfrom, ",)");
-			if (copyfrom[len] == ',')
-				nextat = copyfrom + len + 1;
-			else
-				/* handle ')' and '\0' */
-				nextat = copyfrom + len;
-			if (!len)
-				continue;
-			for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
-				if (strlen(pathspec_magic[i].name) == len &&
-				    !strncmp(pathspec_magic[i].name, copyfrom, len)) {
-					magic |= pathspec_magic[i].bit;
-					break;
-				}
-				if (starts_with(copyfrom, "prefix:")) {
-					char *endptr;
-					pathspec_prefix = strtol(copyfrom + 7,
-								 &endptr, 10);
-					if (endptr - copyfrom != len)
-						die(_("invalid parameter for pathspec magic 'prefix'"));
-					/* "i" would be wrong, but it does not matter */
-					break;
-				}
-			}
-			if (ARRAY_SIZE(pathspec_magic) <= i)
-				die(_("Invalid pathspec magic '%.*s' in '%s'"),
-				    (int) len, copyfrom, elt);
-		}
-		if (*copyfrom != ')')
-			die(_("Missing ')' at the end of pathspec magic in '%s'"), elt);
-		long_magic_end = copyfrom;
-		copyfrom++;
+		eat_long_magic(item, elt, &magic, &pathspec_prefix, &copyfrom, &long_magic_end);
 	} else {
 		/* shorthand */
 		for (copyfrom = elt + 1;
-- 
2.8.2.123.g3bde101

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCHv8 4/5] pathspec: move prefix check out of the inner loop
  2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
                   ` (2 preceding siblings ...)
  2016-05-19  1:09 ` [PATCHv8 3/5] pathspec: move long magic parsing out of prefix_pathspec Stefan Beller
@ 2016-05-19  1:09 ` Stefan Beller
  2016-05-19  1:09 ` [PATCHv8 5/5] pathspec: allow querying for attributes Stefan Beller
  2016-05-19 18:55 ` [PATCHv8 0/5] pathspec magic extension to search " Junio C Hamano
  5 siblings, 0 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19  1:09 UTC (permalink / raw)
  To: gitster, pclouds; +Cc: git, Stefan Beller

The prefix check is not related the check of pathspec magic; also there
is no code that is relevant after we'd break the loop on a match for
"prefix:". So move the check before the loop and shortcircuit the outer
loop.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 pathspec.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/pathspec.c b/pathspec.c
index eba37c2..4dff252 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -107,21 +107,22 @@ static void eat_long_magic(struct pathspec_item *item, const char *elt,
 			nextat = copyfrom + len;
 		if (!len)
 			continue;
+
+		if (starts_with(copyfrom, "prefix:")) {
+			char *endptr;
+			*pathspec_prefix = strtol(copyfrom + 7,
+						  &endptr, 10);
+			if (endptr - copyfrom != len)
+				die(_("invalid parameter for pathspec magic 'prefix'"));
+			continue;
+		}
+
 		for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 			if (strlen(pathspec_magic[i].name) == len &&
 			    !strncmp(pathspec_magic[i].name, copyfrom, len)) {
 				*magic |= pathspec_magic[i].bit;
 				break;
 			}
-			if (starts_with(copyfrom, "prefix:")) {
-				char *endptr;
-				*pathspec_prefix = strtol(copyfrom + 7,
-							  &endptr, 10);
-				if (endptr - copyfrom != len)
-					die(_("invalid parameter for pathspec magic 'prefix'"));
-				/* "i" would be wrong, but it does not matter */
-				break;
-			}
 		}
 		if (ARRAY_SIZE(pathspec_magic) <= i)
 			die(_("Invalid pathspec magic '%.*s' in '%s'"),
-- 
2.8.2.123.g3bde101

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCHv8 5/5] pathspec: allow querying for attributes
  2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
                   ` (3 preceding siblings ...)
  2016-05-19  1:09 ` [PATCHv8 4/5] pathspec: move prefix check out of the inner loop Stefan Beller
@ 2016-05-19  1:09 ` Stefan Beller
  2016-05-19 18:53   ` Junio C Hamano
  2016-05-19 19:37   ` Junio C Hamano
  2016-05-19 18:55 ` [PATCHv8 0/5] pathspec magic extension to search " Junio C Hamano
  5 siblings, 2 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19  1:09 UTC (permalink / raw)
  To: gitster, pclouds; +Cc: git, Stefan Beller

The pathspec mechanism is extended via the new
":(attr:eol=input)pattern/to/match" syntax to filter paths so that it
requires paths to not just match the given pattern but also have the
specified attrs attached for them to be chosen.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/glossary-content.txt |  17 ++++
 dir.c                              |  35 ++++++++
 pathspec.c                         |  97 +++++++++++++++++++++-
 pathspec.h                         |  16 ++++
 t/t6134-pathspec-with-labels.sh    | 166 +++++++++++++++++++++++++++++++++++++
 5 files changed, 327 insertions(+), 4 deletions(-)
 create mode 100755 t/t6134-pathspec-with-labels.sh

diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
index cafc284..aa9f220 100644
--- a/Documentation/glossary-content.txt
+++ b/Documentation/glossary-content.txt
@@ -384,6 +384,23 @@ full pathname may have special meaning:
 +
 Glob magic is incompatible with literal magic.
 
+attr;;
+	Additionally to matching the pathspec, the path must have the
+	attribute as specified. The syntax for specifying the required
+	attributes is "`attr: [mode] <attribute name> [=value]`"
++
+Attributes can have 4 states (Set, Unset, Set to a value, unspecified) and
+you can query each attribute for certain states. The "`[mode]`" is a special
+character to indicate which attribute states are looked for. The following
+modes are available:
+
+ - an empty "`[mode]`" matches if the attribute is set
+ - "`-`" the attribute must be unset
+ - "`!`" the attribute must be unspecified
+ - an empty "`[mode]`" combined with "`[=value]`" matches if the attribute has
+   the given value.
++
+
 exclude;;
 	After a path matches any non-exclude pathspec, it will be run
 	through all exclude pathspec (magic signature: `!`). If it
diff --git a/dir.c b/dir.c
index 996653b..f60a503 100644
--- a/dir.c
+++ b/dir.c
@@ -9,6 +9,7 @@
  */
 #include "cache.h"
 #include "dir.h"
+#include "attr.h"
 #include "refs.h"
 #include "wildmatch.h"
 #include "pathspec.h"
@@ -215,6 +216,37 @@ int within_depth(const char *name, int namelen,
 	return 1;
 }
 
+static int match_attrs(const char *name, int namelen,
+		       const struct pathspec_item *item)
+{
+	int i;
+
+	git_check_attr_counted(name, namelen, item->attr_check);
+	for (i = 0; i < item->attr_match_nr; i++) {
+		const char *value;
+		int matched;
+		enum attr_match_mode match_mode;
+
+		value = item->attr_check->check[i].value;
+		match_mode = item->attr_match[i].match_mode;
+
+		if (ATTR_TRUE(value))
+			matched = match_mode == MATCH_SET;
+		else if (ATTR_FALSE(value))
+			matched = match_mode == MATCH_UNSET;
+		else if (ATTR_UNSET(value))
+			matched = match_mode == MATCH_UNSPECIFIED;
+		else
+			matched = (match_mode == MATCH_VALUE &&
+				   !strcmp(item->attr_match[i].value, value));
+
+		if (!matched)
+			return 0;
+	}
+
+	return 1;
+}
+
 #define DO_MATCH_EXCLUDE   1
 #define DO_MATCH_DIRECTORY 2
 
@@ -270,6 +302,9 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
 	    strncmp(item->match, name - prefix, item->prefix))
 		return 0;
 
+	if (item->attr_match_nr && !match_attrs(name, namelen, item))
+		return 0;
+
 	/* If the match was just the prefix, we matched */
 	if (!*match)
 		return MATCHED_RECURSIVELY;
diff --git a/pathspec.c b/pathspec.c
index 4dff252..b795a9c 100644
--- a/pathspec.c
+++ b/pathspec.c
@@ -1,6 +1,7 @@
 #include "cache.h"
 #include "dir.h"
 #include "pathspec.h"
+#include "attr.h"
 
 /*
  * Finds which of the given pathspecs match items in the index.
@@ -88,12 +89,74 @@ static void prefix_short_magic(struct strbuf *sb, int prefixlen,
 	strbuf_addf(sb, ",prefix:%d)", prefixlen);
 }
 
+static void parse_pathspec_attr_match(struct pathspec_item *item, const char *value)
+{
+	struct string_list_item *si;
+	struct string_list list = STRING_LIST_INIT_DUP;
+
+
+	if (!value || !strlen(value))
+		die(_("attr spec must not be empty"));
+
+	string_list_split(&list, value, ' ', -1);
+	string_list_remove_empty_items(&list, 0);
+
+	if (!item->attr_check)
+		item->attr_check = git_attr_check_alloc();
+	else
+		die(_("Only one 'attr:' specification is allowed."));
+
+	ALLOC_GROW(item->attr_match, item->attr_match_nr + list.nr, item->attr_match_alloc);
+
+	for_each_string_list_item(si, &list) {
+		size_t attr_len;
+
+		int j = item->attr_match_nr++;
+		const char *attr = si->string;
+		struct attr_match *am = &item->attr_match[j];
+
+		if (attr[0] == '!')
+			am->match_mode = MATCH_UNSPECIFIED;
+		else if (attr[0] == '-')
+			am->match_mode = MATCH_UNSET;
+		else
+			am->match_mode = MATCH_SET;
+
+		if (am->match_mode != MATCH_SET)
+			/* skip first character */
+			attr++;
+
+		attr_len = strcspn(attr, "=");
+		if (attr[attr_len] == '=') {
+			am->match_mode = MATCH_VALUE;
+			am->value = xstrdup(&attr[attr_len + 1]);
+			if (strchr(am->value, '\\'))
+				die(_("attr spec values must not contain backslashes"));
+		} else
+			am->value = NULL;
+
+		if (!attr_name_valid(attr, attr_len)) {
+			struct strbuf sb = STRBUF_INIT;
+			am->match_mode = INVALID_ATTR;
+			invalid_attr_name_message(&sb, attr, attr_len);
+			die(_("invalid attribute in '%s': '%s'"), value, sb.buf);
+		}
+
+		am->attr = git_attr_counted(attr, attr_len);
+		git_attr_check_append(item->attr_check, am->attr);
+	}
+
+	string_list_clear(&list, 0);
+	return;
+}
+
 static void eat_long_magic(struct pathspec_item *item, const char *elt,
 		unsigned *magic, int *pathspec_prefix,
 		const char **copyfrom_, const char **long_magic_end)
 {
 	int i;
 	const char *copyfrom = *copyfrom_;
+	const char *body;
 	/* longhand */
 	const char *nextat;
 	for (copyfrom = elt + 2;
@@ -108,15 +171,21 @@ static void eat_long_magic(struct pathspec_item *item, const char *elt,
 		if (!len)
 			continue;
 
-		if (starts_with(copyfrom, "prefix:")) {
+		if (skip_prefix(copyfrom, "prefix:", &body)) {
 			char *endptr;
-			*pathspec_prefix = strtol(copyfrom + 7,
-						  &endptr, 10);
+			*pathspec_prefix = strtol(body, &endptr, 10);
 			if (endptr - copyfrom != len)
 				die(_("invalid parameter for pathspec magic 'prefix'"));
 			continue;
 		}
 
+		if (skip_prefix(copyfrom, "attr:", &body)) {
+			char *attr_body = xmemdupz(body, len - strlen("attr:"));
+			parse_pathspec_attr_match(item, attr_body);
+			free(attr_body);
+			continue;
+		}
+
 		for (i = 0; i < ARRAY_SIZE(pathspec_magic); i++) {
 			if (strlen(pathspec_magic[i].name) == len &&
 			    !strncmp(pathspec_magic[i].name, copyfrom, len)) {
@@ -425,7 +494,10 @@ void parse_pathspec(struct pathspec *pathspec,
 	for (i = 0; i < n; i++) {
 		unsigned short_magic;
 		entry = argv[i];
-
+		item[i].attr_check = NULL;
+		item[i].attr_match = NULL;
+		item[i].attr_match_nr = 0;
+		item[i].attr_match_alloc = 0;
 		item[i].magic = prefix_pathspec(item + i, &short_magic,
 						argv + i, flags,
 						prefix, prefixlen, entry);
@@ -447,6 +519,13 @@ void parse_pathspec(struct pathspec *pathspec,
 		if (item[i].nowildcard_len < item[i].len)
 			pathspec->has_wildcard = 1;
 		pathspec->magic |= item[i].magic;
+
+		if (item[i].attr_match_nr) {
+			int j;
+			for (j = 0; j < item[i].attr_match_nr; j++)
+				if (item[i].attr_match[j].match_mode == INVALID_ATTR)
+					die(_("attribute spec in the wrong syntax are prohibited."));
+		}
 	}
 
 	if (nr_exclude == n)
@@ -502,6 +581,16 @@ void copy_pathspec(struct pathspec *dst, const struct pathspec *src)
 
 void free_pathspec(struct pathspec *pathspec)
 {
+	int i, j;
+	for (i = 0; i < pathspec->nr; i++) {
+		if (!pathspec->items[i].attr_match_nr)
+			continue;
+		for (j = 0; j < pathspec->items[j].attr_match_nr; j++)
+			free(pathspec->items[i].attr_match[j].value);
+		free(pathspec->items[i].attr_match);
+		git_attr_check_free(pathspec->items[i].attr_check);
+	}
+
 	free(pathspec->items);
 	pathspec->items = NULL;
 }
diff --git a/pathspec.h b/pathspec.h
index 0c11262..5308137 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -32,6 +32,22 @@ struct pathspec {
 		int len, prefix;
 		int nowildcard_len;
 		int flags;
+		int attr_match_nr;
+		int attr_match_alloc;
+		struct attr_match {
+			struct git_attr *attr;
+			char *value;
+			enum attr_match_mode {
+				MATCH_SET,
+				MATCH_UNSET,
+				MATCH_VALUE,
+				MATCH_UNSPECIFIED,
+				MATCH_NOT_UNSPECIFIED,
+				MATCH_SET_OR_VALUE,
+				INVALID_ATTR
+			} match_mode;
+		} *attr_match;
+		struct git_attr_check *attr_check;
 	} *items;
 };
 
diff --git a/t/t6134-pathspec-with-labels.sh b/t/t6134-pathspec-with-labels.sh
new file mode 100755
index 0000000..c0d8cda
--- /dev/null
+++ b/t/t6134-pathspec-with-labels.sh
@@ -0,0 +1,166 @@
+#!/bin/sh
+
+test_description='test labels in pathspecs'
+. ./test-lib.sh
+
+test_expect_success 'setup a tree' '
+	mkdir sub &&
+	for p in fileA fileB fileC fileAB fileAC fileBC fileNoLabel fileUnsetLabel fileSetLabel fileValue fileWrongLabel; do
+		: >$p &&
+		git add $p &&
+		: >sub/$p
+		git add sub/$p
+	done &&
+	git commit -m $p &&
+	git ls-files >actual &&
+	cat <<EOF >expect &&
+fileA
+fileAB
+fileAC
+fileB
+fileBC
+fileC
+fileNoLabel
+fileSetLabel
+fileUnsetLabel
+fileValue
+fileWrongLabel
+sub/fileA
+sub/fileAB
+sub/fileAC
+sub/fileB
+sub/fileBC
+sub/fileC
+sub/fileNoLabel
+sub/fileSetLabel
+sub/fileUnsetLabel
+sub/fileValue
+sub/fileWrongLabel
+EOF
+	test_cmp expect actual
+'
+
+test_expect_success 'pathspec with no attr' '
+	test_must_fail git ls-files ":(attr:)" 2>actual &&
+	test_i18ngrep fatal actual
+'
+
+test_expect_success 'pathspec with labels and non existent .gitattributes' '
+	git ls-files ":(attr:label)" >actual &&
+	test_must_be_empty actual
+'
+
+test_expect_success 'setup .gitattributes' '
+	cat <<EOF >.gitattributes &&
+fileA labelA
+fileB labelB
+fileC labelC
+fileAB labelA labelB
+fileAC labelA labelC
+fileBC labelB labelC
+fileUnsetLabel -label
+fileSetLabel label
+fileValue label=foo
+fileWrongLabel label☺
+EOF
+	git add .gitattributes &&
+	git commit -m "add attributes"
+'
+
+sq="'"
+
+test_expect_success 'check specific set attr' '
+	cat <<EOF >expect &&
+fileSetLabel
+sub/fileSetLabel
+EOF
+	git ls-files ":(attr:label)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'check specific unset attr' '
+	cat <<EOF >expect &&
+fileUnsetLabel
+sub/fileUnsetLabel
+EOF
+	git ls-files ":(attr:-label)" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'check specific value attr' '
+	cat <<EOF >expect &&
+fileValue
+sub/fileValue
+EOF
+	git ls-files ":(attr:label=foo)" >actual &&
+	test_cmp expect actual &&
+	git ls-files ":(attr:label=bar)" >actual &&
+	test_must_be_empty actual
+'
+
+test_expect_success 'check unspecified attr' '
+	cat <<EOF >expect &&
+.gitattributes
+fileA
+fileAB
+fileAC
+fileB
+fileBC
+fileC
+fileNoLabel
+fileWrongLabel
+sub/fileA
+sub/fileAB
+sub/fileAC
+sub/fileB
+sub/fileBC
+sub/fileC
+sub/fileNoLabel
+sub/fileWrongLabel
+EOF
+	git ls-files :\(attr:\!label\) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'check multiple unspecified attr' '
+	cat <<EOF >expect &&
+.gitattributes
+fileC
+fileNoLabel
+fileWrongLabel
+sub/fileC
+sub/fileNoLabel
+sub/fileWrongLabel
+EOF
+	git ls-files :\(attr:\!labelB\ \!labelA\ \!label\) >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'check label with more labels but excluded path' '
+	cat <<EOF >expect &&
+fileAB
+fileB
+fileBC
+EOF
+	git ls-files ":(attr:labelB)" ":(exclude)sub/" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'check label excluding other labels' '
+	cat <<EOF >expect &&
+fileAB
+fileB
+fileBC
+sub/fileAB
+sub/fileB
+EOF
+	git ls-files ":(attr:labelB)" ":(exclude,attr:labelC)sub/" >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'abort on giving invalid label on the command line' '
+	test_must_fail git ls-files . ":(attr:☺)" 2>actual &&
+	test_i18ngrep "fatal" actual
+'
+
+test_done
-- 
2.8.2.123.g3bde101

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 1/5] string list: improve comment
  2016-05-19  1:09 ` [PATCHv8 1/5] string list: improve comment Stefan Beller
@ 2016-05-19 18:08   ` Junio C Hamano
  2016-05-19 18:12     ` Stefan Beller
  0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2016-05-19 18:08 UTC (permalink / raw)
  To: Stefan Beller; +Cc: pclouds, git

Stefan Beller <sbeller@google.com> writes:

> Signed-off-by: Stefan Beller <sbeller@google.com>
> ---
>  string-list.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/string-list.h b/string-list.h
> index d3809a1..465a1f0 100644
> --- a/string-list.h
> +++ b/string-list.h
> @@ -106,7 +106,7 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
>   * list->strdup_strings must be set, as new memory needs to be
>   * allocated to hold the substrings.  If maxsplit is non-negative,
>   * then split at most maxsplit times.  Return the number of substrings
> - * appended to list.
> + * appended to list. The list may be non-empty already.

I personally find that the original comment is clear enough, though.

When somebody says "resulting elements of the split are appended to
the list" without saying either:

  a. "the list MUST be empty in the beginning", or
  b. "the list will be emptied first before the split result are appended",

wouldn't it be natural to take it as "you can append them to any
list, be it empty or not, and they are _appended_, not replaced"?

So while this is not incorrect per-se, I am not sure if it adds much
value.  If somebody needs this additional clarification, because the
original did not say a. above, she would certainly need more
clarification because the original did not say b. above, either.

"The list may be non-empty already", but she would keep wondering if
the existing contents would be discarded before the result gets
appended to it.

You may say "No, there won't be such a confusion, because we say
'append'; empty and then append is 'replace'".  But then the same
logic would say "There cannot be a requirement for the list to be
empty in the first place, because we say 'append'".

So...

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 1/5] string list: improve comment
  2016-05-19 18:08   ` Junio C Hamano
@ 2016-05-19 18:12     ` Stefan Beller
  0 siblings, 0 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19 18:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, git@vger.kernel.org

On Thu, May 19, 2016 at 11:08 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> Signed-off-by: Stefan Beller <sbeller@google.com>
>> ---
>>  string-list.h | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/string-list.h b/string-list.h
>> index d3809a1..465a1f0 100644
>> --- a/string-list.h
>> +++ b/string-list.h
>> @@ -106,7 +106,7 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
>>   * list->strdup_strings must be set, as new memory needs to be
>>   * allocated to hold the substrings.  If maxsplit is non-negative,
>>   * then split at most maxsplit times.  Return the number of substrings
>> - * appended to list.
>> + * appended to list. The list may be non-empty already.
>
> I personally find that the original comment is clear enough, though.
>
> When somebody says "resulting elements of the split are appended to
> the list" without saying either:
>
>   a. "the list MUST be empty in the beginning", or
>   b. "the list will be emptied first before the split result are appended",
>
> wouldn't it be natural to take it as "you can append them to any
> list, be it empty or not, and they are _appended_, not replaced"?

That is true. I missed that though when reading the documentation and
read the source code to be clear.

>
> So while this is not incorrect per-se, I am not sure if it adds much
> value.  If somebody needs this additional clarification, because the
> original did not say a. above, she would certainly need more
> clarification because the original did not say b. above, either.
>
> "The list may be non-empty already", but she would keep wondering if
> the existing contents would be discarded before the result gets
> appended to it.
>
> You may say "No, there won't be such a confusion, because we say
> 'append'; empty and then append is 'replace'".  But then the same
> logic would say "There cannot be a requirement for the list to be
> empty in the first place, because we say 'append'".
>
> So...

So, please drop?

I do not feel strongly about this patch. I basically wrote to for myself
after I consulted the source.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 5/5] pathspec: allow querying for attributes
  2016-05-19  1:09 ` [PATCHv8 5/5] pathspec: allow querying for attributes Stefan Beller
@ 2016-05-19 18:53   ` Junio C Hamano
  2016-05-19 20:42     ` Stefan Beller
  2016-05-19 19:37   ` Junio C Hamano
  1 sibling, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2016-05-19 18:53 UTC (permalink / raw)
  To: Stefan Beller; +Cc: pclouds, git

Stefan Beller <sbeller@google.com> writes:

> diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
> index cafc284..aa9f220 100644
> --- a/Documentation/glossary-content.txt
> +++ b/Documentation/glossary-content.txt
> @@ -384,6 +384,23 @@ full pathname may have special meaning:
>  +
>  Glob magic is incompatible with literal magic.
>  
> +attr;;
> +	Additionally to matching the pathspec, the path must have the
> +	attribute as specified. The syntax for specifying the required
> +	attributes is "`attr: [mode] <attribute name> [=value]`"
> ++
> +Attributes can have 4 states (Set, Unset, Set to a value, unspecified) and
> +you can query each attribute for certain states. The "`[mode]`" is a special
> +character to indicate which attribute states are looked for. The following
> +modes are available:
> +
> + - an empty "`[mode]`" matches if the attribute is set
> + - "`-`" the attribute must be unset
> + - "`!`" the attribute must be unspecified
> + - an empty "`[mode]`" combined with "`[=value]`" matches if the attribute has
> +   the given value.
> ++

As an initial design, I find this much more agreeable than the
previous rounds.  I however find the phrasing of the above harder
than necessary to understand, for a few reasons.

 * Mixed use of "X matches if ..." and "... must be Y" makes it
   unclear if they are talking about different kind of things, or
   the same kind of things in merely different ways.

 * It does not make it clear "=value" is only meaningful when [mode]
   is empty.

Perhaps dropping the '[mode]' thing altogether and instead saying

        After `attr:` comes a space separated list of "attribute
       	requirements", all of which must be met in order for the
       	path to be considered a match; this is in addition to the
       	usual non-magic pathspec pattern matching.

	Each of the attribute requirements for the path takes one of
	these forms:

	- "`ATTR`" requires that the attribute `ATTR` must be set.

	- "`-ATTR`" requires that the attribute `ATTR` must be unset.

        - "`ATTR=VALUE`" requires that the attribute `ATTR` must be
          set to the string `VALUE`.

        - "`!ATTR`" requires that the attribute `ATTR` must be
          unspecified.

would make the resulting text easier to read?

> +static int match_attrs(const char *name, int namelen,
> +		       const struct pathspec_item *item)
> +{
> +	int i;
> +
> +	git_check_attr_counted(name, namelen, item->attr_check);
> +	for (i = 0; i < item->attr_match_nr; i++) {
> +		const char *value;
> +		int matched;
> +		enum attr_match_mode match_mode;
> +
> +		value = item->attr_check->check[i].value;
> +		match_mode = item->attr_match[i].match_mode;
> +
> +		if (ATTR_TRUE(value))
> +			matched = match_mode == MATCH_SET;
> +		else if (ATTR_FALSE(value))
> +			matched = match_mode == MATCH_UNSET;
> +		else if (ATTR_UNSET(value))
> +			matched = match_mode == MATCH_UNSPECIFIED;

readability nit:

	matched = (match_mode == MATCH_WHATEVER);

would be easier to view

> +		else
> +			matched = (match_mode == MATCH_VALUE &&
> +				   !strcmp(item->attr_match[i].value, value));

and would match the last case above better.

> +static void parse_pathspec_attr_match(struct pathspec_item *item, const char *value)
> +{
> +	struct string_list_item *si;
> +	struct string_list list = STRING_LIST_INIT_DUP;
> +
> +
> +	if (!value || !strlen(value))
> +		die(_("attr spec must not be empty"));
> +
> +	string_list_split(&list, value, ' ', -1);
> +	string_list_remove_empty_items(&list, 0);
> +
> +	if (!item->attr_check)
> +		item->attr_check = git_attr_check_alloc();
> +	else
> +		die(_("Only one 'attr:' specification is allowed."));
> +
> +	ALLOC_GROW(item->attr_match, item->attr_match_nr + list.nr, item->attr_match_alloc);
> +
> +	for_each_string_list_item(si, &list) {
> +		size_t attr_len;
> +
> +		int j = item->attr_match_nr++;
> +		const char *attr = si->string;
> +		struct attr_match *am = &item->attr_match[j];
> +
> +		if (attr[0] == '!')
> +			am->match_mode = MATCH_UNSPECIFIED;
> +		else if (attr[0] == '-')
> +			am->match_mode = MATCH_UNSET;
> +		else
> +			am->match_mode = MATCH_SET;
> +
> +		if (am->match_mode != MATCH_SET)
> +			/* skip first character */
> +			attr++;
> +		attr_len = strcspn(attr, "=");
> +		if (attr[attr_len] == '=') {
> +			am->match_mode = MATCH_VALUE;
> +			am->value = xstrdup(&attr[attr_len + 1]);
> +			if (strchr(am->value, '\\'))
> +				die(_("attr spec values must not contain backslashes"));
> +		} else
> +			am->value = NULL;
> +

Let me try a variant to see if we can improve it (thinking aloud).

	switch (*attr) {
        case '!':
        	am->match_mode = MATCH_UNSPECIFIED;
		attr++;
                break;
        case '-':
        	am->match_mode = MATCH_UNSET;
		attr++;
                break;
	default:
		attr_len = strcspn(attr, "=");
		if (attr[attr_len] != '=')
			am->match_mode = MATCH_SET;
		else {
			am->match_mode = MATCH_VALUE;
	                am->value = xstrdup(...);
		}
                break;
	}

Using switch/case does not save line counts at all but it may still
make the result easier to follow.  More importantly, it would help
you catch "!ATTR=VAR" as an "invalid attribute name 'ATTR=VAR' was
requested" error by arranging the code to make sure that scanning
for '=' would not happen in !/- case (in other words, while thiking
aloud with an alternative way to write the same thing, I spotted a
bug in the original ;-).

> +		if (!attr_name_valid(attr, attr_len)) {
> +			struct strbuf sb = STRBUF_INIT;
> +			am->match_mode = INVALID_ATTR;
> +			invalid_attr_name_message(&sb, attr, attr_len);
> +			die(_("invalid attribute in '%s': '%s'"), value, sb.buf);
> +		}
> +
> +		am->attr = git_attr_counted(attr, attr_len);

I'd do this the other order, i.e.

	am->attr = git_attr_counted(...);
        if (!am->attr) {
		...
                die(...);
	}

It is wasteful to call attr_name_valid() yourself before seeing an
error from git_attr_counted().

> diff --git a/t/t6134-pathspec-with-labels.sh b/t/t6134-pathspec-with-labels.sh
> new file mode 100755
> index 0000000..c0d8cda
> --- /dev/null
> +++ b/t/t6134-pathspec-with-labels.sh
> @@ -0,0 +1,166 @@
> +#!/bin/sh
> +
> +test_description='test labels in pathspecs'
> +. ./test-lib.sh
> +
> +test_expect_success 'setup a tree' '
> +	mkdir sub &&
> +	for p in fileA fileB fileC fileAB fileAC fileBC fileNoLabel fileUnsetLabel fileSetLabel fileValue fileWrongLabel; do
> +		: >$p &&
> +		git add $p &&
> +		: >sub/$p
> +		git add sub/$p
> +	done &&
> +	git commit -m $p &&
> +	git ls-files >actual &&
> +	cat <<EOF >expect &&
> +fileA
> +fileAB
> +fileAC
> +fileB
> +fileBC
> +fileC
> +fileNoLabel
> +fileSetLabel
> +fileUnsetLabel
> +fileValue
> +fileWrongLabel
> +sub/fileA
> +sub/fileAB
> +sub/fileAC
> +sub/fileB
> +sub/fileBC
> +sub/fileC
> +sub/fileNoLabel
> +sub/fileSetLabel
> +sub/fileUnsetLabel
> +sub/fileValue
> +sub/fileWrongLabel
> +EOF

	cat <<-\EOF >expect &&
        fileA
        ...
        EOF

to indent the whole thing?

> +test_expect_success 'setup .gitattributes' '
> +	cat <<EOF >.gitattributes &&

Likewise.

> +fileWrongLabel label☺
> +EOF
> +	git add .gitattributes &&
> +	git commit -m "add attributes"
> +'
> +
> +sq="'"

I do not think you use this $sq variable below, but I may have
overlooked its use.

> +
> +test_expect_success 'check specific set attr' '
> +	cat <<EOF >expect &&
> +fileSetLabel
> +sub/fileSetLabel
> +EOF
> +	git ls-files ":(attr:label)" >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'check specific unset attr' '
> +	cat <<EOF >expect &&
> +fileUnsetLabel
> +sub/fileUnsetLabel
> +EOF
> +	git ls-files ":(attr:-label)" >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'check specific value attr' '
> +	cat <<EOF >expect &&
> +fileValue
> +sub/fileValue
> +EOF
> +	git ls-files ":(attr:label=foo)" >actual &&
> +	test_cmp expect actual &&
> +	git ls-files ":(attr:label=bar)" >actual &&
> +	test_must_be_empty actual
> +'
> +
> +test_expect_success 'check unspecified attr' '
> +	cat <<EOF >expect &&
> +.gitattributes
> +...
> +EOF
> +	git ls-files :\(attr:\!label\) >actual &&

Why not the more normal-looking ":(attr:!label)"?  I do not think
history substitution would trigger in an script.

And no, "I may cut and paste into my interactive shell while
debugging" is not a good reason to make the end-result (i.e. script)
less readable.

> +test_expect_success 'check multiple unspecified attr' '
> +	cat <<EOF >expect &&
> +.gitattributes
> +fileC
> +fileNoLabel
> +fileWrongLabel
> +sub/fileC
> +sub/fileNoLabel
> +sub/fileWrongLabel
> +EOF
> +	git ls-files :\(attr:\!labelB\ \!labelA\ \!label\) >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'check label with more labels but excluded path' '
> +	cat <<EOF >expect &&
> +fileAB
> +fileB
> +fileBC
> +EOF
> +	git ls-files ":(attr:labelB)" ":(exclude)sub/" >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'check label excluding other labels' '
> +	cat <<EOF >expect &&
> +fileAB
> +fileB
> +fileBC
> +sub/fileAB
> +sub/fileB
> +EOF
> +	git ls-files ":(attr:labelB)" ":(exclude,attr:labelC)sub/" >actual &&
> +	test_cmp expect actual
> +'
> +
> +test_expect_success 'abort on giving invalid label on the command line' '
> +	test_must_fail git ls-files . ":(attr:☺)" 2>actual &&
> +	test_i18ngrep "fatal" actual
> +'
> +
> +test_done

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
                   ` (4 preceding siblings ...)
  2016-05-19  1:09 ` [PATCHv8 5/5] pathspec: allow querying for attributes Stefan Beller
@ 2016-05-19 18:55 ` Junio C Hamano
  2016-05-19 21:00   ` Stefan Beller
  5 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2016-05-19 18:55 UTC (permalink / raw)
  To: Stefan Beller; +Cc: pclouds, git

I think this round is 99% there.  The next step would be to answer
"does the feature set we have here meet your needs that you wanted
to fill with the submodule labels originally?" and I am hoping it is
"yes".

Thanks.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 5/5] pathspec: allow querying for attributes
  2016-05-19  1:09 ` [PATCHv8 5/5] pathspec: allow querying for attributes Stefan Beller
  2016-05-19 18:53   ` Junio C Hamano
@ 2016-05-19 19:37   ` Junio C Hamano
  1 sibling, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2016-05-19 19:37 UTC (permalink / raw)
  To: Stefan Beller; +Cc: pclouds, git

Stefan Beller <sbeller@google.com> writes:

> +test_expect_success 'setup a tree' '
> +	mkdir sub &&
> +	for p in fileA fileB fileC fileAB fileAC fileBC fileNoLabel fileUnsetLabel fileSetLabel fileValue fileWrongLabel; do
> +		: >$p &&
> +		git add $p &&
> +		: >sub/$p
> +		git add sub/$p
> +	done &&
> +	git commit -m $p &&

What does this $p refer to?

> +	git ls-files >actual &&
> +	cat <<EOF >expect &&
> +fileA
> +fileAB
> +fileAC
> +fileB
> +fileBC
> +fileC
> +fileNoLabel
> +fileSetLabel
> +fileUnsetLabel
> +fileValue
> +fileWrongLabel
> +sub/fileA
> +sub/fileAB
> +sub/fileAC
> +sub/fileB
> +sub/fileBC
> +sub/fileC
> +sub/fileNoLabel
> +sub/fileSetLabel
> +sub/fileUnsetLabel
> +sub/fileValue
> +sub/fileWrongLabel
> +EOF
> +	test_cmp expect actual
> +'

If I were doing this, I'd prepare the list of paths (i.e. expect)
first and then create these paths using that list, i.e.

test_expect_success 'setup a tree' '
	cat <<-\EOF >expect &&
	fileA
	fileAB
	...
	sub/fileWrongLabel
	EOF
	mkdir sub &&
	while read path
	do
		: >$path &&
		git add $path || return 1
	done <expect &&
	git commit -m initial &&
	git ls-files >actual &&
	test_cmp expect actual
'

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 5/5] pathspec: allow querying for attributes
  2016-05-19 18:53   ` Junio C Hamano
@ 2016-05-19 20:42     ` Stefan Beller
  2016-05-19 21:00       ` Junio C Hamano
  0 siblings, 1 reply; 22+ messages in thread
From: Stefan Beller @ 2016-05-19 20:42 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, git@vger.kernel.org

On Thu, May 19, 2016 at 11:53 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt
>> index cafc284..aa9f220 100644
>> --- a/Documentation/glossary-content.txt
>> +++ b/Documentation/glossary-content.txt
>> @@ -384,6 +384,23 @@ full pathname may have special meaning:
>>  +
>>  Glob magic is incompatible with literal magic.
>>
>> +attr;;
>> +     Additionally to matching the pathspec, the path must have the
>> +     attribute as specified. The syntax for specifying the required
>> +     attributes is "`attr: [mode] <attribute name> [=value]`"
>> ++
>> +Attributes can have 4 states (Set, Unset, Set to a value, unspecified) and
>> +you can query each attribute for certain states. The "`[mode]`" is a special
>> +character to indicate which attribute states are looked for. The following
>> +modes are available:
>> +
>> + - an empty "`[mode]`" matches if the attribute is set
>> + - "`-`" the attribute must be unset
>> + - "`!`" the attribute must be unspecified
>> + - an empty "`[mode]`" combined with "`[=value]`" matches if the attribute has
>> +   the given value.
>> ++
>
> As an initial design, I find this much more agreeable than the
> previous rounds.  I however find the phrasing of the above harder
> than necessary to understand, for a few reasons.
>
>  * Mixed use of "X matches if ..." and "... must be Y" makes it
>    unclear if they are talking about different kind of things, or
>    the same kind of things in merely different ways.
>
>  * It does not make it clear "=value" is only meaningful when [mode]
>    is empty.
>
> Perhaps dropping the '[mode]' thing altogether and instead saying
>
>         After `attr:` comes a space separated list of "attribute
>         requirements", all of which must be met in order for the
>         path to be considered a match; this is in addition to the
>         usual non-magic pathspec pattern matching.
>
>         Each of the attribute requirements for the path takes one of
>         these forms:
>
>         - "`ATTR`" requires that the attribute `ATTR` must be set.
>
>         - "`-ATTR`" requires that the attribute `ATTR` must be unset.
>
>         - "`ATTR=VALUE`" requires that the attribute `ATTR` must be
>           set to the string `VALUE`.
>
>         - "`!ATTR`" requires that the attribute `ATTR` must be
>           unspecified.
>
> would make the resulting text easier to read?

That is way better!

>
>> +static int match_attrs(const char *name, int namelen,
>> +                    const struct pathspec_item *item)
>> +{
>> +     int i;
>> +
>> +     git_check_attr_counted(name, namelen, item->attr_check);
>> +     for (i = 0; i < item->attr_match_nr; i++) {
>> +             const char *value;
>> +             int matched;
>> +             enum attr_match_mode match_mode;
>> +
>> +             value = item->attr_check->check[i].value;
>> +             match_mode = item->attr_match[i].match_mode;
>> +
>> +             if (ATTR_TRUE(value))
>> +                     matched = match_mode == MATCH_SET;
>> +             else if (ATTR_FALSE(value))
>> +                     matched = match_mode == MATCH_UNSET;
>> +             else if (ATTR_UNSET(value))
>> +                     matched = match_mode == MATCH_UNSPECIFIED;
>
> readability nit:
>
>         matched = (match_mode == MATCH_WHATEVER);
>
> would be easier to view

ok.

>
>> +             else
>> +                     matched = (match_mode == MATCH_VALUE &&
>> +                                !strcmp(item->attr_match[i].value, value));
>
> and would match the last case above better.
>
>> +static void parse_pathspec_attr_match(struct pathspec_item *item, const char *value)
>> +{
>> +     struct string_list_item *si;
>> +     struct string_list list = STRING_LIST_INIT_DUP;
>> +
>> +
>> +     if (!value || !strlen(value))
>> +             die(_("attr spec must not be empty"));
>> +
>> +     string_list_split(&list, value, ' ', -1);
>> +     string_list_remove_empty_items(&list, 0);
>> +
>> +     if (!item->attr_check)
>> +             item->attr_check = git_attr_check_alloc();
>> +     else
>> +             die(_("Only one 'attr:' specification is allowed."));
>> +
>> +     ALLOC_GROW(item->attr_match, item->attr_match_nr + list.nr, item->attr_match_alloc);
>> +
>> +     for_each_string_list_item(si, &list) {
>> +             size_t attr_len;
>> +
>> +             int j = item->attr_match_nr++;
>> +             const char *attr = si->string;
>> +             struct attr_match *am = &item->attr_match[j];
>> +
>> +             if (attr[0] == '!')
>> +                     am->match_mode = MATCH_UNSPECIFIED;
>> +             else if (attr[0] == '-')
>> +                     am->match_mode = MATCH_UNSET;
>> +             else
>> +                     am->match_mode = MATCH_SET;
>> +
>> +             if (am->match_mode != MATCH_SET)
>> +                     /* skip first character */
>> +                     attr++;
>> +             attr_len = strcspn(attr, "=");
>> +             if (attr[attr_len] == '=') {
>> +                     am->match_mode = MATCH_VALUE;
>> +                     am->value = xstrdup(&attr[attr_len + 1]);
>> +                     if (strchr(am->value, '\\'))
>> +                             die(_("attr spec values must not contain backslashes"));
>> +             } else
>> +                     am->value = NULL;
>> +
>
> Let me try a variant to see if we can improve it (thinking aloud).
>
>         switch (*attr) {
>         case '!':
>                 am->match_mode = MATCH_UNSPECIFIED;
>                 attr++;
>                 break;
>         case '-':
>                 am->match_mode = MATCH_UNSET;
>                 attr++;
>                 break;
>         default:
>                 attr_len = strcspn(attr, "=");
>                 if (attr[attr_len] != '=')
>                         am->match_mode = MATCH_SET;
>                 else {
>                         am->match_mode = MATCH_VALUE;
>                         am->value = xstrdup(...);
>                 }
>                 break;
>         }
>
> Using switch/case does not save line counts at all but it may still
> make the result easier to follow.  More importantly, it would help
> you catch "!ATTR=VAR" as an "invalid attribute name 'ATTR=VAR' was
> requested" error by arranging the code to make sure that scanning
> for '=' would not happen in !/- case (in other words, while thiking
> aloud with an alternative way to write the same thing, I spotted a
> bug in the original ;-).

That makes sense.

>
>> +             if (!attr_name_valid(attr, attr_len)) {
>> +                     struct strbuf sb = STRBUF_INIT;
>> +                     am->match_mode = INVALID_ATTR;
>> +                     invalid_attr_name_message(&sb, attr, attr_len);
>> +                     die(_("invalid attribute in '%s': '%s'"), value, sb.buf);
>> +             }
>> +
>> +             am->attr = git_attr_counted(attr, attr_len);
>
> I'd do this the other order, i.e.
>
>         am->attr = git_attr_counted(...);
>         if (!am->attr) {
>                 ...
>                 die(...);
>         }
>
> It is wasteful to call attr_name_valid() yourself before seeing an
> error from git_attr_counted().
>
>> diff --git a/t/t6134-pathspec-with-labels.sh b/t/t6134-pathspec-with-labels.sh
>> new file mode 100755
>> index 0000000..c0d8cda
>> --- /dev/null
>> +++ b/t/t6134-pathspec-with-labels.sh
>> @@ -0,0 +1,166 @@
>> +#!/bin/sh
>> +
>> +test_description='test labels in pathspecs'
>> +. ./test-lib.sh
>> +
>> +test_expect_success 'setup a tree' '
>> +     mkdir sub &&
>> +     for p in fileA fileB fileC fileAB fileAC fileBC fileNoLabel fileUnsetLabel fileSetLabel fileValue fileWrongLabel; do
>> +             : >$p &&
>> +             git add $p &&
>> +             : >sub/$p
>> +             git add sub/$p
>> +     done &&
>> +     git commit -m $p &&
>> +     git ls-files >actual &&
>> +     cat <<EOF >expect &&
>> +fileA
>> +fileAB
>> +fileAC
>> +fileB
>> +fileBC
>> +fileC
>> +fileNoLabel
>> +fileSetLabel
>> +fileUnsetLabel
>> +fileValue
>> +fileWrongLabel
>> +sub/fileA
>> +sub/fileAB
>> +sub/fileAC
>> +sub/fileB
>> +sub/fileBC
>> +sub/fileC
>> +sub/fileNoLabel
>> +sub/fileSetLabel
>> +sub/fileUnsetLabel
>> +sub/fileValue
>> +sub/fileWrongLabel
>> +EOF
>
>         cat <<-\EOF >expect &&
>         fileA
>         ...
>         EOF
>
> to indent the whole thing?

$ grep -r "cat" |grep "<<-"|wc -l
915
$ grep -r "cat" |grep "<<"|grep -v "<<-"| wc -l
1329

I was undecided what the prevailing style is, some did indent,
others did not.

Ok, will indent.

>> +sq="'"
>
> I do not think you use this $sq variable below, but I may have
> overlooked its use.
>

ok

>> +     git ls-files :\(attr:\!label\) >actual &&
>
> Why not the more normal-looking ":(attr:!label)"?  I do not think
> history substitution would trigger in an script.
>
> And no, "I may cut and paste into my interactive shell while
> debugging" is not a good reason to make the end-result (i.e. script)
> less readable.

ok

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-19 18:55 ` [PATCHv8 0/5] pathspec magic extension to search " Junio C Hamano
@ 2016-05-19 21:00   ` Stefan Beller
  2016-05-19 21:05     ` Junio C Hamano
  0 siblings, 1 reply; 22+ messages in thread
From: Stefan Beller @ 2016-05-19 21:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, git@vger.kernel.org

On Thu, May 19, 2016 at 11:55 AM, Junio C Hamano <gitster@pobox.com> wrote:
> I think this round is 99% there.  The next step would be to answer
> "does the feature set we have here meet your needs that you wanted
> to fill with the submodule labels originally?" and I am hoping it is
> "yes".

But it's no. (...not quite / not yet)

I thought about this question since sending out the
series and I think we would want to improve submodules more.

In the original patch series "submodule groups" I tried to achieve two goals:
 (A) a grouping scheme for submodules
 (B) some sort of persistent configuration for such a group

(A) will be mostly solved now by the pathspec attrs. It may be a bit
confusing for users, as any other submodule related configuration is done
in .gitmodules. Also when moving a submodule (changing its path on the file
system, not the name in the config), any configuration still applies
except for the grouping scheme, which may not match any more.

(B) requires some thought though. Here is my vision:

    1) Allow pathspecs for sparse checkout.

      I wonder if we just add support for that in .git/info-sparse-checkout
      or if we add a new file that is for pathspecs only, or we have a config
      option whether sparse-checkout follows pathspecs or gitignore patterns

    2) Teach `git clone` a new option `--sparse-checkout <pathspec>`
      When that option is set the pathspec is written into the new file from
      (1) and core.sparsecheckout is set to true

    3) Advertise to do a `git clone --sparse-checkout
:(attr:default_submodules)`

Going this way we would help making submodules not different but integrate more
into other concepts of Git. As a downside this would require touching
sparse checkout which may be more time consuming than just adding a
`git clone --init-submodules-by-label` which stores the labels and only upddates
those submodules.

Or are there other ideas how to go from here?

Thanks,
Stefan



>
> Thanks.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 5/5] pathspec: allow querying for attributes
  2016-05-19 20:42     ` Stefan Beller
@ 2016-05-19 21:00       ` Junio C Hamano
  0 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2016-05-19 21:00 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Duy Nguyen, git@vger.kernel.org

Stefan Beller <sbeller@google.com> writes:

> $ grep -r "cat" |grep "<<-"|wc -l
> 915
> $ grep -r "cat" |grep "<<"|grep -v "<<-"| wc -l
> 1329
>
> I was undecided what the prevailing style is, some did indent,
> others did not.

FWIW, newer ones tend to use "<<-"; just FYI.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-19 21:00   ` Stefan Beller
@ 2016-05-19 21:05     ` Junio C Hamano
  2016-05-19 21:25       ` Stefan Beller
  0 siblings, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2016-05-19 21:05 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Duy Nguyen, git@vger.kernel.org

Stefan Beller <sbeller@google.com> writes:

> (B) requires some thought though. Here is my vision:
>
>     1) Allow pathspecs for sparse checkout.
>
>       I wonder if we just add support for that in .git/info-sparse-checkout
>       or if we add a new file that is for pathspecs only, or we have a config
>       option whether sparse-checkout follows pathspecs or gitignore patterns
>
>     2) Teach `git clone` a new option `--sparse-checkout <pathspec>`
>       When that option is set the pathspec is written into the new file from
>       (1) and core.sparsecheckout is set to true
>
>     3) Advertise to do a `git clone --sparse-checkout
> :(attr:default_submodules)`
>
> Going this way we would help making submodules not different but integrate more
> into other concepts of Git. As a downside this would require touching
> sparse checkout which may be more time consuming than just adding a
> `git clone --init-submodules-by-label` which stores the labels and only upddates
> those submodules.
>
> Or are there other ideas how to go from here?

My take is to pretend sparse checkout does not exist at all and then
go from there ;-)  It is a poorly designed and implemented "concept"
that we do not want to spread around.

You were going to add defaultGroup and teach 'clone' (and other
commands) about it, and have clone find submodules in that Group,
right?  Isn't the pathspec magic just another way to introduce
how you express the "defaultGroup", i.e. not with the "label" thing
in .gitmodules, but with pathspec elements with attribute magic?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-19 21:05     ` Junio C Hamano
@ 2016-05-19 21:25       ` Stefan Beller
  2016-05-20 17:00         ` Junio C Hamano
  2016-05-22 11:45         ` Duy Nguyen
  0 siblings, 2 replies; 22+ messages in thread
From: Stefan Beller @ 2016-05-19 21:25 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, git@vger.kernel.org

On Thu, May 19, 2016 at 2:05 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> (B) requires some thought though. Here is my vision:
>>
>>     1) Allow pathspecs for sparse checkout.
>>
>>       I wonder if we just add support for that in .git/info-sparse-checkout
>>       or if we add a new file that is for pathspecs only, or we have a config
>>       option whether sparse-checkout follows pathspecs or gitignore patterns
>>
>>     2) Teach `git clone` a new option `--sparse-checkout <pathspec>`
>>       When that option is set the pathspec is written into the new file from
>>       (1) and core.sparsecheckout is set to true
>>
>>     3) Advertise to do a `git clone --sparse-checkout
>> :(attr:default_submodules)`
>>
>> Going this way we would help making submodules not different but integrate more
>> into other concepts of Git. As a downside this would require touching
>> sparse checkout which may be more time consuming than just adding a
>> `git clone --init-submodules-by-label` which stores the labels and only upddates
>> those submodules.
>>
>> Or are there other ideas how to go from here?
>
> My take is to pretend sparse checkout does not exist at all and then
> go from there ;-)  It is a poorly designed and implemented "concept"
> that we do not want to spread around.
>
> You were going to add defaultGroup and teach 'clone' (and other
> commands) about it, and have clone find submodules in that Group,
> right?

Right. But upon finding the new name for clone, I wondered why
this has to be submodule specific. The attr pathspecs are also working
with any other files. So if you don't use submodules, I think it would be
pretty cool to have a

    git clone --sparse-checkout=Documentation/ ...

> Isn't the pathspec magic just another way to introduce
> how you express the "defaultGroup", i.e. not with the "label" thing
> in .gitmodules, but with pathspec elements with attribute magic?

Right. So instead I could do a

    git clone --recursive --restrict-submodules-to=<pathspec> --save-restriction

and then later on

    git submodule update --use-restriction-saved-by-clone

I think we'd not want more than that for some first real tests
by some engineers.

However after thinking about this for a while I think it's too
submodule centric? The more special sauce we add for submodules
the harder they will be to maintain/support as they grow into their own
thing down the road.

Using these pathspec attrs are a good example for why we would
want to make it more generic. Imagine a future, where more attributes
can be codified into pathspecs and this is one of the possibilities:

    git clone --sparse=":(exclude,size>5MB,binary)

which would not checkout files that are large binary files. We could
also extend the protocol (v2 with the capabilities in client speaks first)
to transmit such a requirement to the server.

Why is sparseness considered bad?
(I did find only limited resources on the net, those bogs I found are
advertising it as the last remainder Git needed to be superior to svn in
any discipline; I did not find a lot of negative feedback on it. So I guess
usability and confusion?)

If I wanted to just do the submodule only thing, this would be a smaller
series, I would guess.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-19 21:25       ` Stefan Beller
@ 2016-05-20 17:00         ` Junio C Hamano
  2016-05-20 18:12           ` Stefan Beller
  2016-05-22 11:45         ` Duy Nguyen
  1 sibling, 1 reply; 22+ messages in thread
From: Junio C Hamano @ 2016-05-20 17:00 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Duy Nguyen, git@vger.kernel.org

Stefan Beller <sbeller@google.com> writes:

> Right. But upon finding the new name for clone, I wondered why
> this has to be submodule specific. The attr pathspecs are also working
> with any other files. So if you don't use submodules, I think it would be
> pretty cool to have a
>
>     git clone --sparse-checkout=Documentation/ ...

It would be cool, but arent' "sparse" and the various existing
status "submodule" has very different things?

 - A submodule can be uninitialized, in which case you do get an empty
   directory but you do not see .git in it.

 - A path can be excluded by the sparse checkout mechanism, in which
   case you do not get _anything_ in the filesystem.

So "git clone --sparse-exclude=Documentation/" that does not waste
diskspace for Documentation/ directory may be an interesting thing
to have, and "git clone --sparse-exclude=submodule-dir/" that does
not even create submodule-dir/ directory may also be, but the latter
is quite different from a submodule that is not initialied in a
superproject that does not use any "sparse" mechanism.

Besides, I think (improved) submodule mechanism would be a good way
forward for scalability, and "sparse" hack is not (primarily because
it still populates the index fully with 5 million entries even when
your attention is narrowed only to a handful of directories with
2000 leaf entries; this misdesign requires other ugly hacks to be
piled on, like untracked cache and split index).

I do not think we want "submodule" to be tied to and dependent on
the latter.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-20 17:00         ` Junio C Hamano
@ 2016-05-20 18:12           ` Stefan Beller
  2016-05-20 18:19             ` Junio C Hamano
  0 siblings, 1 reply; 22+ messages in thread
From: Stefan Beller @ 2016-05-20 18:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Duy Nguyen, git@vger.kernel.org

On Fri, May 20, 2016 at 10:00 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> Right. But upon finding the new name for clone, I wondered why
>> this has to be submodule specific. The attr pathspecs are also working
>> with any other files. So if you don't use submodules, I think it would be
>> pretty cool to have a
>>
>>     git clone --sparse-checkout=Documentation/ ...
>
> It would be cool, but arent' "sparse" and the various existing
> status "submodule" has very different things?

Yes they are. In one of the various "submodule groups" series I
proposed a "defaultGroup" which allows commands to ignore
some submodules. That was conceptually the very same as a
"sparse checkout, just for submodules", i.e. the submodule is
initialized and has a directory as a place holder, but most commands
ignore its existence.

We decided that was a bad thing, so now I think of a light weight
"submodule.updateGroup" which holds a pathspec and is only
used for "submodule update" commands that have no explicit
pathspec given. (That setting would be set via "git clone
--submodule-pathspec <pathspec>")

>
>  - A submodule can be uninitialized, in which case you do get an empty
>    directory but you do not see .git in it.
>
>  - A path can be excluded by the sparse checkout mechanism, in which
>    case you do not get _anything_ in the filesystem.

Yes, but isn't that one of the minor issues?

>
> So "git clone --sparse-exclude=Documentation/" that does not waste
> diskspace for Documentation/ directory may be an interesting thing
> to have, and "git clone --sparse-exclude=submodule-dir/" that does
> not even create submodule-dir/ directory may also be, but the latter
> is quite different from a submodule that is not initialied in a
> superproject that does not use any "sparse" mechanism.
>
> Besides, I think (improved) submodule mechanism would be a good way
> forward for scalability, and "sparse" hack is not (primarily because
> it still populates the index fully with 5 million entries even when
> your attention is narrowed only to a handful of directories with
> 2000 leaf entries; this misdesign requires other ugly hacks to be
> piled on, like untracked cache and split index).
>
> I do not think we want "submodule" to be tied to and dependent on
> the latter.

Ok I just wanted to probe how much resistance I get here as an
indicator of how much more work that would be.

Besides I think (improved) sparse mechanism would be a good way
to not confuse users between submodule scalability and single
repo scalability. ;)

We don't have to keep 5 million things in the index there, but we can
stop on the tree/directory level, i.e. if a whole directory is excluded
That's all we'd need to keep a record of, no?

As a user I'd prefer to be exposed to as few concepts as possible,
and adding yet another concept of sparseness is not a good thing
IMHO, so I'll try to keep it simple there.

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-20 18:12           ` Stefan Beller
@ 2016-05-20 18:19             ` Junio C Hamano
  0 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2016-05-20 18:19 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Duy Nguyen, git@vger.kernel.org

Stefan Beller <sbeller@google.com> writes:

> As a user I'd prefer to be exposed to as few concepts as possible,
> and adding yet another concept of sparseness is not a good thing
> IMHO, so I'll try to keep it simple there.

Yes, and as a user, I'd prefer if I (and all the users) do not have
to even worry about the "sparse" hack.

If you are using submodules, it was in its design from day one that
it is not unsual for many of your submodules to be uninitialized
(but still present).  And that is a pretty normal state.  I do not
think we want to expose users to the "sparse" hack only to use
submodules effectively.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-19 21:25       ` Stefan Beller
  2016-05-20 17:00         ` Junio C Hamano
@ 2016-05-22 11:45         ` Duy Nguyen
  2016-05-23 18:49           ` Stefan Beller
  1 sibling, 1 reply; 22+ messages in thread
From: Duy Nguyen @ 2016-05-22 11:45 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, git@vger.kernel.org

On Fri, May 20, 2016 at 4:25 AM, Stefan Beller <sbeller@google.com> wrote:
>> My take is to pretend sparse checkout does not exist at all and then
>> go from there ;-)

Hehe.. shameless plug, narrow checkout [1] should be its great
successor where everything is done right (famous last words). Maybe I
can convince Stefan to finish that off then I'll finally bring narrow
clone!

[1] http://article.gmane.org/gmane.comp.version-control.git/289112

> Using these pathspec attrs are a good example for why we would
> want to make it more generic. Imagine a future, where more attributes
> can be codified into pathspecs and this is one of the possibilities:
>
>     git clone --sparse=":(exclude,size>5MB,binary)
>
> which would not checkout files that are large binary files. We could
> also extend the protocol (v2 with the capabilities in client speaks first)
> to transmit such a requirement to the server.

I think you need narrow clone there ;-) It's the first step to have a
clone with missing directories. I think then we can extend it further
to exclude (large) files.

> Why is sparseness considered bad?

It does not scale well when the worktree gets bigger. It can be slow
(but this is just a technical problem I haven't spent time on fixing).
And it does not enable narrow clone (not with lots of hacks, I think I
did just that in some early iterations).

> If I wanted to just do the submodule only thing, this would be a smaller
> series, I would guess.

No no no. Do more. Start with narrow checkout. I'll help ;-)
-- 
Duy

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-22 11:45         ` Duy Nguyen
@ 2016-05-23 18:49           ` Stefan Beller
  2016-05-24  2:00             ` Duy Nguyen
  0 siblings, 1 reply; 22+ messages in thread
From: Stefan Beller @ 2016-05-23 18:49 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Junio C Hamano, git@vger.kernel.org

On Sun, May 22, 2016 at 4:45 AM, Duy Nguyen <pclouds@gmail.com> wrote:
> On Fri, May 20, 2016 at 4:25 AM, Stefan Beller <sbeller@google.com> wrote:
>>> My take is to pretend sparse checkout does not exist at all and then
>>> go from there ;-)
>
> Hehe.. shameless plug, narrow checkout [1] should be its great
> successor where everything is done right (famous last words). Maybe I
> can convince Stefan to finish that off then I'll finally bring narrow
> clone!

I started reviewing that (see separate email).

So conceptually the narrow checkout is "sparse checkout just for
directories" IIUC.

A submodule is treated as a files (sometimes, e.g. diff) and sometimes
as a directory (e.g. in the working tree)

>
> [1] http://article.gmane.org/gmane.comp.version-control.git/289112
>
>> Using these pathspec attrs are a good example for why we would
>> want to make it more generic. Imagine a future, where more attributes
>> can be codified into pathspecs and this is one of the possibilities:
>>
>>     git clone --sparse=":(exclude,size>5MB,binary)
>>
>> which would not checkout files that are large binary files. We could
>> also extend the protocol (v2 with the capabilities in client speaks first)
>> to transmit such a requirement to the server.
>
> I think you need narrow clone there ;-) It's the first step to have a
> clone with missing directories. I think then we can extend it further
> to exclude (large) files.

But if we only exclude some files we are not having a binary
decision for a directory, so narrow checkout doesn't work here?

    git clone --sparse=":(exclude,size>5MB,binary)

would maybe lead to have
  dir/path.c #included
  dir/binary-asset.img # excluded

so it is more a sparse thing than a narrow thing?

>
>> Why is sparseness considered bad?
>
> It does not scale well when the worktree gets bigger. It can be slow
> (but this is just a technical problem I haven't spent time on fixing).
> And it does not enable narrow clone (not with lots of hacks, I think I
> did just that in some early iterations).
>
>> If I wanted to just do the submodule only thing, this would be a smaller
>> series, I would guess.
>
> No no no. Do more. Start with narrow checkout. I'll help ;-)

Thanks for the encouragement!
How is the interface going to look like for the narrow checkout/clone ?
If the UI is supposed to be very similar, we can merge the two concepts
and make it one thing.

But if the UI is different enough, we may want to keep it separate as
it is not as confusing to the user as we'd think?

What I imagine UI-wise, see
http://thread.gmane.org/gmane.comp.version-control.git/295221


Thanks,
Stefan

> --
> Duy

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCHv8 0/5] pathspec magic extension to search for attributes
  2016-05-23 18:49           ` Stefan Beller
@ 2016-05-24  2:00             ` Duy Nguyen
  0 siblings, 0 replies; 22+ messages in thread
From: Duy Nguyen @ 2016-05-24  2:00 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Junio C Hamano, git@vger.kernel.org

On Tue, May 24, 2016 at 1:49 AM, Stefan Beller <sbeller@google.com> wrote:
> On Sun, May 22, 2016 at 4:45 AM, Duy Nguyen <pclouds@gmail.com> wrote:
>> On Fri, May 20, 2016 at 4:25 AM, Stefan Beller <sbeller@google.com> wrote:
>>>> My take is to pretend sparse checkout does not exist at all and then
>>>> go from there ;-)
>>
>> Hehe.. shameless plug, narrow checkout [1] should be its great
>> successor where everything is done right (famous last words). Maybe I
>> can convince Stefan to finish that off then I'll finally bring narrow
>> clone!
>
> I started reviewing that (see separate email).
>
> So conceptually the narrow checkout is "sparse checkout just for
> directories" IIUC.

Yep.

> A submodule is treated as a files (sometimes, e.g. diff) and sometimes
> as a directory (e.g. in the working tree)

It's the same here. Submodules have a special mode in the index
(S_IFDIR + S_IF<something>) while dirs just have S_IFDIR. It's up to
the index user to treat it as a "file" or a directory (maybe only
unpack-trees)

>>> Using these pathspec attrs are a good example for why we would
>>> want to make it more generic. Imagine a future, where more attributes
>>> can be codified into pathspecs and this is one of the possibilities:
>>>
>>>     git clone --sparse=":(exclude,size>5MB,binary)
>>>
>>> which would not checkout files that are large binary files. We could
>>> also extend the protocol (v2 with the capabilities in client speaks first)
>>> to transmit such a requirement to the server.
>>
>> I think you need narrow clone there ;-) It's the first step to have a
>> clone with missing directories. I think then we can extend it further
>> to exclude (large) files.
>
> But if we only exclude some files we are not having a binary
> decision for a directory, so narrow checkout doesn't work here?
>
>     git clone --sparse=":(exclude,size>5MB,binary)
>
> would maybe lead to have
>   dir/path.c #included
>   dir/binary-asset.img # excluded
>
> so it is more a sparse thing than a narrow thing?

I wrote two paragraphs, only to realized that you were right. In the
end we just need a way to exchange "sparse" (not narrow) information
about this. Narrow clone may need to exchange more side information
(for git-replace) but I don't see how sparse files fit exactly in this
whole exchange yet.

>>> Why is sparseness considered bad?
>>
>> It does not scale well when the worktree gets bigger. It can be slow
>> (but this is just a technical problem I haven't spent time on fixing).
>> And it does not enable narrow clone (not with lots of hacks, I think I
>> did just that in some early iterations).
>>
>>> If I wanted to just do the submodule only thing, this would be a smaller
>>> series, I would guess.
>>
>> No no no. Do more. Start with narrow checkout. I'll help ;-)
>
> Thanks for the encouragement!
> How is the interface going to look like for the narrow checkout/clone ?
> If the UI is supposed to be very similar, we can merge the two concepts
> and make it one thing.
>
> But if the UI is different enough, we may want to keep it separate as
> it is not as confusing to the user as we'd think?

For narrow clone, I imagine you need to specify what directories, say
with --narrow-path option or simply --path, to clone, and that's it.
In future you may add --path-filter option that does
"exclude,size>5MB,binary" above.

For narrow checkout, we have two more operations: fold and unfold
directories. At plumbing level, maybe update-index can support these
operations explicitly. At porcelain level, checkout probably learns
about --narrow-path and submodule can make use of it if it wants.

> What I imagine UI-wise, see
> http://thread.gmane.org/gmane.comp.version-control.git/295221
>
>
> Thanks,
> Stefan
>
>> --
>> Duy



-- 
Duy

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2016-05-24  2:00 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-19  1:09 [PATCHv8 0/5] pathspec magic extension to search for attributes Stefan Beller
2016-05-19  1:09 ` [PATCHv8 1/5] string list: improve comment Stefan Beller
2016-05-19 18:08   ` Junio C Hamano
2016-05-19 18:12     ` Stefan Beller
2016-05-19  1:09 ` [PATCHv8 2/5] Documentation: fix a typo Stefan Beller
2016-05-19  1:09 ` [PATCHv8 3/5] pathspec: move long magic parsing out of prefix_pathspec Stefan Beller
2016-05-19  1:09 ` [PATCHv8 4/5] pathspec: move prefix check out of the inner loop Stefan Beller
2016-05-19  1:09 ` [PATCHv8 5/5] pathspec: allow querying for attributes Stefan Beller
2016-05-19 18:53   ` Junio C Hamano
2016-05-19 20:42     ` Stefan Beller
2016-05-19 21:00       ` Junio C Hamano
2016-05-19 19:37   ` Junio C Hamano
2016-05-19 18:55 ` [PATCHv8 0/5] pathspec magic extension to search " Junio C Hamano
2016-05-19 21:00   ` Stefan Beller
2016-05-19 21:05     ` Junio C Hamano
2016-05-19 21:25       ` Stefan Beller
2016-05-20 17:00         ` Junio C Hamano
2016-05-20 18:12           ` Stefan Beller
2016-05-20 18:19             ` Junio C Hamano
2016-05-22 11:45         ` Duy Nguyen
2016-05-23 18:49           ` Stefan Beller
2016-05-24  2:00             ` Duy Nguyen

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).