git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH nd/attr-match-optim-more 1/2] gitignore: make pattern parsing code a separate function
@ 2012-10-09  2:24 Nguyễn Thái Ngọc Duy
  2012-10-09  2:24 ` [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore Nguyễn Thái Ngọc Duy
  0 siblings, 1 reply; 14+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-09  2:24 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

This function can later be reused by attr.c. Also turn to_exclude
field into a flag.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 No changes.

 dir.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++---------------------
 dir.h |  2 +-
 2 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/dir.c b/dir.c
index 0015cc5..48aed85 100644
--- a/dir.c
+++ b/dir.c
@@ -308,42 +308,69 @@ static int no_wildcard(const char *string)
 	return string[simple_length(string)] == '\0';
 }
 
+static void parse_exclude_pattern(const char **pattern,
+				  int *patternlen,
+				  int *flags,
+				  int *nowildcardlen)
+{
+	const char *p = *pattern;
+	size_t i, len;
+
+	*flags = 0;
+	if (*p == '!') {
+		*flags |= EXC_FLAG_NEGATIVE;
+		p++;
+	}
+	len = strlen(p);
+	if (len && p[len - 1] == '/') {
+		len--;
+		*flags |= EXC_FLAG_MUSTBEDIR;
+	}
+	for (i = 0; i < len; i++) {
+		if (p[i] == '/')
+			break;
+	}
+	if (i == len)
+		*flags |= EXC_FLAG_NODIR;
+	*nowildcardlen = simple_length(p);
+	/*
+	 * we should have excluded the trailing slash from 'p' too,
+	 * but that's one more allocation. Instead just make sure
+	 * nowildcardlen does not exceed real patternlen
+	 */
+	if (*nowildcardlen > len)
+		*nowildcardlen = len;
+	if (*p == '*' && no_wildcard(p + 1))
+		*flags |= EXC_FLAG_ENDSWITH;
+	*pattern = p;
+	*patternlen = len;
+}
+
 void add_exclude(const char *string, const char *base,
 		 int baselen, struct exclude_list *which)
 {
 	struct exclude *x;
-	size_t len;
-	int to_exclude = 1;
-	int flags = 0;
+	int patternlen;
+	int flags;
+	int nowildcardlen;
 
-	if (*string == '!') {
-		to_exclude = 0;
-		string++;
-	}
-	len = strlen(string);
-	if (len && string[len - 1] == '/') {
+	parse_exclude_pattern(&string, &patternlen, &flags, &nowildcardlen);
+	if (flags & EXC_FLAG_MUSTBEDIR) {
 		char *s;
-		x = xmalloc(sizeof(*x) + len);
+		x = xmalloc(sizeof(*x) + patternlen + 1);
 		s = (char *)(x+1);
-		memcpy(s, string, len - 1);
-		s[len - 1] = '\0';
-		string = s;
+		memcpy(s, string, patternlen);
+		s[patternlen] = '\0';
 		x->pattern = s;
-		flags = EXC_FLAG_MUSTBEDIR;
 	} else {
 		x = xmalloc(sizeof(*x));
 		x->pattern = string;
 	}
-	x->to_exclude = to_exclude;
-	x->patternlen = strlen(string);
+	x->patternlen = patternlen;
+	x->nowildcardlen = nowildcardlen;
 	x->base = base;
 	x->baselen = baselen;
 	x->flags = flags;
-	if (!strchr(string, '/'))
-		x->flags |= EXC_FLAG_NODIR;
-	x->nowildcardlen = simple_length(string);
-	if (*string == '*' && no_wildcard(string+1))
-		x->flags |= EXC_FLAG_ENDSWITH;
 	ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
 	which->excludes[which->nr++] = x;
 }
@@ -518,7 +545,7 @@ int excluded_from_list(const char *pathname,
 	for (i = el->nr - 1; 0 <= i; i--) {
 		struct exclude *x = el->excludes[i];
 		const char *name, *exclude = x->pattern;
-		int to_exclude = x->to_exclude;
+		int to_exclude = x->flags & EXC_FLAG_NEGATIVE ? 0 : 1;
 		int namelen, prefix = x->nowildcardlen;
 
 		if (x->flags & EXC_FLAG_MUSTBEDIR) {
diff --git a/dir.h b/dir.h
index 893465a..41ea32d 100644
--- a/dir.h
+++ b/dir.h
@@ -11,6 +11,7 @@ struct dir_entry {
 #define EXC_FLAG_NODIR 1
 #define EXC_FLAG_ENDSWITH 4
 #define EXC_FLAG_MUSTBEDIR 8
+#define EXC_FLAG_NEGATIVE 16
 
 struct exclude_list {
 	int nr;
@@ -21,7 +22,6 @@ struct exclude_list {
 		int nowildcardlen;
 		const char *base;
 		int baselen;
-		int to_exclude;
 		int flags;
 	} **excludes;
 };
-- 
1.8.0.rc0.29.g1fdd78f

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore
  2012-10-09  2:24 [PATCH nd/attr-match-optim-more 1/2] gitignore: make pattern parsing code a separate function Nguyễn Thái Ngọc Duy
@ 2012-10-09  2:24 ` Nguyễn Thái Ngọc Duy
  2012-10-09  5:08   ` Junio C Hamano
  0 siblings, 1 reply; 14+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-09  2:24 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

.gitattributes and .gitignore share the same pattern syntax but has
separate matching implementation. Over the years, ignore's
implementation accumulates more optimizations while attr's stays the
same.

This patch adds those optimizations to attr. Basically it tries to
avoid fnmatch as much as possible in favor of strncmp.

A few notes from this work is put in the documentation:

* "!pattern" syntax is not supported in .gitattributes. Negative
  patterns may work well for a single attribute like .gitignore. It's
  confusing in .gitattributes are many attributes can be
  set/unset/undefined at using the same pattern.

* we support attaching attributes to directories at the syntax
  level, but we do not really attach attributes on directory or use
  them.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 No more die() for path/ syntax. Rephrase "!syntax" messages from not
 supported to forbidden. A small test to verify that we actually
 forbid it.

 Documentation/gitattributes.txt |  3 +-
 attr.c                          | 67 ++++++++++++++++++++++++++++++++++-------
 dir.c                           |  8 ++---
 dir.h                           |  1 +
 t/t0003-attributes.sh           | 14 +++++++++
 5 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 80120ea..cc2ff1d 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -23,7 +23,7 @@ Each line in `gitattributes` file is of form:
 That is, a pattern followed by an attributes list,
 separated by whitespaces.  When the pattern matches the
 path in question, the attributes listed on the line are given to
-the path.
+the path. Only files can be attached attributes to.
 
 Each attribute can be in one of these states for a given path:
 
@@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
 overrides an earlier line.  This overriding is done per
 attribute.  The rules how the pattern matches paths are the
 same as in `.gitignore` files; see linkgit:gitignore[5].
+Unlike `.gitignore`, negative patterns are forbidden.
 
 When deciding what attributes are assigned to a path, git
 consults `$GIT_DIR/info/attributes` file (which has the highest
diff --git a/attr.c b/attr.c
index e7caee4..7e85f82 100644
--- a/attr.c
+++ b/attr.c
@@ -115,6 +115,13 @@ struct attr_state {
 	const char *setto;
 };
 
+struct pattern {
+	const char *pattern;
+	int patternlen;
+	int nowildcardlen;
+	int flags;		/* EXC_FLAG_* */
+};
+
 /*
  * One rule, as from a .gitattributes file.
  *
@@ -131,7 +138,7 @@ struct attr_state {
  */
 struct match_attr {
 	union {
-		char *pattern;
+		struct pattern pat;
 		struct git_attr *attr;
 	} u;
 	char is_macro;
@@ -241,9 +248,17 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 	if (is_macro)
 		res->u.attr = git_attr_internal(name, namelen);
 	else {
-		res->u.pattern = (char *)&(res->state[num_attr]);
-		memcpy(res->u.pattern, name, namelen);
-		res->u.pattern[namelen] = 0;
+		char *p = (char *)&(res->state[num_attr]);
+		memcpy(p, name, namelen);
+		p[namelen] = 0;
+		res->u.pat.pattern = p;
+		parse_exclude_pattern(&res->u.pat.pattern,
+				      &res->u.pat.patternlen,
+				      &res->u.pat.flags,
+				      &res->u.pat.nowildcardlen);
+		if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
+			die(_("Negative patterns are forbidden in git attributes\n"
+			      "Use '\\!' for literal leading exclamation."));
 	}
 	res->is_macro = is_macro;
 	res->num_attr = num_attr;
@@ -640,25 +655,55 @@ static void prepare_attr_stack(const char *path)
 
 static int path_matches(const char *pathname, int pathlen,
 			const char *basename,
-			const char *pattern,
+			const struct pattern *pat,
 			const char *base, int baselen)
 {
-	if (!strchr(pattern, '/')) {
+	const char *pattern = pat->pattern;
+	int prefix = pat->nowildcardlen;
+	const char *name;
+	int namelen;
+
+	if (pat->flags & EXC_FLAG_NODIR) {
+		if (prefix == pat->patternlen &&
+		    !strcmp_icase(pattern, basename))
+			return 1;
+
+		if (pat->flags & EXC_FLAG_ENDSWITH &&
+		    pat->patternlen - 1 <= pathlen &&
+		    !strcmp_icase(pattern + 1, pathname +
+				  pathlen - pat->patternlen + 1))
+			return 1;
+
 		return (fnmatch_icase(pattern, basename, 0) == 0);
 	}
 	/*
 	 * match with FNM_PATHNAME; the pattern has base implicitly
 	 * in front of it.
 	 */
-	if (*pattern == '/')
+	if (*pattern == '/') {
 		pattern++;
+		prefix--;
+	}
+
+	/*
+	 * note: unlike excluded_from_list, baselen here does not
+	 * contain the trailing slash
+	 */
+
 	if (pathlen < baselen ||
 	    (baselen && pathname[baselen] != '/') ||
 	    strncmp(pathname, base, baselen))
 		return 0;
-	if (baselen != 0)
-		baselen++;
-	return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+
+	namelen = baselen ? pathlen - baselen - 1 : pathlen;
+	name = pathname + pathlen - namelen;
+
+	/* if the non-wildcard part is longer than the remaining
+	   pathname, surely it cannot match */
+	if (!namelen || prefix > namelen)
+		return 0;
+
+	return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
 }
 
 static int macroexpand_one(int attr_nr, int rem);
@@ -696,7 +741,7 @@ static int fill(const char *path, int pathlen, const char *basename,
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen, basename,
-				 a->u.pattern, base, stk->originlen))
+				 &a->u.pat, base, stk->originlen))
 			rem = fill_one("fill", a, rem);
 	}
 	return rem;
diff --git a/dir.c b/dir.c
index 48aed85..cddf043 100644
--- a/dir.c
+++ b/dir.c
@@ -308,10 +308,10 @@ static int no_wildcard(const char *string)
 	return string[simple_length(string)] == '\0';
 }
 
-static void parse_exclude_pattern(const char **pattern,
-				  int *patternlen,
-				  int *flags,
-				  int *nowildcardlen)
+void parse_exclude_pattern(const char **pattern,
+			   int *patternlen,
+			   int *flags,
+			   int *nowildcardlen)
 {
 	const char *p = *pattern;
 	size_t i, len;
diff --git a/dir.h b/dir.h
index 41ea32d..fd5c2aa 100644
--- a/dir.h
+++ b/dir.h
@@ -97,6 +97,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
 extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
 					  char **buf_p, struct exclude_list *which, int check_index);
 extern void add_excludes_from_file(struct dir_struct *, const char *fname);
+extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
 extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *which);
 extern void free_excludes(struct exclude_list *el);
diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
index 51f3045..4a1402f 100755
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@@ -242,4 +242,18 @@ test_expect_success 'bare repository: test info/attributes' '
 	attr_check subdir/a/i unspecified
 '
 
+test_expect_success 'leave bare' '
+	cd ..
+'
+
+test_expect_success 'negative patterns' '
+	echo "!f test=bar" >.gitattributes &&
+	test_must_fail git check-attr test -- f
+'
+
+test_expect_success 'patterns starting with exclamation' '
+	echo "\!f test=foo" >.gitattributes &&
+	attr_check "!f" foo
+'
+
 test_done
-- 
1.8.0.rc0.29.g1fdd78f

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore
  2012-10-09  2:24 ` [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore Nguyễn Thái Ngọc Duy
@ 2012-10-09  5:08   ` Junio C Hamano
  2012-10-09  6:10     ` Johannes Sixt
  2012-10-10 10:21     ` [PATCH v2 " Nguyễn Thái Ngọc Duy
  0 siblings, 2 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-10-09  5:08 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> .gitattributes and .gitignore share the same pattern syntax but
> has separate matching implementation. Over the years, ignore's
> implementation accumulates more optimizations while attr's stays
> the same.
>
> This patch adds those optimizations to attr. Basically it tries to
> avoid fnmatch as much as possible in favor of strncmp.
>
> A few notes from this work is put in the documentation:
>
> * "!pattern" syntax is not supported in .gitattributes. Negative
>   patterns may work well for a single attribute like .gitignore. It's
>   confusing in .gitattributes are many attributes can be
>   set/unset/undefined at using the same pattern.

I think the above misses the point.

Imagine if we allowed only one attribute per line, instead of
multiple attributes on one line.
    
 - If you want to unset the attribute, you would write "path -attr".

 - If you want to reset the attribute to unspecified, you would
   write "path !attr".

Both are used in conjunction with some other (typically more
generic) pattern that sets, sets to a value, and/or unsets the
attribute, to countermand its effect.

If you were to allow "!path attr", what does it mean?  It obviously
is not about setting the attr to true or to a string value, but is
it countermanding an earlier set and telling us to unset the attr,
or make the attr unspecified?

That is the ambiguity issue "!pattern" syntax would introduce if it
were to be allowed in the attributes.  I think "multiple attributes
on the same line" is a red herring.

> * we support attaching attributes to directories at the syntax
>   level, but we do not really attach attributes on directory or use
>   them.

I would say "... but we do not currently use attributes on
directories."

> diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
> index 80120ea..cc2ff1d 100644
> --- a/Documentation/gitattributes.txt
> +++ b/Documentation/gitattributes.txt
> @@ -23,7 +23,7 @@ Each line in `gitattributes` file is of form:
>  That is, a pattern followed by an attributes list,
>  separated by whitespaces.  When the pattern matches the
>  path in question, the attributes listed on the line are given to
> -the path.
> +the path. Only files can be attached attributes to.

Symbolic links?

I would strongly suggest dropping "Only ... can be...".  You can
specify attributes to anything and check with "check-attr".  It is
just that core part does not have anything that pays attention to
attributes given to directories in the current codebase.

> @@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
>  overrides an earlier line.  This overriding is done per
>  attribute.  The rules how the pattern matches paths are the
>  same as in `.gitignore` files; see linkgit:gitignore[5].
> +Unlike `.gitignore`, negative patterns are forbidden.

OK (I am debating myself if it helps the readers if we said why it
is forbidden to write such).

> diff --git a/attr.c b/attr.c
> index e7caee4..7e85f82 100644
> --- a/attr.c
> +++ b/attr.c
> @@ -115,6 +115,13 @@ struct attr_state {
>  	const char *setto;
>  };
>  
> +struct pattern {
> +	const char *pattern;
> +	int patternlen;
> +	int nowildcardlen;
> +	int flags;		/* EXC_FLAG_* */
> +};
> +
>  /*
>   * One rule, as from a .gitattributes file.
>   *
> @@ -131,7 +138,7 @@ struct attr_state {
>   */
>  struct match_attr {
>  	union {
> -		char *pattern;
> +		struct pattern pat;
>  		struct git_attr *attr;
>  	} u;
>  	char is_macro;
> @@ -241,9 +248,17 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
>  	if (is_macro)
>  		res->u.attr = git_attr_internal(name, namelen);
>  	else {
> -		res->u.pattern = (char *)&(res->state[num_attr]);
> -		memcpy(res->u.pattern, name, namelen);
> -		res->u.pattern[namelen] = 0;
> +		char *p = (char *)&(res->state[num_attr]);
> +		memcpy(p, name, namelen);
> +		p[namelen] = 0;

This is inherited from the original code, but *res is calloc(3)ed
so the above memcpy() automatically NUL terminates this string.

> +		res->u.pat.pattern = p;
> +		parse_exclude_pattern(&res->u.pat.pattern,
> +				      &res->u.pat.patternlen,
> +				      &res->u.pat.flags,
> +				      &res->u.pat.nowildcardlen);
> +		if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
> +			die(_("Negative patterns are forbidden in git attributes\n"
> +			      "Use '\\!' for literal leading exclamation."));
>  	}
>  	res->is_macro = is_macro;
>  	res->num_attr = num_attr;
> @@ -640,25 +655,55 @@ static void prepare_attr_stack(const char *path)
>  
>  static int path_matches(const char *pathname, int pathlen,
>  			const char *basename,
> -			const char *pattern,
> +			const struct pattern *pat,
>  			const char *base, int baselen)
>  {
> -	if (!strchr(pattern, '/')) {
> +	const char *pattern = pat->pattern;
> +	int prefix = pat->nowildcardlen;
> +	const char *name;
> +	int namelen;
> +
> +	if (pat->flags & EXC_FLAG_NODIR) {
> +		if (prefix == pat->patternlen &&
> +		    !strcmp_icase(pattern, basename))
> +			return 1;

At some point, we should rename strcmp_icase and strncmp_icase to
make it clear that

 (1) they are not about "strings"; and
 (2) icase is not always in effect.

They are about comparing pathnames and that is the reason why
depending on core.ignorecase settings we sometimes do _icase()
comparison.  The same issue is shared with fnmatch_icase() but
"fnmatch_" prefix hints that the helper is not about matching
general strings so it is with lessor problem compared with other
two.

> +		if (pat->flags & EXC_FLAG_ENDSWITH &&
> +		    pat->patternlen - 1 <= pathlen &&
> +		    !strcmp_icase(pattern + 1, pathname +
> +				  pathlen - pat->patternlen + 1))
> +			return 1;
> +
>  		return (fnmatch_icase(pattern, basename, 0) == 0);
>  	}
>  	/*
>  	 * match with FNM_PATHNAME; the pattern has base implicitly
>  	 * in front of it.
>  	 */
> -	if (*pattern == '/')
> +	if (*pattern == '/') {
>  		pattern++;
> +		prefix--;
> +	}
> +
> +	/*
> +	 * note: unlike excluded_from_list, baselen here does not
> +	 * contain the trailing slash
> +	 */
> +
>  	if (pathlen < baselen ||
>  	    (baselen && pathname[baselen] != '/') ||
>  	    strncmp(pathname, base, baselen))

This is probably strncmp_icase(), which is to be renamed to
something more sensible.

>  		return 0;
> -	if (baselen != 0)
> -		baselen++;
> -	return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
> +
> +	namelen = baselen ? pathlen - baselen - 1 : pathlen;

I think this "- 1" is what the above "note: unlike excluded_from..."
refers to, but then isn't a same adjustment necessary to the "if"
condition we see above that compares pathlen and baselen???

> +	name = pathname + pathlen - namelen;
> +
> +	/* if the non-wildcard part is longer than the remaining
> +	   pathname, surely it cannot match */

Style.

> +	if (!namelen || prefix > namelen)
> +		return 0;
> +
> +	return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
>  }

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore
  2012-10-09  5:08   ` Junio C Hamano
@ 2012-10-09  6:10     ` Johannes Sixt
  2012-10-09  6:47       ` Junio C Hamano
  2012-10-10 10:21     ` [PATCH v2 " Nguyễn Thái Ngọc Duy
  1 sibling, 1 reply; 14+ messages in thread
From: Johannes Sixt @ 2012-10-09  6:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Nguyễn Thái Ngọc Duy, git

Am 10/9/2012 7:08, schrieb Junio C Hamano:
> Imagine if we allowed only one attribute per line, instead of
> multiple attributes on one line.
>     
>  - If you want to unset the attribute, you would write "path -attr".
> 
>  - If you want to reset the attribute to unspecified, you would
>    write "path !attr".
> 
> Both are used in conjunction with some other (typically more
> generic) pattern that sets, sets to a value, and/or unsets the
> attribute, to countermand its effect.
> 
> If you were to allow "!path attr", what does it mean?  It obviously
> is not about setting the attr to true or to a string value, but is
> it countermanding an earlier set and telling us to unset the attr,
> or make the attr unspecified?

If I have at the toplevel:

  *.txt  whitespace=tabwidth=4

and in a subdirectory

  *.txt  whitespace=tabwidth=8
  !README.txt

it could be interpreted as "do not apply *.txt to REAME.txt in this
subdirectory". That is, it does not countermand some _particular_
attribute setting, but says "use the attributes collected elsewhere".

-- Hannes

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore
  2012-10-09  6:10     ` Johannes Sixt
@ 2012-10-09  6:47       ` Junio C Hamano
  2012-10-09 16:40         ` Junio C Hamano
  0 siblings, 1 reply; 14+ messages in thread
From: Junio C Hamano @ 2012-10-09  6:47 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Nguyễn Thái Ngọc Duy, git

Johannes Sixt <j.sixt@viscovery.net> writes:

> Am 10/9/2012 7:08, schrieb Junio C Hamano:
>> Imagine if we allowed only one attribute per line, instead of
>> multiple attributes on one line.
>>     
>>  - If you want to unset the attribute, you would write "path -attr".
>> 
>>  - If you want to reset the attribute to unspecified, you would
>>    write "path !attr".
>> 
>> Both are used in conjunction with some other (typically more
>> generic) pattern that sets, sets to a value, and/or unsets the
>> attribute, to countermand its effect.
>> 
>> If you were to allow "!path attr", what does it mean?  It obviously
>> is not about setting the attr to true or to a string value, but is
>> it countermanding an earlier set and telling us to unset the attr,
>> or make the attr unspecified?
>
> If I have at the toplevel:
>
>   *.txt  whitespace=tabwidth=4
>
> and in a subdirectory
>
>   *.txt  whitespace=tabwidth=8
>   !README.txt
>
> it could be interpreted as "do not apply *.txt to REAME.txt in this
> subdirectory". That is, it does not countermand some _particular_
> attribute setting, but says "use the attributes collected elsewhere".

It makes it unclear what "elsewhere" means, though (besides, it does
not match the way the matching logic works at all).

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore
  2012-10-09  6:47       ` Junio C Hamano
@ 2012-10-09 16:40         ` Junio C Hamano
  0 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-10-09 16:40 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Nguyễn Thái Ngọc Duy, git

Junio C Hamano <gitster@pobox.com> writes:

> Johannes Sixt <j.sixt@viscovery.net> writes:
>
>> Am 10/9/2012 7:08, schrieb Junio C Hamano:
>>> Imagine if we allowed only one attribute per line, instead of
>>> multiple attributes on one line.
>>>     
>>>  - If you want to unset the attribute, you would write "path -attr".
>>> 
>>>  - If you want to reset the attribute to unspecified, you would
>>>    write "path !attr".
>>> 
>>> Both are used in conjunction with some other (typically more
>>> generic) pattern that sets, sets to a value, and/or unsets the
>>> attribute, to countermand its effect.
>>> 
>>> If you were to allow "!path attr", what does it mean?  It obviously
>>> is not about setting the attr to true or to a string value, but is
>>> it countermanding an earlier set and telling us to unset the attr,
>>> or make the attr unspecified?
>>
>> If I have at the toplevel:
>>
>>   *.txt  whitespace=tabwidth=4
>>
>> and in a subdirectory
>>
>>   *.txt  whitespace=tabwidth=8
>>   !README.txt
>>
>> it could be interpreted as "do not apply *.txt to REAME.txt in this
>> subdirectory". That is, it does not countermand some _particular_
>> attribute setting, but says "use the attributes collected elsewhere".
>
> It makes it unclear what "elsewhere" means, though (besides, it does
> not match the way the matching logic works at all).

Ignoring the current implementation, I find the suggested semantics
somewhat intriguing.  It is something we may want to look into in
the future.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-09  5:08   ` Junio C Hamano
  2012-10-09  6:10     ` Johannes Sixt
@ 2012-10-10 10:21     ` Nguyễn Thái Ngọc Duy
  2012-10-10 20:03       ` Junio C Hamano
                         ` (2 more replies)
  1 sibling, 3 replies; 14+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-10 10:21 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

.gitattributes and .gitignore share the same pattern syntax but has
separate matching implementation. Over the years, ignore's
implementation accumulates more optimizations while attr's stays the
same.

This patch adds those optimizations to attr. Basically it tries to
avoid fnmatch as much as possible in favor of strncmp.

A few notes from this work is put in the documentation:

* "!pattern" syntax is not supported in .gitattributes as it's not
  clear what it means (e.g. "!path attr" is about unsetting attr, or
  undefining it..)

* patterns applying to directories

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 How about this? Diff from the previous version:

   diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
   index cc2ff1d..9a0ed19 100644
   --- a/Documentation/gitattributes.txt
   +++ b/Documentation/gitattributes.txt
   @@ -23,7 +23,7 @@ Each line in `gitattributes` file is of form:
    That is, a pattern followed by an attributes list,
    separated by whitespaces.  When the pattern matches the
    path in question, the attributes listed on the line are given to
   -the path. Only files can be attached attributes to.
   +the path.
    
    Each attribute can be in one of these states for a given path:
    
   @@ -58,6 +58,13 @@ attribute.  The rules how the pattern matches paths are the
    same as in `.gitignore` files; see linkgit:gitignore[5].
    Unlike `.gitignore`, negative patterns are not supported.
    
   +Note that if a .gitignore rule matches a directory, the directory
   +is ignored, which may be seen as assigning "ignore" attribute the
   +directory and all files and directories inside. However, if a
   +.gitattributes rule matches a directory, it manipulates
   +attributes on that directory only, not files and directories
   +inside.
   +
    When deciding what attributes are assigned to a path, git
    consults `$GIT_DIR/info/attributes` file (which has the highest
    precedence), `.gitattributes` file in the same directory as the
   diff --git a/attr.c b/attr.c
   index 7e85f82..4faf1ff 100644
   --- a/attr.c
   +++ b/attr.c
   @@ -250,7 +250,6 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
    	else {
    		char *p = (char *)&(res->state[num_attr]);
    		memcpy(p, name, namelen);
   -		p[namelen] = 0;
    		res->u.pat.pattern = p;
    		parse_exclude_pattern(&res->u.pat.pattern,
    				      &res->u.pat.patternlen,
   @@ -690,16 +689,18 @@ static int path_matches(const char *pathname, int pathlen,
    	 * contain the trailing slash
    	 */
    
   -	if (pathlen < baselen ||
   +	if (pathlen < baselen + 1 ||
    	    (baselen && pathname[baselen] != '/') ||
   -	    strncmp(pathname, base, baselen))
   +	    strncmp_icase(pathname, base, baselen))
    		return 0;
    
    	namelen = baselen ? pathlen - baselen - 1 : pathlen;
    	name = pathname + pathlen - namelen;
    
   -	/* if the non-wildcard part is longer than the remaining
   -	   pathname, surely it cannot match */
   +	/*
   +	 * if the non-wildcard part is longer than the remaining
   +	 * pathname, surely it cannot match
   +	 */
    	if (!namelen || prefix > namelen)
    		return 0;
 

 Documentation/gitattributes.txt |  8 +++++
 attr.c                          | 72 +++++++++++++++++++++++++++++++++--------
 dir.c                           |  8 ++---
 dir.h                           |  1 +
 t/t0003-attributes.sh           | 14 ++++++++
 5 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 80120ea..9a0ed19 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -56,6 +56,14 @@ When more than one pattern matches the path, a later line
 overrides an earlier line.  This overriding is done per
 attribute.  The rules how the pattern matches paths are the
 same as in `.gitignore` files; see linkgit:gitignore[5].
+Unlike `.gitignore`, negative patterns are not supported.
+
+Note that if a .gitignore rule matches a directory, the directory
+is ignored, which may be seen as assigning "ignore" attribute the
+directory and all files and directories inside. However, if a
+.gitattributes rule matches a directory, it manipulates
+attributes on that directory only, not files and directories
+inside.
 
 When deciding what attributes are assigned to a path, git
 consults `$GIT_DIR/info/attributes` file (which has the highest
diff --git a/attr.c b/attr.c
index e7caee4..4faf1ff 100644
--- a/attr.c
+++ b/attr.c
@@ -115,6 +115,13 @@ struct attr_state {
 	const char *setto;
 };
 
+struct pattern {
+	const char *pattern;
+	int patternlen;
+	int nowildcardlen;
+	int flags;		/* EXC_FLAG_* */
+};
+
 /*
  * One rule, as from a .gitattributes file.
  *
@@ -131,7 +138,7 @@ struct attr_state {
  */
 struct match_attr {
 	union {
-		char *pattern;
+		struct pattern pat;
 		struct git_attr *attr;
 	} u;
 	char is_macro;
@@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 	if (is_macro)
 		res->u.attr = git_attr_internal(name, namelen);
 	else {
-		res->u.pattern = (char *)&(res->state[num_attr]);
-		memcpy(res->u.pattern, name, namelen);
-		res->u.pattern[namelen] = 0;
+		char *p = (char *)&(res->state[num_attr]);
+		memcpy(p, name, namelen);
+		res->u.pat.pattern = p;
+		parse_exclude_pattern(&res->u.pat.pattern,
+				      &res->u.pat.patternlen,
+				      &res->u.pat.flags,
+				      &res->u.pat.nowildcardlen);
+		if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
+			die(_("Negative patterns are forbidden in git attributes\n"
+			      "Use '\\!' for literal leading exclamation."));
 	}
 	res->is_macro = is_macro;
 	res->num_attr = num_attr;
@@ -640,25 +654,57 @@ static void prepare_attr_stack(const char *path)
 
 static int path_matches(const char *pathname, int pathlen,
 			const char *basename,
-			const char *pattern,
+			const struct pattern *pat,
 			const char *base, int baselen)
 {
-	if (!strchr(pattern, '/')) {
+	const char *pattern = pat->pattern;
+	int prefix = pat->nowildcardlen;
+	const char *name;
+	int namelen;
+
+	if (pat->flags & EXC_FLAG_NODIR) {
+		if (prefix == pat->patternlen &&
+		    !strcmp_icase(pattern, basename))
+			return 1;
+
+		if (pat->flags & EXC_FLAG_ENDSWITH &&
+		    pat->patternlen - 1 <= pathlen &&
+		    !strcmp_icase(pattern + 1, pathname +
+				  pathlen - pat->patternlen + 1))
+			return 1;
+
 		return (fnmatch_icase(pattern, basename, 0) == 0);
 	}
 	/*
 	 * match with FNM_PATHNAME; the pattern has base implicitly
 	 * in front of it.
 	 */
-	if (*pattern == '/')
+	if (*pattern == '/') {
 		pattern++;
-	if (pathlen < baselen ||
+		prefix--;
+	}
+
+	/*
+	 * note: unlike excluded_from_list, baselen here does not
+	 * contain the trailing slash
+	 */
+
+	if (pathlen < baselen + 1 ||
 	    (baselen && pathname[baselen] != '/') ||
-	    strncmp(pathname, base, baselen))
+	    strncmp_icase(pathname, base, baselen))
+		return 0;
+
+	namelen = baselen ? pathlen - baselen - 1 : pathlen;
+	name = pathname + pathlen - namelen;
+
+	/*
+	 * if the non-wildcard part is longer than the remaining
+	 * pathname, surely it cannot match
+	 */
+	if (!namelen || prefix > namelen)
 		return 0;
-	if (baselen != 0)
-		baselen++;
-	return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+
+	return fnmatch_icase(pattern, name, FNM_PATHNAME) == 0;
 }
 
 static int macroexpand_one(int attr_nr, int rem);
@@ -696,7 +742,7 @@ static int fill(const char *path, int pathlen, const char *basename,
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen, basename,
-				 a->u.pattern, base, stk->originlen))
+				 &a->u.pat, base, stk->originlen))
 			rem = fill_one("fill", a, rem);
 	}
 	return rem;
diff --git a/dir.c b/dir.c
index 48aed85..cddf043 100644
--- a/dir.c
+++ b/dir.c
@@ -308,10 +308,10 @@ static int no_wildcard(const char *string)
 	return string[simple_length(string)] == '\0';
 }
 
-static void parse_exclude_pattern(const char **pattern,
-				  int *patternlen,
-				  int *flags,
-				  int *nowildcardlen)
+void parse_exclude_pattern(const char **pattern,
+			   int *patternlen,
+			   int *flags,
+			   int *nowildcardlen)
 {
 	const char *p = *pattern;
 	size_t i, len;
diff --git a/dir.h b/dir.h
index 41ea32d..fd5c2aa 100644
--- a/dir.h
+++ b/dir.h
@@ -97,6 +97,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
 extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
 					  char **buf_p, struct exclude_list *which, int check_index);
 extern void add_excludes_from_file(struct dir_struct *, const char *fname);
+extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
 extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *which);
 extern void free_excludes(struct exclude_list *el);
diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
index 51f3045..4a1402f 100755
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@@ -242,4 +242,18 @@ test_expect_success 'bare repository: test info/attributes' '
 	attr_check subdir/a/i unspecified
 '
 
+test_expect_success 'leave bare' '
+	cd ..
+'
+
+test_expect_success 'negative patterns' '
+	echo "!f test=bar" >.gitattributes &&
+	test_must_fail git check-attr test -- f
+'
+
+test_expect_success 'patterns starting with exclamation' '
+	echo "\!f test=foo" >.gitattributes &&
+	attr_check "!f" foo
+'
+
 test_done
-- 
1.7.12.1.406.g6ab07c4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-10 10:21     ` [PATCH v2 " Nguyễn Thái Ngọc Duy
@ 2012-10-10 20:03       ` Junio C Hamano
  2012-10-12 10:13         ` Nguyen Thai Ngoc Duy
  2012-10-10 21:41       ` Junio C Hamano
  2012-10-12 19:09       ` Junio C Hamano
  2 siblings, 1 reply; 14+ messages in thread
From: Junio C Hamano @ 2012-10-10 20:03 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> .gitattributes and .gitignore share the same pattern syntax but has
> separate matching implementation. Over the years, ignore's
> implementation accumulates more optimizations while attr's stays the
> same.
>
> This patch adds those optimizations to attr. Basically it tries to
> avoid fnmatch as much as possible in favor of strncmp.
>
> A few notes from this work is put in the documentation:
>
> * "!pattern" syntax is not supported in .gitattributes as it's not

s/not supported/forbidden/;

A reader can take "not supported" as "silently ignored", which is
not the case.  An explicit "forbidden" does not have such a
misinterpretation.

It would save time from both of us if you can check what is queued
on 'pu'.  I do not think I touched the code for off-by-one bugs
there, though.

>   clear what it means (e.g. "!path attr" is about unsetting attr, or
>   undefining it..)
>
> * patterns applying to directories
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  How about this? Diff from the previous version:
>
>    diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
>    index cc2ff1d..9a0ed19 100644
>    --- a/Documentation/gitattributes.txt
>    +++ b/Documentation/gitattributes.txt
>    @@ -23,7 +23,7 @@ Each line in `gitattributes` file is of form:
>     That is, a pattern followed by an attributes list,
>     separated by whitespaces.  When the pattern matches the
>     path in question, the attributes listed on the line are given to
>    -the path. Only files can be attached attributes to.
>    +the path.
>     
>     Each attribute can be in one of these states for a given path:
>     
>    @@ -58,6 +58,13 @@ attribute.  The rules how the pattern matches paths are the
>     same as in `.gitignore` files; see linkgit:gitignore[5].
>     Unlike `.gitignore`, negative patterns are not supported.
>     
>    +Note that if a .gitignore rule matches a directory, the directory
>    +is ignored, which may be seen as assigning "ignore" attribute the
>    +directory and all files and directories inside. However, if a
>    +.gitattributes rule matches a directory, it manipulates
>    +attributes on that directory only, not files and directories
>    +inside.

Why do you even need to mention .gitignore in gitattributes manual
where it is irrelevant from the reader's point of view?

Besides, the interpretation the "may be seen as" suggests is
actively wrong.  It is assigning "ignore-this-and-below" attribute
to the directory, and there is no inconsistency between the two.

Again, I'd suggest dropping this addition.

>    diff --git a/attr.c b/attr.c
>    index 7e85f82..4faf1ff 100644
>    --- a/attr.c
>    +++ b/attr.c
>    @@ -250,7 +250,6 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
>     	else {
>     		char *p = (char *)&(res->state[num_attr]);
>     		memcpy(p, name, namelen);
>    -		p[namelen] = 0;
>     		res->u.pat.pattern = p;
>     		parse_exclude_pattern(&res->u.pat.pattern,
>     				      &res->u.pat.patternlen,
>    @@ -690,16 +689,18 @@ static int path_matches(const char *pathname, int pathlen,
>     	 * contain the trailing slash
>     	 */
>     
>    -	if (pathlen < baselen ||
>    +	if (pathlen < baselen + 1 ||
>     	    (baselen && pathname[baselen] != '/') ||
>    -	    strncmp(pathname, base, baselen))
>    +	    strncmp_icase(pathname, base, baselen))
>     		return 0;
>     
>     	namelen = baselen ? pathlen - baselen - 1 : pathlen;
>     	name = pathname + pathlen - namelen;
>     
>    -	/* if the non-wildcard part is longer than the remaining
>    -	   pathname, surely it cannot match */
>    +	/*
>    +	 * if the non-wildcard part is longer than the remaining
>    +	 * pathname, surely it cannot match
>    +	 */
>     	if (!namelen || prefix > namelen)
>     		return 0;

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-10 10:21     ` [PATCH v2 " Nguyễn Thái Ngọc Duy
  2012-10-10 20:03       ` Junio C Hamano
@ 2012-10-10 21:41       ` Junio C Hamano
  2012-10-10 21:50         ` Junio C Hamano
  2012-10-11  1:36         ` Nguyen Thai Ngoc Duy
  2012-10-12 19:09       ` Junio C Hamano
  2 siblings, 2 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-10-10 21:41 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

>    @@ -690,16 +689,18 @@ static int path_matches(const char *pathname, int pathlen,
>     	 * contain the trailing slash
>     	 */
>     
>    -	if (pathlen < baselen ||
>    +	if (pathlen < baselen + 1 ||
>     	    (baselen && pathname[baselen] != '/') ||
>    -	    strncmp(pathname, base, baselen))
>    +	    strncmp_icase(pathname, base, baselen))

Shouldn't the last comparison be

	strncmp_icase(pathname, base, baselen + 1)

instead, if you are trying to match this part from dir.c where
baselen does count the trailing slash?

		if (pathlen < x->baselen ||
		    (x->baselen && pathname[x->baselen-1] != '/') ||
		    strncmp_icase(pathname, x->base, x->baselen))
			continue;

In other words, relative to what was queued to 'pu', something like
this instead....

-- >8 --
Subject: [PATCH] fixup: matching path_matches() in attr.c to that of dir.c

In this function, baselen does not count the trailing slash that
should come after the directory name held in "basename" variable, so
whenever the corresponding code in dir.c:excluded_from_list() refers
to x->baselen, we would need to use "baselen+1" consistently.

Also remove unnecessary NUL-termination of a buffer obtained from
calloc().

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 attr.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/attr.c b/attr.c
index 7e85f82..a9c04a8 100644
--- a/attr.c
+++ b/attr.c
@@ -250,7 +250,6 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 	else {
 		char *p = (char *)&(res->state[num_attr]);
 		memcpy(p, name, namelen);
-		p[namelen] = 0;
 		res->u.pat.pattern = p;
 		parse_exclude_pattern(&res->u.pat.pattern,
 				      &res->u.pat.patternlen,
@@ -687,19 +686,21 @@ static int path_matches(const char *pathname, int pathlen,
 
 	/*
 	 * note: unlike excluded_from_list, baselen here does not
-	 * contain the trailing slash
+	 * count the trailing slash.
 	 */
 
-	if (pathlen < baselen ||
+	if (pathlen < baselen + 1 ||
 	    (baselen && pathname[baselen] != '/') ||
-	    strncmp(pathname, base, baselen))
+	    strncmp_icase(pathname, base, baselen + 1))
 		return 0;
 
 	namelen = baselen ? pathlen - baselen - 1 : pathlen;
 	name = pathname + pathlen - namelen;
 
-	/* if the non-wildcard part is longer than the remaining
-	   pathname, surely it cannot match */
+	/*
+	 * if the non-wildcard part is longer than the remaining
+	 * pathname, surely it cannot match.
+	 */
 	if (!namelen || prefix > namelen)
 		return 0;
 
-- 
1.8.0.rc1.76.g5a375e6

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-10 21:41       ` Junio C Hamano
@ 2012-10-10 21:50         ` Junio C Hamano
  2012-10-11  1:36         ` Nguyen Thai Ngoc Duy
  1 sibling, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2012-10-10 21:50 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>>    @@ -690,16 +689,18 @@ static int path_matches(const char *pathname, int pathlen,
>>     	 * contain the trailing slash
>>     	 */
>>     
>>    -	if (pathlen < baselen ||
>>    +	if (pathlen < baselen + 1 ||
>>     	    (baselen && pathname[baselen] != '/') ||
>>    -	    strncmp(pathname, base, baselen))
>>    +	    strncmp_icase(pathname, base, baselen))
>
> Shouldn't the last comparison be
>
> 	strncmp_icase(pathname, base, baselen + 1)
>
> instead, if you are trying to match this part from dir.c where
> baselen does count the trailing slash?
>
> 		if (pathlen < x->baselen ||
> 		    (x->baselen && pathname[x->baselen-1] != '/') ||
> 		    strncmp_icase(pathname, x->base, x->baselen))
> 			continue;
>
> In other words, relative to what was queued to 'pu', something like
> this instead....

Aaaaand,... it doesn't work and breaks t0003.sh.  Sigh...

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-10 21:41       ` Junio C Hamano
  2012-10-10 21:50         ` Junio C Hamano
@ 2012-10-11  1:36         ` Nguyen Thai Ngoc Duy
  1 sibling, 0 replies; 14+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-11  1:36 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Thu, Oct 11, 2012 at 4:41 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>>    @@ -690,16 +689,18 @@ static int path_matches(const char *pathname, int pathlen,
>>        * contain the trailing slash
>>        */
>>
>>    -  if (pathlen < baselen ||
>>    +  if (pathlen < baselen + 1 ||
>>           (baselen && pathname[baselen] != '/') ||
>>    -      strncmp(pathname, base, baselen))
>>    +      strncmp_icase(pathname, base, baselen))
>
> Shouldn't the last comparison be
>
>         strncmp_icase(pathname, base, baselen + 1)
>
> instead,

"base" does not contain the trailing slash, so it can only match up to
base[baselen-1], then fail at base[baselen], which is '\0'. The "no
trailing slash" business in this function is tricky :(

> if you are trying to match this part from dir.c where
> baselen does count the trailing slash?
>
>                 if (pathlen < x->baselen ||
>                     (x->baselen && pathname[x->baselen-1] != '/') ||
>                     strncmp_icase(pathname, x->base, x->baselen))
>                         continue;

strncmp_icase() here just needs to compare x->baselen-1 chars (i.e. no
trailing slash) as the trailing slash is explicitly checked just above
strncmp_icase. But it does not hurt to compare an extra character so I
leave it unchanged. But obviously it causes confusion when we try to
match this function and the one in attr.c
-- 
Duy

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-10 20:03       ` Junio C Hamano
@ 2012-10-12 10:13         ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 14+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-12 10:13 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Thu, Oct 11, 2012 at 3:03 AM, Junio C Hamano <gitster@pobox.com> wrote:
> It would save time from both of us if you can check what is queued
> on 'pu'.  I do not think I touched the code for off-by-one bugs
> there, though.

'pu' looks good.
-- 
Duy

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-10 10:21     ` [PATCH v2 " Nguyễn Thái Ngọc Duy
  2012-10-10 20:03       ` Junio C Hamano
  2012-10-10 21:41       ` Junio C Hamano
@ 2012-10-12 19:09       ` Junio C Hamano
  2012-10-13  4:32         ` Nguyen Thai Ngoc Duy
  2 siblings, 1 reply; 14+ messages in thread
From: Junio C Hamano @ 2012-10-12 19:09 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
> index 51f3045..4a1402f 100755
> --- a/t/t0003-attributes.sh
> +++ b/t/t0003-attributes.sh
> @@ -242,4 +242,18 @@ test_expect_success 'bare repository: test info/attributes' '
>  	attr_check subdir/a/i unspecified
>  '
>  
> +test_expect_success 'leave bare' '
> +	cd ..
> +'
> +
> +test_expect_success 'negative patterns' '
> +	echo "!f test=bar" >.gitattributes &&
> +	test_must_fail git check-attr test -- f
> +'
> +
> +test_expect_success 'patterns starting with exclamation' '
> +	echo "\!f test=foo" >.gitattributes &&
> +	attr_check "!f" foo
> +'
> +
>  test_done

This is not entirely your fault, but please don't do that "cd ..".

The original test had "cd bare", made an assumption that step will
never fail (which is mostly correct), and ran everything afterward
in that subdirectory.

Adding "Do a 'cd ..' to come back" is a horrible way to build on
it.  Imagine what happens when another person also did the same
thing, and both changes need to be merged.  You will end up going up
two levels, which is not what you want.

I think the right fix is to make each of the test that wants to run
in "bare" chdir in its own subshell, or not append these new tests
that do not run in the "bare" to the end of this file, but before
the execution goes down to "bare".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2 2/2] attr: more matching optimizations from .gitignore
  2012-10-12 19:09       ` Junio C Hamano
@ 2012-10-13  4:32         ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 14+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-13  4:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Fri, Oct 12, 2012 at 12:09:57PM -0700, Junio C Hamano wrote:
> This is not entirely your fault, but please don't do that "cd ..".
> 
> The original test had "cd bare", made an assumption that step will
> never fail (which is mostly correct), and ran everything afterward
> in that subdirectory.
> 
> Adding "Do a 'cd ..' to come back" is a horrible way to build on
> it.  Imagine what happens when another person also did the same
> thing, and both changes need to be merged.  You will end up going up
> two levels, which is not what you want.
> 
> I think the right fix is to make each of the test that wants to run
> in "bare" chdir in its own subshell, or not append these new tests
> that do not run in the "bare" to the end of this file, but before
> the execution goes down to "bare".
> 

The reason I put these tests at the end was because I destroy
.gitattributes and it might affect the following tests. But obviously
the bare tests run in its own repository (and I have not committed
anything till the repo is cloned) so my .gitattributes changes can't
affect them.

Please squash this in

-- 8< --
diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
index 4a1402f..f6c21ea 100755
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@@ -206,6 +206,16 @@ test_expect_success 'root subdir attribute test' '
 	attr_check subdir/a/i unspecified
 '
 
+test_expect_success 'negative patterns' '
+	echo "!f test=bar" >.gitattributes &&
+	test_must_fail git check-attr test -- f
+'
+
+test_expect_success 'patterns starting with exclamation' '
+	echo "\!f test=foo" >.gitattributes &&
+	attr_check "!f" foo
+'
+
 test_expect_success 'setup bare' '
 	git clone --bare . bare.git &&
 	cd bare.git
@@ -242,18 +252,4 @@ test_expect_success 'bare repository: test info/attributes' '
 	attr_check subdir/a/i unspecified
 '
 
-test_expect_success 'leave bare' '
-	cd ..
-'
-
-test_expect_success 'negative patterns' '
-	echo "!f test=bar" >.gitattributes &&
-	test_must_fail git check-attr test -- f
-'
-
-test_expect_success 'patterns starting with exclamation' '
-	echo "\!f test=foo" >.gitattributes &&
-	attr_check "!f" foo
-'
-
 test_done
-- 8< --
-- 
Duy

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-10-13  4:34 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-09  2:24 [PATCH nd/attr-match-optim-more 1/2] gitignore: make pattern parsing code a separate function Nguyễn Thái Ngọc Duy
2012-10-09  2:24 ` [PATCH nd/attr-match-optim-more 2/2] attr: more matching optimizations from .gitignore Nguyễn Thái Ngọc Duy
2012-10-09  5:08   ` Junio C Hamano
2012-10-09  6:10     ` Johannes Sixt
2012-10-09  6:47       ` Junio C Hamano
2012-10-09 16:40         ` Junio C Hamano
2012-10-10 10:21     ` [PATCH v2 " Nguyễn Thái Ngọc Duy
2012-10-10 20:03       ` Junio C Hamano
2012-10-12 10:13         ` Nguyen Thai Ngoc Duy
2012-10-10 21:41       ` Junio C Hamano
2012-10-10 21:50         ` Junio C Hamano
2012-10-11  1:36         ` Nguyen Thai Ngoc Duy
2012-10-12 19:09       ` Junio C Hamano
2012-10-13  4:32         ` Nguyen Thai Ngoc Duy

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).