git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 6/6] attr: more matching optimizations from .gitignore
Date: Mon, 15 Oct 2012 13:24:39 +0700	[thread overview]
Message-ID: <1350282279-4377-6-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1350282279-4377-1-git-send-email-pclouds@gmail.com>

.gitattributes and .gitignore share the same pattern syntax but has
separate matching implementation. Over the years, ignore's
implementation accumulates more optimizations while attr's stays the
same.

This patch reuses the core matching functions that are also used by
excluded_from_list. excluded_from_list and path_matches can't be
merged due to differences in exclude and attr, for example:

* "!pattern" syntax is forbidden in .gitattributes.  As an attribute
  can be unset (i.e. set to a special value "false") or made back to
  unspecified (i.e. not even set to "false"), "!pattern attr" is unclear
  which one it means.

* we support attaching attributes to directories, but git-core
  internally does not currently make use of attributes on
  directories.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/gitattributes.txt |  1 +
 attr.c                          | 52 ++++++++++++++++++++++++-----------------
 dir.c                           | 22 ++++++++---------
 dir.h                           | 11 +++++++++
 t/t0003-attributes.sh           | 10 ++++++++
 5 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 80120ea..b7c0e65 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -56,6 +56,7 @@ When more than one pattern matches the path, a later line
 overrides an earlier line.  This overriding is done per
 attribute.  The rules how the pattern matches paths are the
 same as in `.gitignore` files; see linkgit:gitignore[5].
+Unlike `.gitignore`, negative patterns are forbidden.
 
 When deciding what attributes are assigned to a path, git
 consults `$GIT_DIR/info/attributes` file (which has the highest
diff --git a/attr.c b/attr.c
index e7caee4..2fc6353 100644
--- a/attr.c
+++ b/attr.c
@@ -115,6 +115,13 @@ struct attr_state {
 	const char *setto;
 };
 
+struct pattern {
+	const char *pattern;
+	int patternlen;
+	int nowildcardlen;
+	int flags;		/* EXC_FLAG_* */
+};
+
 /*
  * One rule, as from a .gitattributes file.
  *
@@ -131,7 +138,7 @@ struct attr_state {
  */
 struct match_attr {
 	union {
-		char *pattern;
+		struct pattern pat;
 		struct git_attr *attr;
 	} u;
 	char is_macro;
@@ -241,9 +248,16 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 	if (is_macro)
 		res->u.attr = git_attr_internal(name, namelen);
 	else {
-		res->u.pattern = (char *)&(res->state[num_attr]);
-		memcpy(res->u.pattern, name, namelen);
-		res->u.pattern[namelen] = 0;
+		char *p = (char *)&(res->state[num_attr]);
+		memcpy(p, name, namelen);
+		res->u.pat.pattern = p;
+		parse_exclude_pattern(&res->u.pat.pattern,
+				      &res->u.pat.patternlen,
+				      &res->u.pat.flags,
+				      &res->u.pat.nowildcardlen);
+		if (res->u.pat.flags & EXC_FLAG_NEGATIVE)
+			die(_("Negative patterns are forbidden in git attributes\n"
+			      "Use '\\!' for literal leading exclamation."));
 	}
 	res->is_macro = is_macro;
 	res->num_attr = num_attr;
@@ -640,25 +654,21 @@ static void prepare_attr_stack(const char *path)
 
 static int path_matches(const char *pathname, int pathlen,
 			const char *basename,
-			const char *pattern,
+			const struct pattern *pat,
 			const char *base, int baselen)
 {
-	if (!strchr(pattern, '/')) {
-		return (fnmatch_icase(pattern, basename, 0) == 0);
+	const char *pattern = pat->pattern;
+	int prefix = pat->nowildcardlen;
+
+	if (pat->flags & EXC_FLAG_NODIR) {
+		return match_basename(basename,
+				      pathlen - (basename - pathname),
+				      pattern, prefix,
+				      pat->patternlen, pat->flags);
 	}
-	/*
-	 * match with FNM_PATHNAME; the pattern has base implicitly
-	 * in front of it.
-	 */
-	if (*pattern == '/')
-		pattern++;
-	if (pathlen < baselen ||
-	    (baselen && pathname[baselen] != '/') ||
-	    strncmp(pathname, base, baselen))
-		return 0;
-	if (baselen != 0)
-		baselen++;
-	return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+	return match_pathname(pathname, pathlen,
+			      base, baselen,
+			      pattern, prefix, pat->patternlen, pat->flags);
 }
 
 static int macroexpand_one(int attr_nr, int rem);
@@ -696,7 +706,7 @@ static int fill(const char *path, int pathlen, const char *basename,
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen, basename,
-				 a->u.pattern, base, stk->originlen))
+				 &a->u.pat, base, stk->originlen))
 			rem = fill_one("fill", a, rem);
 	}
 	return rem;
diff --git a/dir.c b/dir.c
index c4e64a5..ee8e711 100644
--- a/dir.c
+++ b/dir.c
@@ -308,10 +308,10 @@ static int no_wildcard(const char *string)
 	return string[simple_length(string)] == '\0';
 }
 
-static void parse_exclude_pattern(const char **pattern,
-				  int *patternlen,
-				  int *flags,
-				  int *nowildcardlen)
+void parse_exclude_pattern(const char **pattern,
+			   int *patternlen,
+			   int *flags,
+			   int *nowildcardlen)
 {
 	const char *p = *pattern;
 	size_t i, len;
@@ -530,9 +530,9 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	dir->basebuf[baselen] = '\0';
 }
 
-static int match_basename(const char *basename, int basenamelen,
-			  const char *pattern, int prefix, int patternlen,
-			  int flags)
+int match_basename(const char *basename, int basenamelen,
+		   const char *pattern, int prefix, int patternlen,
+		   int flags)
 {
 	if (prefix == patternlen) {
 		if (!strcmp_icase(pattern, basename))
@@ -549,10 +549,10 @@ static int match_basename(const char *basename, int basenamelen,
 	return 0;
 }
 
-static int match_pathname(const char *pathname, int pathlen,
-			  const char *base, int baselen,
-			  const char *pattern, int prefix, int patternlen,
-			  int flags)
+int match_pathname(const char *pathname, int pathlen,
+		   const char *base, int baselen,
+		   const char *pattern, int prefix, int patternlen,
+		   int flags)
 {
 	const char *name;
 	int namelen;
diff --git a/dir.h b/dir.h
index 41ea32d..f5c89e3 100644
--- a/dir.h
+++ b/dir.h
@@ -81,6 +81,16 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas
 struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len);
 
 /*
+ * these implement the matching logic for dir.c:excluded_from_list and
+ * attr.c:path_matches()
+ */
+extern int match_basename(const char *, int,
+			  const char *, int, int, int);
+extern int match_pathname(const char *, int,
+			  const char *, int,
+			  const char *, int, int, int);
+
+/*
  * The excluded() API is meant for callers that check each level of leading
  * directory hierarchies with excluded() to avoid recursing into excluded
  * directories.  Callers that do not do so should use this API instead.
@@ -97,6 +107,7 @@ extern int path_excluded(struct path_exclude_check *, const char *, int namelen,
 extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen,
 					  char **buf_p, struct exclude_list *which, int check_index);
 extern void add_excludes_from_file(struct dir_struct *, const char *fname);
+extern void parse_exclude_pattern(const char **string, int *patternlen, int *flags, int *nowildcardlen);
 extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *which);
 extern void free_excludes(struct exclude_list *el);
diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
index 51f3045..f6c21ea 100755
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@@ -206,6 +206,16 @@ test_expect_success 'root subdir attribute test' '
 	attr_check subdir/a/i unspecified
 '
 
+test_expect_success 'negative patterns' '
+	echo "!f test=bar" >.gitattributes &&
+	test_must_fail git check-attr test -- f
+'
+
+test_expect_success 'patterns starting with exclamation' '
+	echo "\!f test=foo" >.gitattributes &&
+	attr_check "!f" foo
+'
+
 test_expect_success 'setup bare' '
 	git clone --bare . bare.git &&
 	cd bare.git
-- 
1.8.0.rc0.29.g1fdd78f

  parent reply	other threads:[~2012-10-15  6:25 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-15  6:23 nd/attr-match-more-optim, nd/wildmatch and as/check-ignore Nguyễn Thái Ngọc Duy
2012-10-15  6:24 ` [PATCH 1/6] exclude: stricten a length check in EXC_FLAG_ENDSWITH case Nguyễn Thái Ngọc Duy
2012-10-15  6:24   ` [PATCH 2/6] exclude: split basename matching code into a separate function Nguyễn Thái Ngọc Duy
2012-10-15  6:24   ` [PATCH 3/6] exclude: fix a bug in prefix compare optimization Nguyễn Thái Ngọc Duy
2012-10-15  6:24   ` [PATCH 4/6] exclude: split pathname matching code into a separate function Nguyễn Thái Ngọc Duy
2012-10-15  6:24   ` [PATCH 5/6] gitignore: make pattern parsing code " Nguyễn Thái Ngọc Duy
2012-10-15  6:24   ` Nguyễn Thái Ngọc Duy [this message]
2012-10-15  6:25 ` [PATCH 01/13] ctype: make sane_ctype[] const array Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 02/13] ctype: support iscntrl, ispunct, isxdigit and isprint Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 03/13] Import wildmatch from rsync Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 04/13] wildmatch: remove unnecessary functions Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 05/13] wildmatch: follow Git's coding convention Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 06/13] Integrate wildmatch to git Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 07/13] t3070: disable unreliable fnmatch tests Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 08/13] wildmatch: make wildmatch's return value compatible with fnmatch Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 09/13] wildmatch: remove static variable force_lower_case Nguyễn Thái Ngọc Duy
2012-10-15  6:25   ` [PATCH 10/13] wildmatch: fix case-insensitive matching Nguyễn Thái Ngọc Duy
2012-10-15  6:26   ` [PATCH 11/13] wildmatch: adjust "**" behavior Nguyễn Thái Ngọc Duy
2012-10-15  6:26   ` [PATCH 12/13] wildmatch: make /**/ match zero or more directories Nguyễn Thái Ngọc Duy
2012-10-15  6:26   ` [PATCH 13/13] Support "**" wildcard in .gitignore and .gitattributes Nguyễn Thái Ngọc Duy
2012-11-04 21:00     ` [PATCH 14/13] wildmatch: fix tests that fail on Windows due to path mangling Johannes Sixt
2012-11-06 12:47       ` Nguyen Thai Ngoc Duy
2012-11-07 19:32         ` Johannes Sixt
2012-11-11 10:13       ` [PATCH 14/13] test-wildmatch: " Nguyễn Thái Ngọc Duy
2012-11-11 10:13         ` [PATCH 15/13] compat/fnmatch: fix off-by-one character class's length check Nguyễn Thái Ngọc Duy
2012-11-13 18:07           ` Johannes Sixt
2012-11-20  7:06           ` Johannes Sixt
2012-11-11 10:47         ` [PATCH 14/13] test-wildmatch: fix tests that fail on Windows due to path mangling Junio C Hamano
2012-11-13 10:06           ` [PATCH 14/13] test-wildmatch: avoid Windows " Nguyễn Thái Ngọc Duy
2012-11-13 18:06             ` Johannes Sixt
2012-11-20  7:02           ` Johannes Sixt
2012-11-20 20:11             ` Junio C Hamano
2012-11-21  6:41               ` Johannes Sixt
2012-10-15  6:27 ` [PATCH 01/12] dir.c: rename cryptic 'which' variable to more consistent name Nguyễn Thái Ngọc Duy
2012-10-15  6:27   ` [PATCH 02/12] dir.c: rename path_excluded() to is_path_excluded() Nguyễn Thái Ngọc Duy
2012-10-15  6:27   ` [PATCH 03/12] dir.c: rename excluded_from_list() to is_excluded_from_list() Nguyễn Thái Ngọc Duy
2012-10-15  6:27   ` [PATCH 04/12] dir.c: rename excluded() to is_excluded() Nguyễn Thái Ngọc Duy
2012-10-15  6:27   ` [PATCH 05/12] dir.c: refactor is_excluded_from_list() Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 06/12] dir.c: refactor is_excluded() Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 07/12] dir.c: refactor is_path_excluded() Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 08/12] dir.c: keep track of where patterns came from Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 09/12] dir.c: refactor treat_gitlinks() Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 10/12] pathspec.c: move reusable code from builtin/add.c Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 11/12] dir.c: provide free_directory() for reclaiming dir_struct memory Nguyễn Thái Ngọc Duy
2012-10-15  6:28   ` [PATCH 12/12] Add git-check-ignore sub-command Nguyễn Thái Ngọc Duy
2012-10-15 22:31     ` Junio C Hamano
2012-10-16 11:08       ` Nguyen Thai Ngoc Duy
2012-10-16 14:09         ` Adam Spiers
2012-10-16 15:07           ` Nguyen Thai Ngoc Duy
2012-10-16 14:13       ` Adam Spiers
2012-10-16 16:12         ` Junio C Hamano
2012-12-17  0:10           ` Adam Spiers
2012-11-04 21:07     ` [PATCH as/check-ignore] t0007: fix tests on Windows Johannes Sixt
2012-11-08 18:04       ` Jeff King
2012-10-15 22:13 ` nd/attr-match-more-optim, nd/wildmatch and as/check-ignore Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1350282279-4377-6-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).