git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 4/6] attr: more matching optimizations from .gitignore
Date: Thu,  4 Oct 2012 14:39:50 +0700	[thread overview]
Message-ID: <1349336392-1772-5-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1349336392-1772-1-git-send-email-pclouds@gmail.com>

.gitattributes and .gitignore share the same pattern syntax but have
separate matching implementations. Over the years, ignore's
implementation has accumulated more optimizations while attr's has
stayed the same.

This patch adds those optimizations to .gitattributes. Basically it
tries to avoid fnmatch/wildmatch in favor of strncmp as much as
possible.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 attr.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 dir.c  |  4 ++--
 dir.h  |  2 ++
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/attr.c b/attr.c
index eb576ac..3fde9fa 100644
--- a/attr.c
+++ b/attr.c
@@ -116,6 +116,13 @@ struct attr_state {
 	const char *setto;
 };
 
+struct pattern {
+	const char *pattern;
+	int patternlen;
+	int nowildcardlen;
+	int flags;		/* EXC_FLAG_* */
+};
+
 /*
  * One rule, as from a .gitattributes file.
  *
@@ -131,7 +138,7 @@ struct attr_state {
  * listed as they appear in the file (macros unexpanded).
  */
 struct match_attr {
-	const char *pattern;
+	struct pattern pat;
 	struct git_attr *attr;
 	char is_macro;
 	unsigned num_attr;
@@ -243,7 +250,13 @@ static struct match_attr *parse_attr_line(const char *line, const char *src,
 		char *p = (char *)&(res->state[num_attr]);
 		memcpy(p, name, namelen);
 		p[namelen] = 0;
-		res->pattern = p;
+		res->pat.pattern = p;
+		res->pat.patternlen = strlen(p);
+		res->pat.nowildcardlen = simple_length(p);
+		if (!strchr(p, '/'))
+			res->pat.flags |= EXC_FLAG_NODIR;
+		if (*p == '*' && no_wildcard(p+1))
+			res->pat.flags |= EXC_FLAG_ENDSWITH;
 	}
 	res->is_macro = is_macro;
 	res->num_attr = num_attr;
@@ -645,26 +658,56 @@ static void prepare_attr_stack(const char *path)
 
 static int path_matches(const char *pathname, int pathlen,
 			const char *basename,
-			const char *pattern,
+			const struct pattern *pat,
 			const char *base, int baselen)
 {
-	if (!strchr(pattern, '/')) {
+	const char *pattern = pat->pattern;
+	int prefix = pat->nowildcardlen;
+	const char *name;
+	int namelen;
+
+	if (pat->flags & EXC_FLAG_NODIR) {
+		if (prefix == pat->patternlen &&
+		    !strcmp_icase(pattern, basename))
+			return 1;
+
+		if (pat->flags & EXC_FLAG_ENDSWITH &&
+		    pat->patternlen - 1 <= pathlen &&
+		    !strcmp_icase(pattern + 1, pathname +
+				  pathlen - pat->patternlen + 1))
+			return 1;
+
 		return (fnmatch_icase(pattern, basename, 0) == 0);
 	}
 	/*
 	 * match with FNM_PATHNAME; the pattern has base implicitly
 	 * in front of it.
 	 */
-	if (*pattern == '/')
+	if (*pattern == '/') {
 		pattern++;
+		prefix--;
+	}
+
+	/*
+	 * note: unlike excluded_from_list, baselen here does not
+	 * contain the trailing slash
+	 */
+
 	if (pathlen < baselen ||
 	    (baselen && pathname[baselen] != '/') ||
 	    strncmp(pathname, base, baselen))
 		return 0;
-	if (baselen != 0)
-		baselen++;
-	return (ignore_case && iwildmatch(pattern, pathname + baselen)) ||
-		(!ignore_case && wildmatch(pattern, pathname + baselen));
+
+	namelen = baselen ? pathlen - baselen - 1 : pathlen;
+	name = pathname + pathlen - namelen;
+
+	/* if the non-wildcard part is longer than the remaining
+	   pathname, surely it cannot match */
+	if (!namelen || prefix > namelen)
+		return 0;
+
+	return (ignore_case && iwildmatch(pattern, name)) ||
+		(!ignore_case && wildmatch(pattern, name));
 }
 
 static int macroexpand_one(int attr_nr, int rem);
@@ -702,7 +745,7 @@ static int fill(const char *path, int pathlen, const char *basename,
 		if (a->is_macro)
 			continue;
 		if (path_matches(path, pathlen, basename,
-				 a->pattern, base, stk->originlen))
+				 &a->pat, base, stk->originlen))
 			rem = fill_one("fill", a, rem);
 	}
 	return rem;
diff --git a/dir.c b/dir.c
index 92cda82..fd49336 100644
--- a/dir.c
+++ b/dir.c
@@ -292,7 +292,7 @@ int match_pathspec_depth(const struct pathspec *ps,
 /*
  * Return the length of the "simple" part of a path match limiter.
  */
-static int simple_length(const char *match)
+int simple_length(const char *match)
 {
 	int len = -1;
 
@@ -304,7 +304,7 @@ static int simple_length(const char *match)
 	}
 }
 
-static int no_wildcard(const char *string)
+int no_wildcard(const char *string)
 {
 	return string[simple_length(string)] == '\0';
 }
diff --git a/dir.h b/dir.h
index 893465a..7ea8678 100644
--- a/dir.h
+++ b/dir.h
@@ -101,6 +101,8 @@ extern void add_exclude(const char *string, const char *base,
 			int baselen, struct exclude_list *which);
 extern void free_excludes(struct exclude_list *el);
 extern int file_exists(const char *);
+extern int simple_length(const char *match);
+extern int no_wildcard(const char *string);
 
 extern int is_inside_dir(const char *dir);
 extern int dir_inside_of(const char *subdir, const char *dir);
-- 
1.7.12.1.405.gb727dc9

  parent reply	other threads:[~2012-10-04 22:01 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-02 23:20 What's cooking in git.git (Oct 2012, #01; Tue, 2) Junio C Hamano
2012-10-03 15:23 ` Nguyen Thai Ngoc Duy
2012-10-03 18:17   ` Junio C Hamano
2012-10-04  1:56     ` Nguyen Thai Ngoc Duy
2012-10-04  6:01       ` Junio C Hamano
2012-10-04  7:39         ` [PATCH 0/6] wildmatch part 2 Nguyễn Thái Ngọc Duy
2012-10-04  7:39           ` [PATCH 1/6] attr: remove the union in struct match_attr Nguyễn Thái Ngọc Duy
2012-10-04  7:39           ` [PATCH 2/6] attr: avoid strlen() on every match Nguyễn Thái Ngọc Duy
2012-10-04  7:39           ` [PATCH 3/6] attr: avoid searching for basename " Nguyễn Thái Ngọc Duy
2012-10-04  7:39           ` Nguyễn Thái Ngọc Duy [this message]
2012-10-04  7:39           ` [PATCH 5/6] gitignore: do not do basename match with patterns that have '**' Nguyễn Thái Ngọc Duy
2012-10-04 17:59             ` Junio C Hamano
2012-10-05  7:01             ` Johannes Sixt
2012-10-05 11:23               ` Nguyen Thai Ngoc Duy
2012-10-04  7:39           ` [PATCH 6/6] t3001: note about expected "**" behavior Nguyễn Thái Ngọc Duy
2012-10-04 18:04             ` Junio C Hamano
2012-10-04 17:43           ` [PATCH 0/6] wildmatch part 2 Junio C Hamano
2012-10-04  9:34     ` What's cooking in git.git (Oct 2012, #01; Tue, 2) Michael Haggerty
2012-10-04 11:46       ` Nguyen Thai Ngoc Duy
2012-10-04 15:17         ` Michael Haggerty
2012-10-04 16:39       ` Junio C Hamano
2012-10-05 12:19         ` Andreas Schwab
2012-10-05 12:30           ` Matthieu Moy
2012-10-05 14:15             ` Andreas Schwab
2012-10-05 13:21         ` Nguyen Thai Ngoc Duy
2012-10-04  8:17 ` David Michael Barr
2012-10-04  8:30   ` fa/remote-svn (Re: What's cooking in git.git (Oct 2012, #01; Tue, 2)) Jonathan Nieder
2012-10-04 13:16     ` Stephen Bash
2012-10-04 16:30       ` Junio C Hamano
2012-10-04 16:27   ` What's cooking in git.git (Oct 2012, #01; Tue, 2) Junio C Hamano
2012-10-30 12:15 ` Florian Achleitner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1349336392-1772-5-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).