git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH/RFC v3] grep: Add the option '--exclude'
@ 2012-01-31 14:45 Albert Yale
  2012-02-01  0:31 ` Junio C Hamano
  0 siblings, 1 reply; 2+ messages in thread
From: Albert Yale @ 2012-01-31 14:45 UTC (permalink / raw)
  To: git; +Cc: gitster, Albert Yale

[PATCH/RFC v3] grep: Add the option '--exclude'

Signed-off-by: Albert Yale <surfingalbert@gmail.com>
---
Here's my latest progress towards a cleaner implementation.

Albert Yale

 Documentation/git-grep.txt |    7 +++
 builtin/grep.c             |   41 +++++++++++++--
 cache.h                    |    3 +
 dir.c                      |  126 +++++++++++++++++++++++++++++++++++++------
 dir.h                      |    3 +-
 read-cache.c               |    2 +-
 6 files changed, 158 insertions(+), 24 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 6a8b1e3..67a83aa 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -22,6 +22,7 @@ SYNOPSIS
 	   [--color[=<when>] | --no-color]
 	   [-A <post-context>] [-B <pre-context>] [-C <context>]
 	   [-f <file>] [-e] <pattern>
+	   [-x <pattern>|--exclude <pattern>]
 	   [--and|--or|--not|(|)|-e <pattern>...]
 	   [ [--exclude-standard] [--cached | --no-index | --untracked] | <tree>...]
 	   [--] [<pathspec>...]
@@ -124,6 +125,12 @@ OPTIONS
 	Use fixed strings for patterns (don't interpret pattern
 	as a regex).
 
+-x <pattern>::
+--exclude <pattern>::
+	In addition to those found in .gitignore (per directory) and
+	$GIT_DIR/info/exclude, also consider these patterns to be in the
+	set of the ignore rules in effect.
+
 -n::
 --line-number::
 	Prefix the line number to matching lines.
diff --git a/builtin/grep.c b/builtin/grep.c
index 9ce064a..e9e1ac1 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -528,7 +528,8 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int
 		struct cache_entry *ce = active_cache[nr];
 		if (!S_ISREG(ce->ce_mode))
 			continue;
-		if (!match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 0, NULL))
+		if (!match_pathspec_depth(pathspec, ce->name, ce_namelen(ce),
+					  0, NULL, 1))
 			continue;
 		/*
 		 * If CE_VALID is on, we assume worktree file and its cache entry
@@ -566,6 +567,11 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 	while (tree_entry(tree, &entry)) {
 		int te_len = tree_entry_len(&entry);
 
+		if (match_pathspec_depth(pathspec->exclude,
+					 entry.path, strlen(entry.path),
+					 0, NULL, 0))
+			continue;
+
 		if (match != all_entries_interesting) {
 			match = tree_entry_interesting(&entry, base, tn_len, pathspec);
 			if (match == all_entries_not_interesting)
@@ -606,8 +612,17 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
 		       struct object *obj, const char *name)
 {
-	if (obj->type == OBJ_BLOB)
+	if (obj->type == OBJ_BLOB) {
+		const char *name_without_sha1 = strchr(name, ':') + 1;
+
+		if (match_pathspec_depth(pathspec->exclude,
+					 name_without_sha1,
+					 strlen(name_without_sha1),
+					 0, NULL, 0))
+			return 0;
+
 		return grep_sha1(opt, obj->sha1, name, 0);
+	}
 	if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
 		struct tree_desc tree;
 		void *data;
@@ -671,7 +686,7 @@ static int grep_directory(struct grep_opt *opt, const struct pathspec *pathspec,
 	for (i = 0; i < dir.nr; i++) {
 		const char *name = dir.entries[i]->name;
 		int namelen = strlen(name);
-		if (!match_pathspec_depth(pathspec, name, namelen, 0, NULL))
+		if (!match_pathspec_depth(pathspec, name, namelen, 0, NULL, 1))
 			continue;
 		hit |= grep_file(opt, dir.entries[i]->name);
 		if (hit && opt->status_only)
@@ -764,6 +779,14 @@ static int pattern_callback(const struct option *opt, const char *arg,
 	return 0;
 }
 
+static int exclude_cb(const struct option *opt, const char *arg,
+		      int unset)
+{
+	struct string_list *exclude_list = opt->value;
+	string_list_append(exclude_list, arg);
+	return 0;
+}
+
 static int help_callback(const struct option *opt, const char *arg, int unset)
 {
 	return -1;
@@ -792,7 +815,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		pattern_type_pcre,
 	};
 	int pattern_type = pattern_type_unspecified;
-
+	struct string_list exclude_list = STRING_LIST_INIT_NODUP;
 	struct option options[] = {
 		OPT_BOOLEAN(0, "cached", &cached,
 			"search in index instead of in the work tree"),
@@ -872,6 +895,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 			"read patterns from file", file_callback),
 		{ OPTION_CALLBACK, 'e', NULL, &opt, "pattern",
 			"match <pattern>", PARSE_OPT_NONEG, pattern_callback },
+		{ OPTION_CALLBACK, 'x', "exclude", &exclude_list, "pattern",
+		  "add <pattern> to ignore rules", PARSE_OPT_NONEG, exclude_cb },
 		{ OPTION_CALLBACK, 0, "and", &opt, NULL,
 		  "combine patterns specified with -e",
 		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, and_callback },
@@ -1053,6 +1078,12 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	pathspec.max_depth = opt.max_depth;
 	pathspec.recursive = 1;
 
+	if (exclude_list.nr) {
+		add_pathspec_item_from_string_list(pathspec.exclude, &exclude_list);
+		pathspec.exclude->max_depth = opt.max_depth;
+		pathspec.exclude->recursive = 1;
+	}
+
 	if (show_in_pager && (cached || list.nr))
 		die(_("--open-files-in-pager only works on the worktree"));
 
@@ -1102,5 +1133,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 	if (hit && show_in_pager)
 		run_pager(&opt, prefix);
 	free_grep_patterns(&opt);
+	free_pathspec(&pathspec);
+	string_list_clear(&exclude_list, 0);
 	return !hit;
 }
diff --git a/cache.h b/cache.h
index 9bd8c2d..683458a 100644
--- a/cache.h
+++ b/cache.h
@@ -533,9 +533,12 @@ struct pathspec {
 		int len;
 		unsigned int use_wildcard:1;
 	} *items;
+
+	struct pathspec *exclude; /* This is never NULL. */
 };
 
 extern int init_pathspec(struct pathspec *, const char **);
+extern int add_pathspec_item_from_string_list(struct pathspec *, const struct string_list *);
 extern void free_pathspec(struct pathspec *);
 extern int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec);
 
diff --git a/dir.c b/dir.c
index 0a78d00..9e4312c 100644
--- a/dir.c
+++ b/dir.c
@@ -9,6 +9,8 @@
 #include "dir.h"
 #include "refs.h"
 
+#include "string-list.h"
+
 struct path_simplify {
 	int len;
 	const char *path;
@@ -253,34 +255,47 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix,
  */
 int match_pathspec_depth(const struct pathspec *ps,
 			 const char *name, int namelen,
-			 int prefix, char *seen)
+			 int prefix, char *seen,
+			 int empty_pathspec_can_match)
 {
 	int i, retval = 0;
+	const char *name_without_prefix = name + prefix;
+	int namelen_without_prefix = namelen - prefix;
 
 	if (!ps->nr) {
-		if (!ps->recursive || ps->max_depth == -1)
-			return MATCHED_RECURSIVELY;
+		if (empty_pathspec_can_match) {
+			if (match_pathspec_depth(ps->exclude,
+						   name, namelen,
+						   prefix, seen, 0))
+				return 0;
 
-		if (within_depth(name, namelen, 0, ps->max_depth))
-			return MATCHED_EXACTLY;
+			if (!ps->recursive || ps->max_depth == -1)
+				return MATCHED_RECURSIVELY;
+
+			if (within_depth(name, namelen, 0, ps->max_depth))
+				return MATCHED_EXACTLY;
+			else
+				return 0;
+		}
 		else
 			return 0;
 	}
 
-	name += prefix;
-	namelen -= prefix;
-
 	for (i = ps->nr - 1; i >= 0; i--) {
 		int how;
 		if (seen && seen[i] == MATCHED_EXACTLY)
 			continue;
-		how = match_pathspec_item(ps->items+i, prefix, name, namelen);
+		how = match_pathspec_item(ps->items+i, prefix,
+					  name_without_prefix,
+					  namelen_without_prefix);
 		if (ps->recursive && ps->max_depth != -1 &&
 		    how && how != MATCHED_FNMATCH) {
 			int len = ps->items[i].len;
-			if (name[len] == '/')
+			if (name_without_prefix[len] == '/')
 				len++;
-			if (within_depth(name+len, namelen-len, 0, ps->max_depth))
+			if (within_depth(name_without_prefix+len,
+					 namelen_without_prefix-len,
+					 0, ps->max_depth))
 				how = MATCHED_EXACTLY;
 			else
 				how = 0;
@@ -292,6 +307,12 @@ int match_pathspec_depth(const struct pathspec *ps,
 				seen[i] = how;
 		}
 	}
+
+	if (retval && match_pathspec_depth(ps->exclude,
+					   name, namelen,
+					   prefix, seen, 0))
+		return 0;
+
 	return retval;
 }
 
@@ -1259,12 +1280,79 @@ static int pathspec_item_cmp(const void *a_, const void *b_)
 	return strcmp(a->match, b->match);
 }
 
+static int init_pathspec_item(struct pathspec_item *item, const char *path)
+{
+	item->match = path;
+	item->len = strlen(path);
+	item->use_wildcard = !no_wildcard(path);
+
+	return item->use_wildcard;
+}
+
+int add_pathspec_item_from_string_list(struct pathspec *pathspec,
+				       const struct string_list *path_list)
+{
+	int i;
+	struct pathspec_item *new_items;
+	int previous_items_count = pathspec->nr;
+
+	if (!path_list->nr)
+		return 0;
+
+	pathspec->nr += path_list->nr;
+
+	if (previous_items_count)
+		pathspec->items = xrealloc(pathspec->items,
+					   sizeof(struct pathspec_item)*pathspec->nr);
+	else
+		pathspec->items = xcalloc(pathspec->nr, sizeof(struct pathspec_item));
+
+	new_items = pathspec->items + previous_items_count;
+
+	for (i = 0; i < path_list->nr; i++) {
+		struct pathspec_item *item = new_items+i;
+		const char *path = path_list->items[i].string;
+
+		pathspec->has_wildcard |= init_pathspec_item(item, path);
+	}
+
+	/*
+	* Give the pathspec a pathspec->exclude so we can test
+	* pathspec->exclude->nr without testing the validity of
+	* pathspec->exclude first.
+	*/
+	if (!pathspec->exclude)
+		pathspec->exclude = xcalloc(1, sizeof(*pathspec));
+
+	qsort(pathspec->items, pathspec->nr,
+	      sizeof(struct pathspec_item), pathspec_item_cmp);
+
+	/*
+	* Please comment/review:
+	*
+	* Here, pathspec->raw is NULL despite pathspec->nr being non-zero.
+	*
+	* Usually, a NULL pathspec->raw implies that pathspec->nr is zero.
+	*
+	* A problem might arrise if other areas of the code assume that a
+	* NULL pathspec->raw means that pathspec->nr is zero, or vice-versa.
+	*/
+
+	return 0;
+}
+
 int init_pathspec(struct pathspec *pathspec, const char **paths)
 {
 	const char **p = paths;
 	int i;
 
 	memset(pathspec, 0, sizeof(*pathspec));
+	/*
+	* We create pathspec->exclude unconditionally so that we don't have
+	* to test the validity of pathspec->exclude before using the value
+	* of pathspec->exclude->nr.
+	*/
+	pathspec->exclude = xcalloc(1, sizeof(*pathspec));
 	if (!p)
 		return 0;
 	while (*p)
@@ -1279,11 +1367,7 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
 		struct pathspec_item *item = pathspec->items+i;
 		const char *path = paths[i];
 
-		item->match = path;
-		item->len = strlen(path);
-		item->use_wildcard = !no_wildcard(path);
-		if (item->use_wildcard)
-			pathspec->has_wildcard = 1;
+		pathspec->has_wildcard |= init_pathspec_item(item, path);
 	}
 
 	qsort(pathspec->items, pathspec->nr,
@@ -1294,6 +1378,12 @@ int init_pathspec(struct pathspec *pathspec, const char **paths)
 
 void free_pathspec(struct pathspec *pathspec)
 {
-	free(pathspec->items);
-	pathspec->items = NULL;
+	if (pathspec->items) {
+		free(pathspec->items);
+		pathspec->items = NULL;
+	}
+	if (pathspec->exclude) {
+		free_pathspec(pathspec->exclude);
+		pathspec->exclude = NULL;
+	}
 }
diff --git a/dir.h b/dir.h
index dd6947e..2b9870d 100644
--- a/dir.h
+++ b/dir.h
@@ -68,7 +68,8 @@ extern char *common_prefix(const char **pathspec);
 extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen);
 extern int match_pathspec_depth(const struct pathspec *pathspec,
 				const char *name, int namelen,
-				int prefix, char *seen);
+				int prefix, char *seen,
+				int empty_pathspec_can_match);
 extern int within_depth(const char *name, int namelen, int depth, int max_depth);
 
 extern int fill_directory(struct dir_struct *dir, const char **pathspec);
diff --git a/read-cache.c b/read-cache.c
index a51bba1..9783c4f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -708,7 +708,7 @@ int ce_same_name(struct cache_entry *a, struct cache_entry *b)
 
 int ce_path_match(const struct cache_entry *ce, const struct pathspec *pathspec)
 {
-	return match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 0, NULL);
+	return match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 0, NULL, 1);
 }
 
 /*
-- 
1.7.8.3

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH/RFC v3] grep: Add the option '--exclude'
  2012-01-31 14:45 [PATCH/RFC v3] grep: Add the option '--exclude' Albert Yale
@ 2012-02-01  0:31 ` Junio C Hamano
  0 siblings, 0 replies; 2+ messages in thread
From: Junio C Hamano @ 2012-02-01  0:31 UTC (permalink / raw)
  To: Albert Yale; +Cc: git, Nguyễn Thái Ngọc Duy

Albert Yale <surfingalbert@gmail.com> writes:

> @@ -124,6 +125,12 @@ OPTIONS
>  	Use fixed strings for patterns (don't interpret pattern
>  	as a regex).
>  
> +-x <pattern>::
> +--exclude <pattern>::
> +	In addition to those found in .gitignore (per directory) and
> +	$GIT_DIR/info/exclude, also consider these patterns to be in the
> +	set of the ignore rules in effect.

That makes it sound as if "git grep" will not produce hits for a path that
was forcibly added despite it matches a pattern in .gitignore file, which
is not true at all.

>  -n::
>  --line-number::
>  	Prefix the line number to matching lines.
> diff --git a/builtin/grep.c b/builtin/grep.c
> index 9ce064a..e9e1ac1 100644
> --- a/builtin/grep.c
> +++ b/builtin/grep.c
> @@ -528,7 +528,8 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int
>  		struct cache_entry *ce = active_cache[nr];
>  		if (!S_ISREG(ce->ce_mode))
>  			continue;
> -		if (!match_pathspec_depth(pathspec, ce->name, ce_namelen(ce), 0, NULL))
> +		if (!match_pathspec_depth(pathspec, ce->name, ce_namelen(ce),
> +					  0, NULL, 1))
>  			continue;
>  		/*
>  		 * If CE_VALID is on, we assume worktree file and its cache entry
> @@ -566,6 +567,11 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
>  	while (tree_entry(tree, &entry)) {
>  		int te_len = tree_entry_len(&entry);
>  
> +		if (match_pathspec_depth(pathspec->exclude,
> +					 entry.path, strlen(entry.path),
> +					 0, NULL, 0))
> +			continue;
> +

Why???

IOW, why isn't this

	if (!match_pathspec_depth(pathspec, ...))
		continue;	

My point of the two previous review messages was that it would be nice if
we can make it so that nobody outside match_pathspec_depth() should even
need to know existence of pathspec->exclude. Either that was ignored, or
perhaps you found a compelling reason why that is not a good idea, but if
so I'd like to know why.

I suspect that you do not need to add the extra 0 at the end (it makes the
caller even harder to read) if you go that route. The extra parameter
defeats the whole point of encapsulating the new "negative match" feature
behind match_pathspec_depth() implementation.

> +	if (obj->type == OBJ_BLOB) {
> +		const char *name_without_sha1 = strchr(name, ':') + 1;
> +
> +		if (match_pathspec_depth(pathspec->exclude,
> +					 name_without_sha1,
> +					 strlen(name_without_sha1),
> +					 0, NULL, 0))
> +			return 0;

Likewise.

> diff --git a/cache.h b/cache.h
> index 9bd8c2d..683458a 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -533,9 +533,12 @@ struct pathspec {
>  		int len;
>  		unsigned int use_wildcard:1;
>  	} *items;
> +
> +	struct pathspec *exclude; /* This is never NULL. */
>  };

"This is never NULL" is blatantly wrong, as pathspec->exclude->exclude
will very likely be NULL.

I've been assuming that the resulting structure would be more like this: 

 struct pathspec_set {
         int nr;
         unsigned int has_wildcard:1;
         unsigned int recursive:1;
         struct pathspec_item {
                 const char *match;
                 int len;
                 unsigned int use_wildcard:1;
         } *items;
 };

 struct pathspec {
         const char **raw; /* get_pathspec() result, not freed by free_pathspec() */
         int max_depth;
         struct pathspec_set include;
         struct pathspec_set exclude;
 };
 
I am not including "raw" in the include/exclude because its use should be
deprecated away over time.

The various helpers used in match_pathspec_depth() that know _only_ about
matching and not about excluding will be changed to take a pointer to
"struct pathspec_set" (and possibly max_depth). The top-level callers
would not have to know any of these changes if we structure the API that
way, no?

I think something like that was more or less the structure we discussed
when Nguyễn brought up his negative pathspec work the last time.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-02-01  0:31 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31 14:45 [PATCH/RFC v3] grep: Add the option '--exclude' Albert Yale
2012-02-01  0:31 ` Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).