git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / Atom feed
* [PATCH] dir: restructure in a way to avoid passing around a struct dirent
@ 2020-01-14 16:32 Elijah Newren via GitGitGadget
  2020-01-14 21:07 ` Johannes Schindelin
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Elijah Newren via GitGitGadget @ 2020-01-14 16:32 UTC (permalink / raw)
  To: git; +Cc: Elijah Newren

Restructure the code slightly to avoid passing around a struct dirent
anywhere, which also enables us to avoid trying to manufacture one.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
    dir: restructure in a way to avoid passing around a struct dirent
    
    This is a follow up to en/fill-directory-fixes; a somewhat embarrassing
    one...
    
    When I was more than a bit burned out on dir.c in late December, I had
    looked at making another change to remove the need for a dirent but
    punted because it looked like "way too much work". Junio and Peff both
    later suggested the same cleanup, but I said it was too much work. Peff
    posted a patch which demonstrated that it was actually pretty simple[1],
    but I somehow read his email wrong and assumed he was commenting on my
    patch (if I had looked even slightly closer...). His patch was actually
    for before en/fill-directory-fixes was applied, so I have updated it
    slightly to apply after that series.
    
    I'm not sure if I'm supposed to add a Reviewed-by me or a Signed-off-by
    or both, but I read through it closely to avoid any hidden surprises. I
    probably could have saved Dscho some time last month if I would have
    just looked a little closer. Sorry about all that...
    
    (And yeah, we'll need Peff's Signed-off-by, since I'm leaving him as the
    author; I really only made a small tweak to update his patch.)
    
    CC: Jeff King peff@peff.net [peff@peff.net], Johannes Schindelin 
    Johannes.Schindelin@gmx.de [Johannes.Schindelin@gmx.de]
    
    [1] 
    https://lore.kernel.org/git/20191219222403.GA705525@coredump.intra.peff.net/

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-692%2Fnewren%2Favoid-dirent-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-692/newren/avoid-dirent-v1
Pull-Request: https://github.com/git/git/pull/692

 dir.c | 73 +++++++++++++++++++++++++----------------------------------
 1 file changed, 31 insertions(+), 42 deletions(-)

diff --git a/dir.c b/dir.c
index 7d255227b1..356d24bff8 100644
--- a/dir.c
+++ b/dir.c
@@ -41,7 +41,8 @@ struct cached_dir {
 	int nr_files;
 	int nr_dirs;
 
-	struct dirent *de;
+	const char *d_name;
+	int d_type;
 	const char *file;
 	struct untracked_cache_dir *ucd;
 };
@@ -50,8 +51,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	struct index_state *istate, const char *path, int len,
 	struct untracked_cache_dir *untracked,
 	int check_only, int stop_at_first_file, const struct pathspec *pathspec);
-static int get_dtype(struct dirent *de, struct index_state *istate,
-		     const char *path, int len);
+static int resolve_dtype(int dtype, struct index_state *istate,
+			 const char *path, int len);
 
 int count_slashes(const char *s)
 {
@@ -1215,8 +1216,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
 		int prefix = pattern->nowildcardlen;
 
 		if (pattern->flags & PATTERN_FLAG_MUSTBEDIR) {
-			if (*dtype == DT_UNKNOWN)
-				*dtype = get_dtype(NULL, istate, pathname, pathlen);
+			*dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
 			if (*dtype != DT_DIR)
 				continue;
 		}
@@ -1842,10 +1842,9 @@ static int get_index_dtype(struct index_state *istate,
 	return DT_UNKNOWN;
 }
 
-static int get_dtype(struct dirent *de, struct index_state *istate,
-		     const char *path, int len)
+static int resolve_dtype(int dtype, struct index_state *istate,
+			 const char *path, int len)
 {
-	int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 	struct stat st;
 
 	if (dtype != DT_UNKNOWN)
@@ -1870,14 +1869,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct strbuf *path,
 					  int baselen,
 					  const struct pathspec *pathspec,
-					  int dtype, struct dirent *de)
+					  int dtype)
 {
 	int exclude;
 	int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
 	enum path_treatment path_treatment;
 
-	if (dtype == DT_UNKNOWN)
-		dtype = get_dtype(de, istate, path->buf, path->len);
+	dtype = resolve_dtype(dtype, istate, path->buf, path->len);
 
 	/* Always exclude indexed files */
 	if (dtype != DT_DIR && has_path_in_index)
@@ -1985,21 +1983,18 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 				      int baselen,
 				      const struct pathspec *pathspec)
 {
-	int dtype;
-	struct dirent *de = cdir->de;
-
-	if (!de)
+	if (!cdir->d_name)
 		return treat_path_fast(dir, untracked, cdir, istate, path,
 				       baselen, pathspec);
-	if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git"))
+	if (is_dot_or_dotdot(cdir->d_name) || !fspathcmp(cdir->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
-	strbuf_addstr(path, de->d_name);
+	strbuf_addstr(path, cdir->d_name);
 	if (simplify_away(path->buf, path->len, pathspec))
 		return path_none;
 
-	dtype = DTYPE(de);
-	return treat_one_path(dir, untracked, istate, path, baselen, pathspec, dtype, de);
+	return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
+			      cdir->d_type);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -2087,10 +2082,17 @@ static int open_cached_dir(struct cached_dir *cdir,
 
 static int read_cached_dir(struct cached_dir *cdir)
 {
+	struct dirent *de;
+
 	if (cdir->fdir) {
-		cdir->de = readdir(cdir->fdir);
-		if (!cdir->de)
+		de = readdir(cdir->fdir);
+		if (!de) {
+			cdir->d_name = NULL;
+			cdir->d_type = DT_UNKNOWN;
 			return -1;
+		}
+		cdir->d_name = de->d_name;
+		cdir->d_type = DTYPE(de);
 		return 0;
 	}
 	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
@@ -2216,7 +2218,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		/* recurse into subdir if instructed by treat_path */
 		if ((state == path_recurse) ||
 			((state == path_untracked) &&
-			 (get_dtype(cdir.de, istate, path.buf, path.len) == DT_DIR) &&
+			 (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
 			 ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
 			  (pathspec &&
 			   do_match_pathspec(istate, pathspec, path.buf, path.len,
@@ -2314,10 +2316,10 @@ static int treat_leading_path(struct dir_struct *dir,
 	 */
 
 	struct strbuf sb = STRBUF_INIT;
+	struct strbuf subdir = STRBUF_INIT;
 	int prevlen, baselen;
 	const char *cp;
 	struct cached_dir cdir;
-	struct dirent *de;
 	enum path_treatment state = path_none;
 
 	/*
@@ -2342,22 +2344,8 @@ static int treat_leading_path(struct dir_struct *dir,
 	if (!len)
 		return 1;
 
-	/*
-	 * We need a manufactured dirent with sufficient space to store a
-	 * leading directory component of path in its d_name.  Here, we
-	 * assume that the dirent's d_name is either declared as
-	 *    char d_name[BIG_ENOUGH]
-	 * or that it is declared at the end of the struct as
-	 *    char d_name[]
-	 * For either case, padding with len+1 bytes at the end will ensure
-	 * sufficient storage space.
-	 */
-	de = xcalloc(1, st_add3(sizeof(struct dirent), len, 1));
 	memset(&cdir, 0, sizeof(cdir));
-	cdir.de = de;
-#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
-	de->d_type = DT_DIR;
-#endif
+	cdir.d_type = DT_DIR;
 	baselen = 0;
 	prevlen = 0;
 	while (1) {
@@ -2374,12 +2362,13 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		strbuf_reset(&sb);
 		strbuf_add(&sb, path, prevlen);
-		memcpy(de->d_name, path+prevlen, baselen-prevlen);
-		de->d_name[baselen-prevlen] = '\0';
+		strbuf_reset(&subdir);
+		strbuf_add(&subdir, path+prevlen, baselen-prevlen);
+		cdir.d_name = subdir.buf;
 		state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
 				    pathspec);
 		if (state == path_untracked &&
-		    get_dtype(cdir.de, istate, sb.buf, sb.len) == DT_DIR &&
+		    resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
 		    (dir->flags & DIR_SHOW_IGNORED_TOO ||
 		     do_match_pathspec(istate, pathspec, sb.buf, sb.len,
 				       baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
@@ -2399,7 +2388,7 @@ static int treat_leading_path(struct dir_struct *dir,
 					    &sb, baselen, pathspec,
 					    state);
 
-	free(de);
+	strbuf_release(&subdir);
 	strbuf_release(&sb);
 	return state == path_recurse;
 }

base-commit: d0654dc308b0ba76dd8ed7bbb33c8d8f7aacd783
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] dir: restructure in a way to avoid passing around a struct dirent
  2020-01-14 16:32 [PATCH] dir: restructure in a way to avoid passing around a struct dirent Elijah Newren via GitGitGadget
@ 2020-01-14 21:07 ` Johannes Schindelin
  2020-01-14 21:13   ` Elijah Newren
  2020-01-14 22:03 ` Jeff King
  2020-01-15 14:21 ` [PATCH v2] " Elijah Newren via GitGitGadget
  2 siblings, 1 reply; 15+ messages in thread
From: Johannes Schindelin @ 2020-01-14 21:07 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren

Hi,


On Tue, 14 Jan 2020, Elijah Newren via GitGitGadget wrote:

> Restructure the code slightly to avoid passing around a struct dirent
> anywhere, which also enables us to avoid trying to manufacture one.

Please note that due a bug (which has been fixed in the meantime, see
https://github.com/gitgitgadget/gitgitgadget/pull/188 for details), this
GitGitGadget mail is missing the line

	From: Jeff King <peff@peff.net>

The patch itself looks fine to me.

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] dir: restructure in a way to avoid passing around a struct dirent
  2020-01-14 21:07 ` Johannes Schindelin
@ 2020-01-14 21:13   ` Elijah Newren
  0 siblings, 0 replies; 15+ messages in thread
From: Elijah Newren @ 2020-01-14 21:13 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Elijah Newren via GitGitGadget, Git Mailing List

Hi Dscho,

On Tue, Jan 14, 2020 at 1:07 PM Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:
>
> Hi,
>
>
> On Tue, 14 Jan 2020, Elijah Newren via GitGitGadget wrote:
>
> > Restructure the code slightly to avoid passing around a struct dirent
> > anywhere, which also enables us to avoid trying to manufacture one.
>
> Please note that due a bug (which has been fixed in the meantime, see
> https://github.com/gitgitgadget/gitgitgadget/pull/188 for details), this
> GitGitGadget mail is missing the line
>
>         From: Jeff King <peff@peff.net>

Thanks for the heads up.  Once Peff provides his Signed-off-by, I'll
add it to the patch and resubmit which will hopefully fix both the
from and the signed-off-by at the same time.

> The patch itself looks fine to me.
>
> Thanks,
> Dscho

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] dir: restructure in a way to avoid passing around a struct dirent
  2020-01-14 16:32 [PATCH] dir: restructure in a way to avoid passing around a struct dirent Elijah Newren via GitGitGadget
  2020-01-14 21:07 ` Johannes Schindelin
@ 2020-01-14 22:03 ` Jeff King
  2020-01-15 14:21 ` [PATCH v2] " Elijah Newren via GitGitGadget
  2 siblings, 0 replies; 15+ messages in thread
From: Jeff King @ 2020-01-14 22:03 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren

On Tue, Jan 14, 2020 at 04:32:12PM +0000, Elijah Newren via GitGitGadget wrote:

>     When I was more than a bit burned out on dir.c in late December, I had
>     looked at making another change to remove the need for a dirent but
>     punted because it looked like "way too much work". Junio and Peff both
>     later suggested the same cleanup, but I said it was too much work. Peff
>     posted a patch which demonstrated that it was actually pretty simple[1],
>     but I somehow read his email wrong and assumed he was commenting on my
>     patch (if I had looked even slightly closer...). His patch was actually
>     for before en/fill-directory-fixes was applied, so I have updated it
>     slightly to apply after that series.

Yeah, I hadn't really grokked your patch, so I was hoping you'd build
your stuff on top. :)

>     I'm not sure if I'm supposed to add a Reviewed-by me or a Signed-off-by
>     or both, but I read through it closely to avoid any hidden surprises. I
>     probably could have saved Dscho some time last month if I would have
>     just looked a little closer. Sorry about all that...
>     
>     (And yeah, we'll need Peff's Signed-off-by, since I'm leaving him as the
>     author; I really only made a small tweak to update his patch.)
>     
>     CC: Jeff King peff@peff.net [peff@peff.net], Johannes Schindelin 
>     Johannes.Schindelin@gmx.de [Johannes.Schindelin@gmx.de]

Hmm, looks like you maybe meant for more headers (and cc's) to happen
than did. Anyway, I'm certainly fine with this direction, and you have
my:

  Signed-off-by: Jeff King <peff@peff.net>

-Peff

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2] dir: restructure in a way to avoid passing around a struct dirent
  2020-01-14 16:32 [PATCH] dir: restructure in a way to avoid passing around a struct dirent Elijah Newren via GitGitGadget
  2020-01-14 21:07 ` Johannes Schindelin
  2020-01-14 22:03 ` Jeff King
@ 2020-01-15 14:21 ` Elijah Newren via GitGitGadget
  2020-01-15 20:21   ` [PATCH] dir: point treat_leading_path() warning to the right place Jeff King
  2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
  2 siblings, 2 replies; 15+ messages in thread
From: Elijah Newren via GitGitGadget @ 2020-01-15 14:21 UTC (permalink / raw)
  To: git; +Cc: Jeff King, Johannes Schindelin, Elijah Newren, Jeff King

From: Jeff King <peff@peff.net>

Restructure the code slightly to avoid passing around a struct dirent
anywhere, which also enables us to avoid trying to manufacture one.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Elijah Newren <newren@gmail.com>
---
    dir: restructure in a way to avoid passing around a struct dirent
    
    This is a follow up to en/fill-directory-fixes, and consists of a small
    tweak made by me to a patch proposed by Peff[1] (signoff from [2]).
    
    [1] 
    https://lore.kernel.org/git/20191219222403.GA705525@coredump.intra.peff.net/
    [2] 
    https://lore.kernel.org/git/20200114220357.GA3957260@coredump.intra.peff.net/

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-692%2Fnewren%2Favoid-dirent-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-692/newren/avoid-dirent-v2
Pull-Request: https://github.com/git/git/pull/692

Range-diff vs v1:

 1:  f63d591d3d ! 1:  edb18304b2 dir: restructure in a way to avoid passing around a struct dirent
     @@ -5,6 +5,7 @@
          Restructure the code slightly to avoid passing around a struct dirent
          anywhere, which also enables us to avoid trying to manufacture one.
      
     +    Signed-off-by: Jeff King <peff@peff.net>
          Signed-off-by: Elijah Newren <newren@gmail.com>
      
       diff --git a/dir.c b/dir.c


 dir.c | 73 +++++++++++++++++++++++++----------------------------------
 1 file changed, 31 insertions(+), 42 deletions(-)

diff --git a/dir.c b/dir.c
index 7d255227b1..356d24bff8 100644
--- a/dir.c
+++ b/dir.c
@@ -41,7 +41,8 @@ struct cached_dir {
 	int nr_files;
 	int nr_dirs;
 
-	struct dirent *de;
+	const char *d_name;
+	int d_type;
 	const char *file;
 	struct untracked_cache_dir *ucd;
 };
@@ -50,8 +51,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	struct index_state *istate, const char *path, int len,
 	struct untracked_cache_dir *untracked,
 	int check_only, int stop_at_first_file, const struct pathspec *pathspec);
-static int get_dtype(struct dirent *de, struct index_state *istate,
-		     const char *path, int len);
+static int resolve_dtype(int dtype, struct index_state *istate,
+			 const char *path, int len);
 
 int count_slashes(const char *s)
 {
@@ -1215,8 +1216,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
 		int prefix = pattern->nowildcardlen;
 
 		if (pattern->flags & PATTERN_FLAG_MUSTBEDIR) {
-			if (*dtype == DT_UNKNOWN)
-				*dtype = get_dtype(NULL, istate, pathname, pathlen);
+			*dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
 			if (*dtype != DT_DIR)
 				continue;
 		}
@@ -1842,10 +1842,9 @@ static int get_index_dtype(struct index_state *istate,
 	return DT_UNKNOWN;
 }
 
-static int get_dtype(struct dirent *de, struct index_state *istate,
-		     const char *path, int len)
+static int resolve_dtype(int dtype, struct index_state *istate,
+			 const char *path, int len)
 {
-	int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 	struct stat st;
 
 	if (dtype != DT_UNKNOWN)
@@ -1870,14 +1869,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct strbuf *path,
 					  int baselen,
 					  const struct pathspec *pathspec,
-					  int dtype, struct dirent *de)
+					  int dtype)
 {
 	int exclude;
 	int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
 	enum path_treatment path_treatment;
 
-	if (dtype == DT_UNKNOWN)
-		dtype = get_dtype(de, istate, path->buf, path->len);
+	dtype = resolve_dtype(dtype, istate, path->buf, path->len);
 
 	/* Always exclude indexed files */
 	if (dtype != DT_DIR && has_path_in_index)
@@ -1985,21 +1983,18 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 				      int baselen,
 				      const struct pathspec *pathspec)
 {
-	int dtype;
-	struct dirent *de = cdir->de;
-
-	if (!de)
+	if (!cdir->d_name)
 		return treat_path_fast(dir, untracked, cdir, istate, path,
 				       baselen, pathspec);
-	if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git"))
+	if (is_dot_or_dotdot(cdir->d_name) || !fspathcmp(cdir->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
-	strbuf_addstr(path, de->d_name);
+	strbuf_addstr(path, cdir->d_name);
 	if (simplify_away(path->buf, path->len, pathspec))
 		return path_none;
 
-	dtype = DTYPE(de);
-	return treat_one_path(dir, untracked, istate, path, baselen, pathspec, dtype, de);
+	return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
+			      cdir->d_type);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -2087,10 +2082,17 @@ static int open_cached_dir(struct cached_dir *cdir,
 
 static int read_cached_dir(struct cached_dir *cdir)
 {
+	struct dirent *de;
+
 	if (cdir->fdir) {
-		cdir->de = readdir(cdir->fdir);
-		if (!cdir->de)
+		de = readdir(cdir->fdir);
+		if (!de) {
+			cdir->d_name = NULL;
+			cdir->d_type = DT_UNKNOWN;
 			return -1;
+		}
+		cdir->d_name = de->d_name;
+		cdir->d_type = DTYPE(de);
 		return 0;
 	}
 	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
@@ -2216,7 +2218,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		/* recurse into subdir if instructed by treat_path */
 		if ((state == path_recurse) ||
 			((state == path_untracked) &&
-			 (get_dtype(cdir.de, istate, path.buf, path.len) == DT_DIR) &&
+			 (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
 			 ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
 			  (pathspec &&
 			   do_match_pathspec(istate, pathspec, path.buf, path.len,
@@ -2314,10 +2316,10 @@ static int treat_leading_path(struct dir_struct *dir,
 	 */
 
 	struct strbuf sb = STRBUF_INIT;
+	struct strbuf subdir = STRBUF_INIT;
 	int prevlen, baselen;
 	const char *cp;
 	struct cached_dir cdir;
-	struct dirent *de;
 	enum path_treatment state = path_none;
 
 	/*
@@ -2342,22 +2344,8 @@ static int treat_leading_path(struct dir_struct *dir,
 	if (!len)
 		return 1;
 
-	/*
-	 * We need a manufactured dirent with sufficient space to store a
-	 * leading directory component of path in its d_name.  Here, we
-	 * assume that the dirent's d_name is either declared as
-	 *    char d_name[BIG_ENOUGH]
-	 * or that it is declared at the end of the struct as
-	 *    char d_name[]
-	 * For either case, padding with len+1 bytes at the end will ensure
-	 * sufficient storage space.
-	 */
-	de = xcalloc(1, st_add3(sizeof(struct dirent), len, 1));
 	memset(&cdir, 0, sizeof(cdir));
-	cdir.de = de;
-#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
-	de->d_type = DT_DIR;
-#endif
+	cdir.d_type = DT_DIR;
 	baselen = 0;
 	prevlen = 0;
 	while (1) {
@@ -2374,12 +2362,13 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		strbuf_reset(&sb);
 		strbuf_add(&sb, path, prevlen);
-		memcpy(de->d_name, path+prevlen, baselen-prevlen);
-		de->d_name[baselen-prevlen] = '\0';
+		strbuf_reset(&subdir);
+		strbuf_add(&subdir, path+prevlen, baselen-prevlen);
+		cdir.d_name = subdir.buf;
 		state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
 				    pathspec);
 		if (state == path_untracked &&
-		    get_dtype(cdir.de, istate, sb.buf, sb.len) == DT_DIR &&
+		    resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
 		    (dir->flags & DIR_SHOW_IGNORED_TOO ||
 		     do_match_pathspec(istate, pathspec, sb.buf, sb.len,
 				       baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
@@ -2399,7 +2388,7 @@ static int treat_leading_path(struct dir_struct *dir,
 					    &sb, baselen, pathspec,
 					    state);
 
-	free(de);
+	strbuf_release(&subdir);
 	strbuf_release(&sb);
 	return state == path_recurse;
 }

base-commit: d0654dc308b0ba76dd8ed7bbb33c8d8f7aacd783
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] dir: point treat_leading_path() warning to the right place
  2020-01-15 14:21 ` [PATCH v2] " Elijah Newren via GitGitGadget
@ 2020-01-15 20:21   ` Jeff King
  2020-01-15 20:28     ` Elijah Newren
  2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
  1 sibling, 1 reply; 15+ messages in thread
From: Jeff King @ 2020-01-15 20:21 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget
  Cc: Junio C Hamano, git, Johannes Schindelin, Elijah Newren

On Wed, Jan 15, 2020 at 02:21:18PM +0000, Elijah Newren via GitGitGadget wrote:

> From: Jeff King <peff@peff.net>
> 
> Restructure the code slightly to avoid passing around a struct dirent
> anywhere, which also enables us to avoid trying to manufacture one.
> 
> Signed-off-by: Jeff King <peff@peff.net>
> Signed-off-by: Elijah Newren <newren@gmail.com>

Thanks, this looks good.

I wondered briefly whether this:

> @@ -2374,12 +2362,13 @@ static int treat_leading_path(struct dir_struct *dir,
>  			break;
>  		strbuf_reset(&sb);
>  		strbuf_add(&sb, path, prevlen);
> -		memcpy(de->d_name, path+prevlen, baselen-prevlen);
> -		de->d_name[baselen-prevlen] = '\0';
> +		strbuf_reset(&subdir);
> +		strbuf_add(&subdir, path+prevlen, baselen-prevlen);
> +		cdir.d_name = subdir.buf;

...could avoid the extra strbuf by pointing into an existing string
(since d_name is now a pointer, and not part of a dirent). But I think
the answer is "no", because in a path like "a/b/c", the loop may see
just "b" (so offsetting into path isn't sufficient, because we also have
to cut off the trailing part).

I did notice one other small thing while looking at this code:

-- >8 --
Subject: [PATCH] dir: point treat_leading_path() warning to the right place

Commit 777b420347 (dir: synchronize treat_leading_path() and
read_directory_recursive(), 2019-12-19) tried to add two warning
comments in those functions, pointing at each other. But the one in
treat_leading_path() just points at itself.

Let's fix that. Since the comment also redirects the reader for more
details to "the commit that added this warning", and since we're now
modifying the warning (creating a new commit without those details),
let's mention the actual commit id.

Signed-off-by: Jeff King <peff@peff.net>
---
 dir.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 7d255227b1..31e83d982a 100644
--- a/dir.c
+++ b/dir.c
@@ -2308,9 +2308,9 @@ static int treat_leading_path(struct dir_struct *dir,
 	 * WARNING WARNING WARNING:
 	 *
 	 * Any updates to the traversal logic here may need corresponding
-	 * updates in treat_leading_path().  See the commit message for the
-	 * commit adding this warning as well as the commit preceding it
-	 * for details.
+	 * updates in read_directory_recursive().  See 777b420347 (dir:
+	 * synchronize treat_leading_path() and read_directory_recursive(),
+	 * 2019-12-19) and its parent commit for details.
 	 */
 
 	struct strbuf sb = STRBUF_INIT;
-- 
2.25.0.639.gb9b1511416


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] dir: point treat_leading_path() warning to the right place
  2020-01-15 20:21   ` [PATCH] dir: point treat_leading_path() warning to the right place Jeff King
@ 2020-01-15 20:28     ` Elijah Newren
  0 siblings, 0 replies; 15+ messages in thread
From: Elijah Newren @ 2020-01-15 20:28 UTC (permalink / raw)
  To: Jeff King
  Cc: Elijah Newren via GitGitGadget, Junio C Hamano, Git Mailing List,
	Johannes Schindelin

On Wed, Jan 15, 2020 at 12:21 PM Jeff King <peff@peff.net> wrote:
>
> On Wed, Jan 15, 2020 at 02:21:18PM +0000, Elijah Newren via GitGitGadget wrote:
>
> > From: Jeff King <peff@peff.net>
> >
> > Restructure the code slightly to avoid passing around a struct dirent
> > anywhere, which also enables us to avoid trying to manufacture one.
> >
> > Signed-off-by: Jeff King <peff@peff.net>
> > Signed-off-by: Elijah Newren <newren@gmail.com>
>
> Thanks, this looks good.
>
> I wondered briefly whether this:
>
> > @@ -2374,12 +2362,13 @@ static int treat_leading_path(struct dir_struct *dir,
> >                       break;
> >               strbuf_reset(&sb);
> >               strbuf_add(&sb, path, prevlen);
> > -             memcpy(de->d_name, path+prevlen, baselen-prevlen);
> > -             de->d_name[baselen-prevlen] = '\0';
> > +             strbuf_reset(&subdir);
> > +             strbuf_add(&subdir, path+prevlen, baselen-prevlen);
> > +             cdir.d_name = subdir.buf;
>
> ...could avoid the extra strbuf by pointing into an existing string
> (since d_name is now a pointer, and not part of a dirent). But I think
> the answer is "no", because in a path like "a/b/c", the loop may see
> just "b" (so offsetting into path isn't sufficient, because we also have
> to cut off the trailing part).
>
> I did notice one other small thing while looking at this code:
>
> -- >8 --
> Subject: [PATCH] dir: point treat_leading_path() warning to the right place
>
> Commit 777b420347 (dir: synchronize treat_leading_path() and
> read_directory_recursive(), 2019-12-19) tried to add two warning
> comments in those functions, pointing at each other. But the one in
> treat_leading_path() just points at itself.
>
> Let's fix that. Since the comment also redirects the reader for more
> details to "the commit that added this warning", and since we're now
> modifying the warning (creating a new commit without those details),
> let's mention the actual commit id.
>
> Signed-off-by: Jeff King <peff@peff.net>
> ---
>  dir.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/dir.c b/dir.c
> index 7d255227b1..31e83d982a 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -2308,9 +2308,9 @@ static int treat_leading_path(struct dir_struct *dir,
>          * WARNING WARNING WARNING:
>          *
>          * Any updates to the traversal logic here may need corresponding
> -        * updates in treat_leading_path().  See the commit message for the
> -        * commit adding this warning as well as the commit preceding it
> -        * for details.
> +        * updates in read_directory_recursive().  See 777b420347 (dir:
> +        * synchronize treat_leading_path() and read_directory_recursive(),
> +        * 2019-12-19) and its parent commit for details.
>          */
>
>         struct strbuf sb = STRBUF_INIT;
> --
> 2.25.0.639.gb9b1511416

Looks good to me; thanks.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 0/4] dir: more fill_directory() fixes
  2020-01-15 14:21 ` [PATCH v2] " Elijah Newren via GitGitGadget
  2020-01-15 20:21   ` [PATCH] dir: point treat_leading_path() warning to the right place Jeff King
@ 2020-01-16 20:21   ` Elijah Newren via GitGitGadget
  2020-01-16 20:21     ` [PATCH v3 1/4] clean: demonstrate a bug with pathspecs Derrick Stolee via GitGitGadget
                       ` (3 more replies)
  1 sibling, 4 replies; 15+ messages in thread
From: Elijah Newren via GitGitGadget @ 2020-01-16 20:21 UTC (permalink / raw)
  To: git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren

This is a follow up to en/fill-directory-fixes, and has grown from 1 patch
in v1 to 4 patches in v2, 3 of which are submissions by others to the git
list that I've just combined into one series since they are all about
fill_directory().

The first two patches may make sense to include in maint at some point,
though Stolee isn't that concerned about this regression[5][6].

 * The first patch is Kevin and Stolee's testcase showing a regression in
   "git clean -f "[1]
 * The second patch is a "simple" fix for that testcase with a pretty long
   (and slightly embarrassing though entertaining) backstory.
 * The third patch was from v1, and is a small tweak made by me to a patch
   proposed by Peff[2] (signoff from [3]) to avoid needing to create a
   dirent.
 * The fourth patch was a follow-up from Peff in response to V1 fixing a
   code comment[4].

[1] 
https://lore.kernel.org/git/pull.526.git.1579119946211.gitgitgadget@gmail.com/
[2] 
https://lore.kernel.org/git/20191219222403.GA705525@coredump.intra.peff.net/
[3] 
https://lore.kernel.org/git/20200114220357.GA3957260@coredump.intra.peff.net/
[4] 
https://lore.kernel.org/git/20200115202146.GA4091171@coredump.intra.peff.net/
[5] 
https://lore.kernel.org/git/354fa43b-0e62-1ee5-a63f-59d9b2da7d3f@gmail.com/
[6] 
https://lore.kernel.org/git/e008da66-defe-d2b0-410b-64b7754b9c6e@gmail.com/

Derrick Stolee (1):
  clean: demonstrate a bug with pathspecs

Elijah Newren (1):
  dir: treat_leading_path() and read_directory_recursive(), round 2

Jeff King (2):
  dir: restructure in a way to avoid passing around a struct dirent
  dir: point treat_leading_path() warning to the right place

 dir.c            | 83 ++++++++++++++++++++++--------------------------
 t/t7300-clean.sh |  9 ++++++
 2 files changed, 47 insertions(+), 45 deletions(-)


base-commit: d0654dc308b0ba76dd8ed7bbb33c8d8f7aacd783
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-692%2Fnewren%2Favoid-dirent-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-692/newren/avoid-dirent-v3
Pull-Request: https://github.com/git/git/pull/692

Range-diff vs v2:

 -:  ---------- > 1:  9efed585ef clean: demonstrate a bug with pathspecs
 -:  ---------- > 2:  ea95186774 dir: treat_leading_path() and read_directory_recursive(), round 2
 1:  edb18304b2 = 3:  5b8fa6e5a7 dir: restructure in a way to avoid passing around a struct dirent
 -:  ---------- > 4:  9d346f8b6b dir: point treat_leading_path() warning to the right place

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 1/4] clean: demonstrate a bug with pathspecs
  2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
@ 2020-01-16 20:21     ` Derrick Stolee via GitGitGadget
  2020-01-17 15:20       ` Derrick Stolee
  2020-01-16 20:21     ` [PATCH v3 2/4] dir: treat_leading_path() and read_directory_recursive(), round 2 Elijah Newren via GitGitGadget
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Derrick Stolee via GitGitGadget @ 2020-01-16 20:21 UTC (permalink / raw)
  To: git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren, Derrick Stolee

From: Derrick Stolee <dstolee@microsoft.com>

b9670c1f5e (dir: fix checks on common prefix directory, 2019-12-19)
modified the way pathspecs are handled when handling a directory
during "git clean -f <path>". While this improved the behavior for
known test breakages, it also regressed in how the clean command
handles cleaning a specified file.

Add a test case that demonstrates this behavior. This test passes
before b9670c1f5e then fails after.

Helped-by: Kevin Willford <Kevin.Willford@microsoft.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Reviewed-by: Elijah Newren <newren@gmail.com>
---
 t/t7300-clean.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 6e6d24c1c3..782e125c89 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -737,4 +737,13 @@ test_expect_success MINGW 'handle clean & core.longpaths = false nicely' '
 	test_i18ngrep "too long" .git/err
 '
 
+test_expect_failure 'clean untracked paths by pathspec' '
+	git init untracked &&
+	mkdir untracked/dir &&
+	echo >untracked/dir/file.txt &&
+	git -C untracked clean -f dir/file.txt &&
+	ls untracked/dir >actual &&
+	test_must_be_empty actual
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 2/4] dir: treat_leading_path() and read_directory_recursive(), round 2
  2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
  2020-01-16 20:21     ` [PATCH v3 1/4] clean: demonstrate a bug with pathspecs Derrick Stolee via GitGitGadget
@ 2020-01-16 20:21     ` Elijah Newren via GitGitGadget
  2020-01-17 15:25       ` Derrick Stolee
  2020-01-16 20:21     ` [PATCH v3 3/4] dir: restructure in a way to avoid passing around a struct dirent Jeff King via GitGitGadget
  2020-01-16 20:21     ` [PATCH v3 4/4] dir: point treat_leading_path() warning to the right place Jeff King via GitGitGadget
  3 siblings, 1 reply; 15+ messages in thread
From: Elijah Newren via GitGitGadget @ 2020-01-16 20:21 UTC (permalink / raw)
  To: git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren, Elijah Newren

From: Elijah Newren <newren@gmail.com>

I was going to title this "dir: more synchronizing of
treat_leading_path() and read_directory_recursive()", a nod to commit
777b42034764 ("dir: synchronize treat_leading_path() and
read_directory_recursive()", 2019-12-19), but the title was too long.

Anyway, first the backstory...

fill_directory() has always had a slightly error-prone interface: it
returns a subset of paths which *might* match the specified pathspec; it
was intended to prune away some paths which didn't match the specified
pathspec and keep at least all the ones that did match it.  Given this
interface, callers were responsible to post-process the results and
check whether each actually matched the pathspec.

builtin/clean.c did this.  It would first prune out duplicates (e.g. if
"dir" was returned as well as all files under "dir/", then it would
simplify this to just "dir"), and after pruning duplicates it would
compare the remaining paths to the specified pathspec(s).  This
post-processing itself could run into problems, though, as noted in
commit 404ebceda01c ("dir: also check directories for matching
pathspecs", 2019-09-17):

    For the case of git-clean and a set of pathspecs of "dir/file" and
    "more", this caused a problem because we'd end up with dir entries
    for both of
      "dir"
      "dir/file"
    Then correct_untracked_entries() would try to helpfully prune
    duplicates for us by removing "dir/file" since it's under "dir",
    leaving us with
      "dir"
    Since the original pathspec only had "dir/file", the only entry left
    doesn't match and leaves nothing to be removed.  (Note that if only
    one pathspec was specified, e.g. only "dir/file", then the
    common_prefix_len optimizations in fill_directory would cause us to
    bypass this problem, making it appear in simple tests that we could
    correctly remove manually specified pathspecs.)

That commit fixed the issue -- when multiple pathspecs were specified --
by making sure fill_directory() wouldn't return both "dir" and
"dir/file" outside the common_prefix_len optimization path.  This is
where it starts to get fun.

In commit b9670c1f5e6b ("dir: fix checks on common prefix directory",
2019-12-19), we noticed that the common_prefix_len wasn't doing
appropriate checks and letting all kinds of stuff through, resulting in
recursing into .git/ directories and other craziness.  So it started
locking down and doing checks on pathnames within that code path.  That
continued with commit 777b42034764 ("dir: synchronize
treat_leading_path() and read_directory_recursive()", 2019-12-19), which
noted the following:

    Our optimization to avoid calling into read_directory_recursive()
    when all pathspecs have a common leading directory mean that we need
    to match the logic that read_directory_recursive() would use if we
    had just called it from the root.  Since it does more than call
    treat_path() we need to copy that same logic.

...and then it more forcefully addressed the issue with this wonderfully
ironic statement:

    Needing to duplicate logic like this means it is guaranteed someone
    will eventually need to make further changes and forget to update
    both locations.  It is tempting to just nuke the leading_directory
    special casing to avoid such bugs and simplify the code, but
    unpack_trees' verify_clean_subdirectory() also calls
    read_directory() and does so with a non-empty leading path, so I'm
    hesitant to try to restructure further.  Add obnoxious warnings to
    treat_leading_path() and read_directory_recursive() to try to warn
    people of such problems.

You would think that with such a strongly worded description, that its
author would have actually ensured that the logic in
treat_leading_path() and read_directory_recursive() did actually match
and that *everything* that was needed had at least been copied over at
the time that this paragraph was written.  But you'd be wrong, I messed
it up by missing part of the logic.

Copy the missing bits to fix the new final test in t7300.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 dir.c            | 4 ++++
 t/t7300-clean.sh | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 7d255227b1..5d4c92d3aa 100644
--- a/dir.c
+++ b/dir.c
@@ -2383,6 +2383,10 @@ static int treat_leading_path(struct dir_struct *dir,
 		    (dir->flags & DIR_SHOW_IGNORED_TOO ||
 		     do_match_pathspec(istate, pathspec, sb.buf, sb.len,
 				       baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
+			if (!match_pathspec(istate, pathspec, sb.buf, sb.len,
+					    0 /* prefix */, NULL,
+					    0 /* do NOT special case dirs */))
+				state = path_none;
 			add_path_to_appropriate_result_list(dir, NULL, &cdir,
 							    istate,
 							    &sb, baselen,
diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh
index 782e125c89..cb5e34d94c 100755
--- a/t/t7300-clean.sh
+++ b/t/t7300-clean.sh
@@ -737,7 +737,7 @@ test_expect_success MINGW 'handle clean & core.longpaths = false nicely' '
 	test_i18ngrep "too long" .git/err
 '
 
-test_expect_failure 'clean untracked paths by pathspec' '
+test_expect_success 'clean untracked paths by pathspec' '
 	git init untracked &&
 	mkdir untracked/dir &&
 	echo >untracked/dir/file.txt &&
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 3/4] dir: restructure in a way to avoid passing around a struct dirent
  2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
  2020-01-16 20:21     ` [PATCH v3 1/4] clean: demonstrate a bug with pathspecs Derrick Stolee via GitGitGadget
  2020-01-16 20:21     ` [PATCH v3 2/4] dir: treat_leading_path() and read_directory_recursive(), round 2 Elijah Newren via GitGitGadget
@ 2020-01-16 20:21     ` Jeff King via GitGitGadget
  2020-01-16 20:21     ` [PATCH v3 4/4] dir: point treat_leading_path() warning to the right place Jeff King via GitGitGadget
  3 siblings, 0 replies; 15+ messages in thread
From: Jeff King via GitGitGadget @ 2020-01-16 20:21 UTC (permalink / raw)
  To: git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren, Jeff King

From: Jeff King <peff@peff.net>

Restructure the code slightly to avoid passing around a struct dirent
anywhere, which also enables us to avoid trying to manufacture one.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Elijah Newren <newren@gmail.com>
---
 dir.c | 73 +++++++++++++++++++++++++----------------------------------
 1 file changed, 31 insertions(+), 42 deletions(-)

diff --git a/dir.c b/dir.c
index 5d4c92d3aa..8437030006 100644
--- a/dir.c
+++ b/dir.c
@@ -41,7 +41,8 @@ struct cached_dir {
 	int nr_files;
 	int nr_dirs;
 
-	struct dirent *de;
+	const char *d_name;
+	int d_type;
 	const char *file;
 	struct untracked_cache_dir *ucd;
 };
@@ -50,8 +51,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	struct index_state *istate, const char *path, int len,
 	struct untracked_cache_dir *untracked,
 	int check_only, int stop_at_first_file, const struct pathspec *pathspec);
-static int get_dtype(struct dirent *de, struct index_state *istate,
-		     const char *path, int len);
+static int resolve_dtype(int dtype, struct index_state *istate,
+			 const char *path, int len);
 
 int count_slashes(const char *s)
 {
@@ -1215,8 +1216,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
 		int prefix = pattern->nowildcardlen;
 
 		if (pattern->flags & PATTERN_FLAG_MUSTBEDIR) {
-			if (*dtype == DT_UNKNOWN)
-				*dtype = get_dtype(NULL, istate, pathname, pathlen);
+			*dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
 			if (*dtype != DT_DIR)
 				continue;
 		}
@@ -1842,10 +1842,9 @@ static int get_index_dtype(struct index_state *istate,
 	return DT_UNKNOWN;
 }
 
-static int get_dtype(struct dirent *de, struct index_state *istate,
-		     const char *path, int len)
+static int resolve_dtype(int dtype, struct index_state *istate,
+			 const char *path, int len)
 {
-	int dtype = de ? DTYPE(de) : DT_UNKNOWN;
 	struct stat st;
 
 	if (dtype != DT_UNKNOWN)
@@ -1870,14 +1869,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct strbuf *path,
 					  int baselen,
 					  const struct pathspec *pathspec,
-					  int dtype, struct dirent *de)
+					  int dtype)
 {
 	int exclude;
 	int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
 	enum path_treatment path_treatment;
 
-	if (dtype == DT_UNKNOWN)
-		dtype = get_dtype(de, istate, path->buf, path->len);
+	dtype = resolve_dtype(dtype, istate, path->buf, path->len);
 
 	/* Always exclude indexed files */
 	if (dtype != DT_DIR && has_path_in_index)
@@ -1985,21 +1983,18 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 				      int baselen,
 				      const struct pathspec *pathspec)
 {
-	int dtype;
-	struct dirent *de = cdir->de;
-
-	if (!de)
+	if (!cdir->d_name)
 		return treat_path_fast(dir, untracked, cdir, istate, path,
 				       baselen, pathspec);
-	if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git"))
+	if (is_dot_or_dotdot(cdir->d_name) || !fspathcmp(cdir->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
-	strbuf_addstr(path, de->d_name);
+	strbuf_addstr(path, cdir->d_name);
 	if (simplify_away(path->buf, path->len, pathspec))
 		return path_none;
 
-	dtype = DTYPE(de);
-	return treat_one_path(dir, untracked, istate, path, baselen, pathspec, dtype, de);
+	return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
+			      cdir->d_type);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -2087,10 +2082,17 @@ static int open_cached_dir(struct cached_dir *cdir,
 
 static int read_cached_dir(struct cached_dir *cdir)
 {
+	struct dirent *de;
+
 	if (cdir->fdir) {
-		cdir->de = readdir(cdir->fdir);
-		if (!cdir->de)
+		de = readdir(cdir->fdir);
+		if (!de) {
+			cdir->d_name = NULL;
+			cdir->d_type = DT_UNKNOWN;
 			return -1;
+		}
+		cdir->d_name = de->d_name;
+		cdir->d_type = DTYPE(de);
 		return 0;
 	}
 	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
@@ -2216,7 +2218,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		/* recurse into subdir if instructed by treat_path */
 		if ((state == path_recurse) ||
 			((state == path_untracked) &&
-			 (get_dtype(cdir.de, istate, path.buf, path.len) == DT_DIR) &&
+			 (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
 			 ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
 			  (pathspec &&
 			   do_match_pathspec(istate, pathspec, path.buf, path.len,
@@ -2314,10 +2316,10 @@ static int treat_leading_path(struct dir_struct *dir,
 	 */
 
 	struct strbuf sb = STRBUF_INIT;
+	struct strbuf subdir = STRBUF_INIT;
 	int prevlen, baselen;
 	const char *cp;
 	struct cached_dir cdir;
-	struct dirent *de;
 	enum path_treatment state = path_none;
 
 	/*
@@ -2342,22 +2344,8 @@ static int treat_leading_path(struct dir_struct *dir,
 	if (!len)
 		return 1;
 
-	/*
-	 * We need a manufactured dirent with sufficient space to store a
-	 * leading directory component of path in its d_name.  Here, we
-	 * assume that the dirent's d_name is either declared as
-	 *    char d_name[BIG_ENOUGH]
-	 * or that it is declared at the end of the struct as
-	 *    char d_name[]
-	 * For either case, padding with len+1 bytes at the end will ensure
-	 * sufficient storage space.
-	 */
-	de = xcalloc(1, st_add3(sizeof(struct dirent), len, 1));
 	memset(&cdir, 0, sizeof(cdir));
-	cdir.de = de;
-#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
-	de->d_type = DT_DIR;
-#endif
+	cdir.d_type = DT_DIR;
 	baselen = 0;
 	prevlen = 0;
 	while (1) {
@@ -2374,12 +2362,13 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		strbuf_reset(&sb);
 		strbuf_add(&sb, path, prevlen);
-		memcpy(de->d_name, path+prevlen, baselen-prevlen);
-		de->d_name[baselen-prevlen] = '\0';
+		strbuf_reset(&subdir);
+		strbuf_add(&subdir, path+prevlen, baselen-prevlen);
+		cdir.d_name = subdir.buf;
 		state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
 				    pathspec);
 		if (state == path_untracked &&
-		    get_dtype(cdir.de, istate, sb.buf, sb.len) == DT_DIR &&
+		    resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
 		    (dir->flags & DIR_SHOW_IGNORED_TOO ||
 		     do_match_pathspec(istate, pathspec, sb.buf, sb.len,
 				       baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
@@ -2403,7 +2392,7 @@ static int treat_leading_path(struct dir_struct *dir,
 					    &sb, baselen, pathspec,
 					    state);
 
-	free(de);
+	strbuf_release(&subdir);
 	strbuf_release(&sb);
 	return state == path_recurse;
 }
-- 
gitgitgadget


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v3 4/4] dir: point treat_leading_path() warning to the right place
  2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
                       ` (2 preceding siblings ...)
  2020-01-16 20:21     ` [PATCH v3 3/4] dir: restructure in a way to avoid passing around a struct dirent Jeff King via GitGitGadget
@ 2020-01-16 20:21     ` Jeff King via GitGitGadget
  3 siblings, 0 replies; 15+ messages in thread
From: Jeff King via GitGitGadget @ 2020-01-16 20:21 UTC (permalink / raw)
  To: git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren, Jeff King

From: Jeff King <peff@peff.net>

Commit 777b420347 (dir: synchronize treat_leading_path() and
read_directory_recursive(), 2019-12-19) tried to add two warning
comments in those functions, pointing at each other. But the one in
treat_leading_path() just points at itself.

Let's fix that. Since the comment also redirects the reader for more
details to "the commit that added this warning", and since we're now
modifying the warning (creating a new commit without those details),
let's mention the actual commit id.

Signed-off-by: Jeff King <peff@peff.net>
Reviewed-by: Elijah Newren <newren@gmail.com>
---
 dir.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 8437030006..b460211e61 100644
--- a/dir.c
+++ b/dir.c
@@ -2310,9 +2310,9 @@ static int treat_leading_path(struct dir_struct *dir,
 	 * WARNING WARNING WARNING:
 	 *
 	 * Any updates to the traversal logic here may need corresponding
-	 * updates in treat_leading_path().  See the commit message for the
-	 * commit adding this warning as well as the commit preceding it
-	 * for details.
+	 * updates in read_directory_recursive().  See 777b420347 (dir:
+	 * synchronize treat_leading_path() and read_directory_recursive(),
+	 * 2019-12-19) and its parent commit for details.
 	 */
 
 	struct strbuf sb = STRBUF_INIT;
-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 1/4] clean: demonstrate a bug with pathspecs
  2020-01-16 20:21     ` [PATCH v3 1/4] clean: demonstrate a bug with pathspecs Derrick Stolee via GitGitGadget
@ 2020-01-17 15:20       ` Derrick Stolee
  2020-01-17 16:53         ` Elijah Newren
  0 siblings, 1 reply; 15+ messages in thread
From: Derrick Stolee @ 2020-01-17 15:20 UTC (permalink / raw)
  To: Derrick Stolee via GitGitGadget, git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren

On 1/16/2020 3:21 PM, Derrick Stolee via GitGitGadget wrote:
> From: Derrick Stolee <dstolee@microsoft.com>
> 
> b9670c1f5e (dir: fix checks on common prefix directory, 2019-12-19)

Thanks for fixing my typo here.

> modified the way pathspecs are handled when handling a directory
> during "git clean -f <path>". While this improved the behavior for
> known test breakages, it also regressed in how the clean command
> handles cleaning a specified file.
> 
> Add a test case that demonstrates this behavior. This test passes
> before b9670c1f5e then fails after.
> 
> Helped-by: Kevin Willford <Kevin.Willford@microsoft.com>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> Reviewed-by: Elijah Newren <newren@gmail.com>

Do we need your sign-off here, too?

-Stolee


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 2/4] dir: treat_leading_path() and read_directory_recursive(), round 2
  2020-01-16 20:21     ` [PATCH v3 2/4] dir: treat_leading_path() and read_directory_recursive(), round 2 Elijah Newren via GitGitGadget
@ 2020-01-17 15:25       ` Derrick Stolee
  0 siblings, 0 replies; 15+ messages in thread
From: Derrick Stolee @ 2020-01-17 15:25 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget, git
  Cc: Jeff King, Johannes Schindelin, Derrick Stolee, Kevin.Willford,
	Kyle Meyer, Jonathan Nieder, Elijah Newren

On 1/16/2020 3:21 PM, Elijah Newren via GitGitGadget wrote:
> From: Elijah Newren <newren@gmail.com>
> 
> I was going to title this "dir: more synchronizing of
> treat_leading_path() and read_directory_recursive()", a nod to commit
> 777b42034764 ("dir: synchronize treat_leading_path() and
> read_directory_recursive()", 2019-12-19), but the title was too long.
> 
> Anyway, first the backstory...

The backstory was helpful, thanks.

> Copy the missing bits to fix the new final test in t7300.

Thanks for the quick turnaround!

-Stolee


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v3 1/4] clean: demonstrate a bug with pathspecs
  2020-01-17 15:20       ` Derrick Stolee
@ 2020-01-17 16:53         ` Elijah Newren
  0 siblings, 0 replies; 15+ messages in thread
From: Elijah Newren @ 2020-01-17 16:53 UTC (permalink / raw)
  To: Derrick Stolee
  Cc: Derrick Stolee via GitGitGadget, Git Mailing List, Jeff King,
	Johannes Schindelin, Derrick Stolee, Kevin.Willford, Kyle Meyer,
	Jonathan Nieder

On Fri, Jan 17, 2020 at 7:20 AM Derrick Stolee <stolee@gmail.com> wrote:
>
> On 1/16/2020 3:21 PM, Derrick Stolee via GitGitGadget wrote:
> > From: Derrick Stolee <dstolee@microsoft.com>
> >
> > b9670c1f5e (dir: fix checks on common prefix directory, 2019-12-19)
>
> Thanks for fixing my typo here.
>
> > modified the way pathspecs are handled when handling a directory
> > during "git clean -f <path>". While this improved the behavior for
> > known test breakages, it also regressed in how the clean command
> > handles cleaning a specified file.
> >
> > Add a test case that demonstrates this behavior. This test passes
> > before b9670c1f5e then fails after.
> >
> > Helped-by: Kevin Willford <Kevin.Willford@microsoft.com>
> > Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> > Reviewed-by: Elijah Newren <newren@gmail.com>
>
> Do we need your sign-off here, too?

I figured I didn't change the code, and the list already independently
saw your submission and sign-off, so I assumed that a "Reviewed-by"
was more appropriate.  If I'm wrong or people would just prefer me to
also add a Signed-off-by, I'm happy to add one.  And if it is, then it
should also be added to the last patch in the series from Peff.

If Junio wants it and it's easier for him to just edit-in for both of
these commits:
   Signed-off-by: Elijah Newren <newren@gmail.com>

or I can send a re-roll.  But I'll assume it's not necessary if I
don't hear otherwise.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, back to index

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-14 16:32 [PATCH] dir: restructure in a way to avoid passing around a struct dirent Elijah Newren via GitGitGadget
2020-01-14 21:07 ` Johannes Schindelin
2020-01-14 21:13   ` Elijah Newren
2020-01-14 22:03 ` Jeff King
2020-01-15 14:21 ` [PATCH v2] " Elijah Newren via GitGitGadget
2020-01-15 20:21   ` [PATCH] dir: point treat_leading_path() warning to the right place Jeff King
2020-01-15 20:28     ` Elijah Newren
2020-01-16 20:21   ` [PATCH v3 0/4] dir: more fill_directory() fixes Elijah Newren via GitGitGadget
2020-01-16 20:21     ` [PATCH v3 1/4] clean: demonstrate a bug with pathspecs Derrick Stolee via GitGitGadget
2020-01-17 15:20       ` Derrick Stolee
2020-01-17 16:53         ` Elijah Newren
2020-01-16 20:21     ` [PATCH v3 2/4] dir: treat_leading_path() and read_directory_recursive(), round 2 Elijah Newren via GitGitGadget
2020-01-17 15:25       ` Derrick Stolee
2020-01-16 20:21     ` [PATCH v3 3/4] dir: restructure in a way to avoid passing around a struct dirent Jeff King via GitGitGadget
2020-01-16 20:21     ` [PATCH v3 4/4] dir: point treat_leading_path() warning to the right place Jeff King via GitGitGadget

git@vger.kernel.org list mirror (unofficial, one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git