git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jameson Miller <jameson.miller81@gmail.com>
To: jameson.miller81@gmail.com
Cc: bmwill@google.com, git@vger.kernel.org, gitster@pobox.com,
	jamill@microsoft.com, peff@peff.net, sxlijin@gmail.com
Subject: [PATCH v2] Improve performance of git status --ignored
Date: Mon, 18 Sep 2017 13:24:33 -0400	[thread overview]
Message-ID: <1505755473-6720-2-git-send-email-jamill@microsoft.com> (raw)
In-Reply-To: <1505755473-6720-1-git-send-email-jamill@microsoft.com>

Improve the performance of the directory listing logic when it wants to list
non-empty ignored directories. In order to show non-empty ignored directories,
the existing logic will recursively iterate through all contents of an ignored
directory. This change introduces the optimization to stop iterating through
the contents once it finds the first file. This can have a significant
improvement in 'git status --ignored' performance in repositories with a large
number of files in ignored directories.

For an example of the performance difference on an example repository with
196,000 files in 400 ignored directories:

| Command                    |  Time (s) |
| -------------------------- | --------- |
| git status                 |   1.2     |
| git status --ignored (old) |   3.9     |
| git status --ignored (new) |   1.4     |

Signed-off-by: Jameson Miller <jamill@microsoft.com>
---
 dir.c | 47 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/dir.c b/dir.c
index 1c55dc3..1d17b80 100644
--- a/dir.c
+++ b/dir.c
@@ -49,7 +49,7 @@ struct cached_dir {
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	struct index_state *istate, const char *path, int len,
 	struct untracked_cache_dir *untracked,
-	int check_only, const struct pathspec *pathspec);
+	int check_only, int stop_at_first_file, const struct pathspec *pathspec);
 static int get_dtype(struct dirent *de, struct index_state *istate,
 		     const char *path, int len);
 
@@ -1404,8 +1404,13 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 
 	untracked = lookup_untracked(dir->untracked, untracked,
 				     dirname + baselen, len - baselen);
+
+	/*
+	 * If this is an excluded directory, then we only need to check if
+	 * the directory contains any files.
+	 */
 	return read_directory_recursive(dir, istate, dirname, len,
-					untracked, 1, pathspec);
+					untracked, 1, exclude, pathspec);
 }
 
 /*
@@ -1633,7 +1638,7 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
 		 * with check_only set.
 		 */
 		return read_directory_recursive(dir, istate, path->buf, path->len,
-						cdir->ucd, 1, pathspec);
+						cdir->ucd, 1, 0, pathspec);
 	/*
 	 * We get path_recurse in the first run when
 	 * directory_exists_in_index() returns index_nonexistent. We
@@ -1793,12 +1798,20 @@ static void close_cached_dir(struct cached_dir *cdir)
  * Also, we ignore the name ".git" (even if it is not a directory).
  * That likely will not change.
  *
+ * If 'stop_at_first_file' is specified, 'path_excluded' is returned
+ * to signal that a file was found. This is the least significant value that
+ * indicates that a file was encountered that does not depend on the order of
+ * whether an untracked or exluded path was encountered first.
+ *
  * Returns the most significant path_treatment value encountered in the scan.
+ * If 'stop_at_first_file' is specified, `path_excluded` is the most
+ * significant path_treatment value that will be returned.
  */
+
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	struct index_state *istate, const char *base, int baselen,
 	struct untracked_cache_dir *untracked, int check_only,
-	const struct pathspec *pathspec)
+	int stop_at_first_file, const struct pathspec *pathspec)
 {
 	struct cached_dir cdir;
 	enum path_treatment state, subdir_state, dir_state = path_none;
@@ -1832,12 +1845,34 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			subdir_state =
 				read_directory_recursive(dir, istate, path.buf,
 							 path.len, ud,
-							 check_only, pathspec);
+							 check_only, stop_at_first_file, pathspec);
 			if (subdir_state > dir_state)
 				dir_state = subdir_state;
 		}
 
 		if (check_only) {
+			if (stop_at_first_file) {
+				/*
+				 * If stopping at first file, then
+				 * signal that a file was found by
+				 * returning `path_excluded`. This is
+				 * to return a consistent value
+				 * regardless of whether an ignored or
+				 * excluded file happened to be
+				 * encountered 1st.
+				 *
+				 * In current usage, the
+				 * `stop_at_first_file` is passed when
+				 * an ancestor directory has matched
+				 * an exclude pattern, so any found
+				 * files will be excluded.
+				 */
+				if (dir_state >= path_excluded) {
+					dir_state = path_excluded;
+					break;
+				}
+			}
+
 			/* abort early if maximum state has been reached */
 			if (dir_state == path_untracked) {
 				if (cdir.fdir)
@@ -2108,7 +2143,7 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
 		 */
 		dir->untracked = NULL;
 	if (!len || treat_leading_path(dir, istate, path, len, pathspec))
-		read_directory_recursive(dir, istate, path, len, untracked, 0, pathspec);
+		read_directory_recursive(dir, istate, path, len, untracked, 0, 0, pathspec);
 	QSORT(dir->entries, dir->nr, cmp_dir_entry);
 	QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry);
 
-- 
2.7.4


  reply	other threads:[~2017-09-18 17:24 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-10 18:49 [PATCH v1 0/1] Teach status to show ignored directories Jameson Miller
2017-08-10 18:49 ` [PATCH v1 1/1] dir: teach " Jameson Miller
2017-08-10 20:03   ` Stefan Beller
2017-08-11 17:48     ` Jameson Miller
2017-08-14 21:05       ` Stefan Beller
2017-08-11 17:39   ` Brandon Williams
2017-08-11 18:29     ` Jameson Miller
2017-09-18 17:24 ` [PATCH v2] Improve performance of git status --ignored Jameson Miller
2017-09-18 17:24   ` Jameson Miller [this message]
2017-09-19  3:27     ` Junio C Hamano
2017-09-19 17:52     ` Brandon Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1505755473-6720-2-git-send-email-jamill@microsoft.com \
    --to=jameson.miller81@gmail.com \
    --cc=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jamill@microsoft.com \
    --cc=peff@peff.net \
    --cc=sxlijin@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).