git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Steffen Prohaska <prohaska@zib.de>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org,
	Johannes Schindelin <Johannes.Schindelin@gmx.de>,
	Johannes Sixt <j6t@kdbg.org>,
	Marius Storm-Olsen <marius@trolltech.com>,
	Steffen Prohaska <prohaska@zib.de>
Subject: [PATCH 08/11] MinGW readdir reimplementation to support d_type
Date: Sun, 31 May 2009 18:15:22 +0200	[thread overview]
Message-ID: <1243786525-4493-9-git-send-email-prohaska@zib.de> (raw)
In-Reply-To: <1243786525-4493-8-git-send-email-prohaska@zib.de>

From: Marius Storm-Olsen <marius@trolltech.com>

The original readdir implementation was fast, but did not
support d_type. This means that git would do an additional
lstat for each entry to figure out whether the entry was a
directory or not. This needlessly slowed down many
operations, since the Windows API provides this information
directly when walking the directories.

By running this implementation on Moe's repo structure:
  mkdir bummer && cd bummer; for ((i=0;i<100;i++)); do
    mkdir $i && pushd $i;
      for ((j=0;j<1000;j++)); do echo "$j" >$j; done;
    popd;
  done

We see the following speedups:
  git add .
  -------------------
  old: 00:00:23(.087)
  new: 00:00:21(.512) 1.07x

  git status
  -------------------
  old: 00:00:03(.306)
  new: 00:00:01(.684) 1.96x

  git clean -dxf
  -------------------
  old: 00:00:01(.918)
  new: 00:00:00(.295) 6.50x

Signed-off-by: Marius Storm-Olsen <marius@trolltech.com>
Signed-off-by: Steffen Prohaska <prohaska@zib.de>
---
 compat/mingw.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 compat/mingw.h |   29 +++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 0 deletions(-)

diff --git a/compat/mingw.c b/compat/mingw.c
index e190fdd..d85d680 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -1171,3 +1171,62 @@ char *getpass(const char *prompt)
 	fputs("\n", stderr);
 	return strbuf_detach(&buf, NULL);
 }
+
+#ifndef NO_MINGW_REPLACE_READDIR
+/* MinGW readdir implementation to avoid extra lstats for Git */
+struct mingw_DIR	/* view used by mingw_readdir() after casting its DIR *; presumably must match the C runtime's DIR layout -- confirm */
+{
+	struct _finddata_t	dd_dta;		/* disk transfer area for this dir; not touched by mingw_readdir() */
+	struct mingw_dirent	dd_dir;		/* Entry returned to the caller; our own layout, including d_type */
+	long			dd_handle;	/* Win32 search handle stored as long; INVALID_HANDLE_VALUE while no search is open, 0 is rejected with EBADF */
+	int			dd_stat; 	/* 0 = next entry to read is first entry, -1 = off the end, positive = 0 based index of next entry */
+	char			dd_name[1]; 	/* given path for dir with search pattern, passed to FindFirstFileA() (struct is extended) */
+};
+
+/* Return the next entry of `dir' with d_type set from the Win32 find data,
+   or NULL at end of directory (errno untouched) or on error (errno set). */
+struct dirent *mingw_readdir(DIR *dir)
+{
+	WIN32_FIND_DATAA buf;
+	HANDLE handle;
+	DWORD lasterr;
+	struct mingw_DIR *mdir = (struct mingw_DIR*)dir;
+
+	if (!dir->dd_handle) {
+		errno = EBADF; /* No set_errno for mingw */
+		return NULL;
+	}
+
+	if (dir->dd_handle == (long)INVALID_HANDLE_VALUE && dir->dd_stat == 0) {
+		handle = FindFirstFileA(dir->dd_name, &buf); /* first call */
+		lasterr = GetLastError();
+		dir->dd_handle = (long)handle;
+		if (handle == INVALID_HANDLE_VALUE) {
+			/* `buf' was not filled in; never fall through to use it. */
+			if (lasterr != ERROR_NO_MORE_FILES)
+				errno = err_win_to_posix(lasterr);
+			return NULL;
+		}
+	} else if (dir->dd_handle == (long)INVALID_HANDLE_VALUE) {
+		return NULL; /* no open search: the listing already ended or failed */
+	} else if (!FindNextFileA((HANDLE)dir->dd_handle, &buf)) {
+		lasterr = GetLastError();
+		FindClose((HANDLE)dir->dd_handle);
+		dir->dd_handle = (long)INVALID_HANDLE_VALUE;
+		/* POSIX says you shouldn't set errno when readdir can't
+		   find any more files; so, if another error we leave it set. */
+		if (lasterr != ERROR_NO_MORE_FILES)
+			errno = err_win_to_posix(lasterr);
+		return NULL;
+	}
+
+	/* We get here if `buf' contains valid data.  */
+	strcpy(dir->dd_dir.d_name, buf.cFileName);
+	++dir->dd_stat;
+	/* Set file type, based on WIN32_FIND_DATA */
+	mdir->dd_dir.d_type = (buf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ?
+		DT_DIR : DT_REG;
+
+	return (struct dirent*)&dir->dd_dir;
+}
+#endif // !NO_MINGW_REPLACE_READDIR
diff --git a/compat/mingw.h b/compat/mingw.h
index 4c50f5b..4f7ba4c 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -235,3 +235,32 @@ int main(int argc, const char **argv) \
 	return mingw_main(argc, argv); \
 } \
 static int mingw_main(c,v)
+
+#ifndef NO_MINGW_REPLACE_READDIR
+/*
+ * A replacement for readdir that also reports the file type of each
+ * entry. This avoids extra, unneeded lstat calls in git on MinGW.
+ */
+#undef DT_UNKNOWN
+#undef DT_DIR
+#undef DT_REG
+#undef DT_LNK
+#define DT_UNKNOWN	0	/* never reported by mingw_readdir() */
+#define DT_DIR		1	/* entry has FILE_ATTRIBUTE_DIRECTORY */
+#define DT_REG		2	/* every entry that is not a directory */
+#define DT_LNK		3	/* never reported by mingw_readdir() */
+
+struct mingw_dirent	/* NOTE(review): looks like it mirrors the C runtime's struct dirent with d_type overlaid -- confirm */
+{
+	long		d_ino;			/* Never written by mingw_readdir(). */
+	union {
+		unsigned short	d_reclen;	/* Shares storage with d_type, so not zero once d_type is set. */
+		unsigned char   d_type;		/* Reimplementation adds this: DT_DIR or DT_REG */
+	};
+	unsigned short	d_namlen;		/* Length of name in d_name; not updated by mingw_readdir(). */
+	char		d_name[FILENAME_MAX];	/* File name, NUL-terminated. */
+};
+#define dirent mingw_dirent	/* from here on "struct dirent" names struct mingw_dirent */
+#define readdir(x) mingw_readdir(x)	/* route every readdir() call to the replacement */
+struct dirent *mingw_readdir(DIR *dir);	/* returns struct mingw_dirent * because of the define above */
+#endif // !NO_MINGW_REPLACE_READDIR
-- 
1.6.3.1.54.g99dd

  reply	other threads:[~2009-05-31 16:16 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-31 16:15 [PATCH 00/11] Various msysgit patches Steffen Prohaska
2009-05-31 16:15 ` [PATCH 01/11] MinGW: GCC >= 4 does not need SNPRINTF_SIZE_CORR anymore Steffen Prohaska
2009-05-31 16:15   ` [PATCH 02/11] Quiet make: do not leave Windows behind Steffen Prohaska
2009-05-31 16:15     ` [PATCH 03/11] Work around a regression in Windows 7, causing erase_in_line() to crash sometimes Steffen Prohaska
2009-05-31 16:15       ` [PATCH 04/11] test-chmtime: work around Windows limitation Steffen Prohaska
2009-05-31 16:15         ` [PATCH 05/11] winansi: fix compile warnings Steffen Prohaska
2009-05-31 16:15           ` [PATCH 06/11] git: browsing paths with spaces when using the start command Steffen Prohaska
2009-05-31 16:15             ` [PATCH 07/11] connect.c: Support PuTTY plink and TortoisePlink as SSH on Windows Steffen Prohaska
2009-05-31 16:15               ` Steffen Prohaska [this message]
     [not found]                 ` <1243786525-4493-10-git-send-email-prohaska@zib.de>
2009-05-31 16:15                   ` [PATCH 10/11] Fix warnings in nedmalloc when compiling with GCC 4.4.0 Steffen Prohaska
2009-05-31 16:15                     ` [PATCH 11/11] MinGW: Teach Makefile to detect msysgit and apply specific settings Steffen Prohaska
2009-06-01  7:43                     ` [PATCH 10/11] Fix warnings in nedmalloc when compiling with GCC 4.4.0 Junio C Hamano
2009-06-01  8:57                       ` Johannes Schindelin
2009-06-01 16:33                         ` Junio C Hamano
2009-06-01 23:38                           ` GeunSik Lim
2009-06-02 12:52                           ` Johannes Schindelin
2009-06-02 15:50                             ` Junio C Hamano
2009-06-03 12:57                               ` Johannes Schindelin
     [not found]                                 ` <7vprdl9qon.fsf@alter.siamese.dyndns.org>
2009-06-03 17:23                                 ` Junio C Hamano
2009-06-08 14:46                                   ` [PATCH 10/11 v2] " Johannes Schindelin
2009-06-08 16:50                                     ` Junio C Hamano
2009-06-08 18:24                                       ` Johannes Sixt
2009-06-09  6:06                                       ` Steffen Prohaska
2009-06-10  9:27                                         ` Johannes Sixt
2009-06-11 20:52                                           ` [PATCH] Fix typo in nedmalloc warning fix Johannes Sixt
2009-06-11 20:56                                             ` [PATCH] compat/ has subdirectories: do not omit them in 'make clean' Johannes Sixt
2009-06-11 21:13                                             ` [PATCH] Fix typo in nedmalloc warning fix Johannes Schindelin
2009-05-31 18:03           ` [PATCH 05/11] winansi: fix compile warnings Johannes Sixt
2009-05-31 19:52             ` Johannes Sixt
2009-06-01  6:41               ` [PATCH 05/11 v2] MinGW: fix warning about implicit declaration of _getch() Steffen Prohaska
2009-06-01  6:04             ` [PATCH 03/11 v2] Work around a regression in Windows 7, causing erase_in_line() to crash sometimes Steffen Prohaska
2009-06-01  8:25               ` Johannes Schindelin
2009-06-01 10:26                 ` Steffen Prohaska
2009-06-01 10:41                   ` Johannes Schindelin
2009-06-01  7:43         ` [PATCH 04/11] test-chmtime: work around Windows limitation Junio C Hamano
2009-06-01  9:00           ` Johannes Schindelin
2009-05-31 20:06 ` [PATCH 00/11] Various msysgit patches Johannes Sixt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1243786525-4493-9-git-send-email-prohaska@zib.de \
    --to=prohaska@zib.de \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=j6t@kdbg.org \
    --cc=marius@trolltech.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).