[PATCH v4 0/6] Fast git status via a file system watcher

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* [PATCH v4 0/6] Fast git status via a file system watcher
@ 2017-06-01 15:50 Ben Peart
  2017-06-01 15:51 ` [PATCH v4 1/6] bswap: add 64 bit endianness helper get_be64 Ben Peart
                   ` (8 more replies)
  0 siblings, 9 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:50 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

Changes from V3 include:
 - update test script based on feedback
 - update template hook proc with better post-processing code and make
   it executable

Ben Peart (6):
  bswap: add 64 bit endianness helper get_be64
  dir: make lookup_untracked() available outside of dir.c
  fsmonitor: teach git to optionally utilize a file system monitor to
    speed up detecting new or changed files.
  fsmonitor: add test cases for fsmonitor extension
  fsmonitor: add documentation for the fsmonitor extension.
  fsmonitor: add a sample query-fsmonitor hook script for Watchman

 Documentation/config.txt                 |   7 +
 Documentation/githooks.txt               |  23 +++
 Documentation/technical/index-format.txt |  19 +++
 Makefile                                 |   1 +
 builtin/update-index.c                   |   1 +
 cache.h                                  |   5 +
 compat/bswap.h                           |   4 +
 config.c                                 |   5 +
 dir.c                                    |  16 ++-
 dir.h                                    |   5 +
 entry.c                                  |   1 +
 environment.c                            |   1 +
 fsmonitor.c                              | 238 +++++++++++++++++++++++++++++++
 fsmonitor.h                              |   9 ++
 read-cache.c                             |  28 +++-
 t/t7519-status-fsmonitor.sh              | 173 ++++++++++++++++++++++
 templates/hooks--query-fsmonitor.sample  |  60 ++++++++
 unpack-trees.c                           |   1 +
 18 files changed, 594 insertions(+), 3 deletions(-)
 create mode 100644 fsmonitor.c
 create mode 100644 fsmonitor.h
 create mode 100755 t/t7519-status-fsmonitor.sh
 create mode 100755 templates/hooks--query-fsmonitor.sample

Interdiff (v3..v4):

diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 395db46d55..458eabe6dc 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -5,48 +5,46 @@ test_description='git status with file system watcher'
 . ./test-lib.sh
 
 clean_repo () {
-	git reset --hard HEAD
-	git clean -fd
+	git reset --hard HEAD &&
+	git clean -fd &&
 	rm -f marker
 }
 
 dirty_repo () {
-	: >untracked
-	: >dir1/untracked
-	: >dir2/untracked
-	echo 1 >modified
-	echo 2 >dir1/modified
-	echo 3 >dir2/modified
-	echo 4 >new
-	echo 5 >dir1/new
-	echo 6 >dir2/new
-	git add new
-	git add dir1/new
+	: >untracked &&
+	: >dir1/untracked &&
+	: >dir2/untracked &&
+	echo 1 >modified &&
+	echo 2 >dir1/modified &&
+	echo 3 >dir2/modified &&
+	echo 4 >new &&
+	echo 5 >dir1/new &&
+	echo 6 >dir2/new &&
+	git add new &&
+	git add dir1/new &&
 	git add dir2/new
 }
 
 # The test query-fsmonitor hook proc will output a marker file we can use to
 # ensure the hook was actually used to generate the correct results.
 
+# fsmonitor works correctly with or without the untracked cache
+# but if it is available, we'll turn it on to ensure we test that
+# codepath as well.
+
+test_lazy_prereq UNTRACKED_CACHE '
+	{ git update-index --test-untracked-cache; ret=$?; } &&
+	test $ret -ne 1
+'
+
+if test_have_prereq UNTRACKED_CACHE; then
+	git config core.untrackedcache true
+else
+	git config core.untrackedcache false
+fi
+
 test_expect_success 'setup' '
 	mkdir -p .git/hooks &&
-	write_script .git/hooks/query-fsmonitor<<-\EOF &&
-	if [ $1 -ne 1 ]
-	then
-		echo -e "Unsupported query-fsmonitor hook version.\n" >&2
-		exit 1;
-	fi
-	: >marker
-	printf "untracked\0"
-	printf "dir1/untracked\0"
-	printf "dir2/untracked\0"
-	printf "modified\0"
-	printf "dir1/modified\0"
-	printf "dir2/modified\0"
-	printf "new\0""
-	printf "dir1/new\0"
-	printf "dir2/new\0"
-	EOF
 	: >tracked &&
 	: >modified &&
 	mkdir dir1 &&
@@ -58,55 +56,19 @@ test_expect_success 'setup' '
 	git add . &&
 	test_tick &&
 	git commit -m initial &&
-	dirty_repo
-'
-
-cat >.gitignore <<\EOF
-.gitignore
-expect*
-output*
-marker*
-EOF
-
-# Status is well tested elsewhere so we'll just ensure that the results are
-# the same when using core.fsmonitor. First call after turning on the option
-# does a complete scan so need to do two calls to ensure we test the new
-# codepath.
-
-test_expect_success 'status with core.untrackedcache true' '
-	git config core.fsmonitor true  &&
-	git config core.untrackedcache true &&
-	git -c core.fsmonitor=false -c core.untrackedcache=true status >expect &&
-	clean_repo &&
-	git status &&
-	test_path_is_missing marker &&
-	dirty_repo &&
-	git status >output &&
-	test_path_is_file marker &&
-	test_i18ncmp expect output
-'
-
-
-test_expect_success 'status with core.untrackedcache false' '
 	git config core.fsmonitor true &&
-	git config core.untrackedcache false &&
-	git -c core.fsmonitor=false -c core.untrackedcache=false status >expect &&
-	clean_repo &&
-	git status &&
-	test_path_is_missing marker &&
-	dirty_repo &&
-	git status >output &&
-	test_path_is_file marker &&
-	test_i18ncmp expect output
+	cat >.gitignore <<-\EOF
+	.gitignore
+	expect*
+	output*
+	marker*
+	EOF
 '
 
 # Ensure commands that call refresh_index() to move the index back in time
 # properly invalidate the fsmonitor cache
 
 test_expect_success 'refresh_index() invalidates fsmonitor cache' '
-	git config core.fsmonitor true &&
-	git config core.untrackedcache true &&
-	clean_repo &&
 	git status &&
 	test_path_is_missing marker &&
 	dirty_repo &&
@@ -118,6 +80,7 @@ test_expect_success 'refresh_index() invalidates fsmonitor cache' '
 	git status &&
 	test_path_is_file marker &&
 	git reset HEAD~1 &&
+	rm -f marker &&
 	git status >output &&
 	test_path_is_file marker &&
 	git -c core.fsmonitor=false status >expect &&
@@ -129,9 +92,7 @@ test_expect_success 'refresh_index() invalidates fsmonitor cache' '
 # extensions exist other than 'TREE' so do a "git status" to get the extension
 # written before testing the results.
 
-test_expect_success 'status doesnt detect unreported modifications' '
-	git config core.fsmonitor true &&
-	git config core.untrackedcache true &&
+test_expect_success "status doesn't detect unreported modifications" '
 	write_script .git/hooks/query-fsmonitor<<-\EOF &&
 	:>marker
 	EOF
@@ -146,13 +107,67 @@ test_expect_success 'status doesnt detect unreported modifications' '
 	test_i18ngrep ! "Untracked files:" output &&
 	write_script .git/hooks/query-fsmonitor<<-\EOF &&
 	:>marker
-	printf "untracked%s\0"
+	printf "untracked\0"
 	printf "dir1/modified\0"
 	EOF
+	rm -f marker &&
 	git status >output &&
 	test_path_is_file marker &&
 	test_i18ngrep "Changes not staged for commit:" output &&
 	test_i18ngrep "Untracked files:" output
 '
 
+# Status is well tested elsewhere so we'll just ensure that the results are
+# the same when using core.fsmonitor. First call after turning on the option
+# does a complete scan so we need to do two calls to ensure we test the new
+# codepath.
+
+test_expect_success 'status with core.untrackedcache false' '
+	git config core.untrackedcache false &&
+	write_script .git/hooks/query-fsmonitor<<-\EOF &&
+	if [ $1 -ne 1 ]
+	then
+		echo -e "Unsupported query-fsmonitor hook version.\n" >&2
+		exit 1;
+	fi
+	: >marker
+	printf "untracked\0"
+	printf "dir1/untracked\0"
+	printf "dir2/untracked\0"
+	printf "modified\0"
+	printf "dir1/modified\0"
+	printf "dir2/modified\0"
+	printf "new\0""
+	printf "dir1/new\0"
+	printf "dir2/new\0"
+	EOF
+	clean_repo &&
+	dirty_repo &&
+	git -c core.fsmonitor=false status >expect &&
+	clean_repo &&
+	git status &&
+	test_path_is_missing marker &&
+	dirty_repo &&
+	git status >output &&
+	test_path_is_file marker &&
+	test_i18ncmp expect output
+'
+
+if ! test_have_prereq UNTRACKED_CACHE; then
+	skip_all='This system does not support untracked cache'
+	test_done
+fi
+
+test_expect_success 'status with core.untrackedcache true' '
+	git config core.untrackedcache true &&
+	git -c core.fsmonitor=false status >expect &&
+	clean_repo &&
+	git status &&
+	test_path_is_missing marker &&
+	dirty_repo &&
+	git status >output &&
+	test_path_is_file marker &&
+	test_i18ncmp expect output
+'
+
 test_done
diff --git a/templates/hooks--query-fsmonitor.sample b/templates/hooks--query-fsmonitor.sample
old mode 100644
new mode 100755
index 615f3332fa..941c4c5b57
--- a/templates/hooks--query-fsmonitor.sample
+++ b/templates/hooks--query-fsmonitor.sample
@@ -4,10 +4,10 @@
 # (https://facebook.github.io/watchman/) with git to provide fast
 # git status.
 #
-# The hook is passed a time in nanoseconds formatted as a string and
-# outputs to stdout all files that have been modified since the given
-# time. Paths must be relative to the root of the working tree and
-# separated by a single NUL.
+# The hook is passed a version (currently 1) and a time in nanoseconds
+# formatted as a string and outputs to stdout all files that have been
+# modified since the given time. Paths must be relative to the root of
+# the working tree and separated by a single NUL.
 #
 # To enable this hook, rename this file to "query-fsmonitor"
 
@@ -33,5 +33,28 @@ esac
 
 # Query Watchman for all the changes since the requested time
 echo "[\"query\", \"$GIT_WORK_TREE\", {\"since\": $time_t, \"fields\":[\"name\"]}]" | \
-watchman -j | \
-perl -e 'use JSON::PP; my $o = JSON::PP->new->utf8->decode(join("", <>)); die "Watchman: $o->{'error'}.\nFalling back to scanning...\n" if defined($o->{"error"}); print(join("\0", @{$o->{"files"}}));'
+	watchman -j |
+	perl -0666 -e '
+		use strict;
+		use warnings;
+
+		my $stdin = <>;
+		die "Watchman: command returned no output.\nFalling back to scanning...\n" if $stdin eq "";
+		die "Watchman: command returned invalid output: $stdin\nFalling back to scanning...\n" unless $stdin =~ /^\{/;
+
+		my $json_pkg;
+		eval {
+			require JSON::XS;
+			$json_pkg = "JSON::XS";
+			1;
+		} or do {
+			require JSON::PP;
+			$json_pkg = "JSON::PP";
+		};
+
+		my $o = $json_pkg->new->utf8->decode($stdin);
+		die "Watchman: $o->{error}.\nFalling back to scanning...\n" if $o->{error};
+
+		local $, = "\0";
+		print @{$o->{files}};
+	'

-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v4 1/6] bswap: add 64 bit endianness helper get_be64
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
@ 2017-06-01 15:51 ` Ben Peart
  2017-06-01 15:51 ` [PATCH v4 2/6] dir: make lookup_untracked() available outside of dir.c Ben Peart
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:51 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

Add a new get_be64 macro to enable 64 bit endian conversions on memory
that may or may not be aligned.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---
 compat/bswap.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compat/bswap.h b/compat/bswap.h
index d47c003544..f89fe7f4b5 100644
--- a/compat/bswap.h
+++ b/compat/bswap.h
@@ -158,6 +158,7 @@ static inline uint64_t git_bswap64(uint64_t x)
 
 #define get_be16(p)	ntohs(*(unsigned short *)(p))
 #define get_be32(p)	ntohl(*(unsigned int *)(p))
+#define get_be64(p)	ntohll(*(uint64_t *)(p))
 #define put_be32(p, v)	do { *(unsigned int *)(p) = htonl(v); } while (0)
 
 #else
@@ -170,6 +171,9 @@ static inline uint64_t git_bswap64(uint64_t x)
 	(*((unsigned char *)(p) + 1) << 16) | \
 	(*((unsigned char *)(p) + 2) <<  8) | \
 	(*((unsigned char *)(p) + 3) <<  0) )
+#define get_be64(p)	( \
+	((uint64_t)get_be32((unsigned char *)(p) + 0) << 32) | \
+	((uint64_t)get_be32((unsigned char *)(p) + 4) <<  0)
 #define put_be32(p, v)	do { \
 	unsigned int __v = (v); \
 	*((unsigned char *)(p) + 0) = __v >> 24; \
-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v4 2/6] dir: make lookup_untracked() available outside of dir.c
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
  2017-06-01 15:51 ` [PATCH v4 1/6] bswap: add 64 bit endianness helper get_be64 Ben Peart
@ 2017-06-01 15:51 ` Ben Peart
  2017-06-01 15:51 ` [PATCH v4 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files Ben Peart
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:51 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

Remove the static qualifier from lookup_untracked() and make it
available to other modules by exporting it from dir.h.  This will be
used later when we need to find entries to mark 'fsmonitor dirty.'

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---
 dir.c | 2 +-
 dir.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index f451bfa48c..1b5558fdf9 100644
--- a/dir.c
+++ b/dir.c
@@ -660,7 +660,7 @@ static void trim_trailing_spaces(char *buf)
  *
  * If "name" has the trailing slash, it'll be excluded in the search.
  */
-static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
 						    struct untracked_cache_dir *dir,
 						    const char *name, int len)
 {
diff --git a/dir.h b/dir.h
index bf23a470af..9e387551bd 100644
--- a/dir.h
+++ b/dir.h
@@ -339,4 +339,7 @@ extern void connect_work_tree_and_git_dir(const char *work_tree, const char *git
 extern void relocate_gitdir(const char *path,
 			    const char *old_git_dir,
 			    const char *new_git_dir);
+struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+					     struct untracked_cache_dir *dir,
+					     const char *name, int len);
 #endif
-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v4 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
  2017-06-01 15:51 ` [PATCH v4 1/6] bswap: add 64 bit endianness helper get_be64 Ben Peart
  2017-06-01 15:51 ` [PATCH v4 2/6] dir: make lookup_untracked() available outside of dir.c Ben Peart
@ 2017-06-01 15:51 ` Ben Peart
  2017-06-01 15:51 ` [PATCH v4 4/6] fsmonitor: add test cases for fsmonitor extension Ben Peart
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:51 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

When the index is read from disk, the query-fsmonitor index extension is
used to flag the last known potentially dirty index and untracked cache
entries.

If git finds out some entries are 'fsmonitor-dirty', but are really
unchanged (e.g. the file was changed, then reverted back), then Git will
clear the marking in the extension. If git adds or updates an index
entry, it is marked 'fsmonitor-dirty' to ensure it is checked for
changes in the working directory.

Before the 'fsmonitor-dirty' flags are used to limit the scope of the
files to be checked, the query-fsmonitor hook proc is called with the
time the index was last updated.  The hook proc returns the list of
files changed since that last updated time and the list of
potentially dirty entries is updated to reflect the current state.

refresh_index() and valid_cached_dir() are updated so that any entry not
flagged as potentially dirty is not checked as it cannot have any
changes.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---
 Makefile               |   1 +
 builtin/update-index.c |   1 +
 cache.h                |   5 ++
 config.c               |   5 ++
 dir.c                  |  14 +++
 dir.h                  |   2 +
 entry.c                |   1 +
 environment.c          |   1 +
 fsmonitor.c            | 238 +++++++++++++++++++++++++++++++++++++++++++++++++
 fsmonitor.h            |   9 ++
 read-cache.c           |  28 +++++-
 unpack-trees.c         |   1 +
 12 files changed, 304 insertions(+), 2 deletions(-)
 create mode 100644 fsmonitor.c
 create mode 100644 fsmonitor.h

diff --git a/Makefile b/Makefile
index e35542e631..488a4466cc 100644
--- a/Makefile
+++ b/Makefile
@@ -760,6 +760,7 @@ LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
+LIB_OBJS += fsmonitor.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
 LIB_OBJS += graph.o
diff --git a/builtin/update-index.c b/builtin/update-index.c
index ebfc09faa0..32fd977b43 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -232,6 +232,7 @@ static int mark_ce_flags(const char *path, int flag, int mark)
 		else
 			active_cache[pos]->ce_flags &= ~flag;
 		active_cache[pos]->ce_flags |= CE_UPDATE_IN_BASE;
+		active_cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
 		cache_tree_invalidate_path(&the_index, path);
 		active_cache_changed |= CE_ENTRY_CHANGED;
 		return 0;
diff --git a/cache.h b/cache.h
index 188811920c..58e5abf69f 100644
--- a/cache.h
+++ b/cache.h
@@ -201,6 +201,7 @@ struct cache_entry {
 #define CE_ADDED             (1 << 19)
 
 #define CE_HASHED            (1 << 20)
+#define CE_FSMONITOR_DIRTY   (1 << 21)
 #define CE_WT_REMOVE         (1 << 22) /* remove in work directory */
 #define CE_CONFLICTED        (1 << 23)
 
@@ -324,6 +325,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED	(1 << 5)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
 #define UNTRACKED_CHANGED	(1 << 7)
+#define FSMONITOR_CHANGED	(1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -342,6 +344,8 @@ struct index_state {
 	struct hashmap dir_hash;
 	unsigned char sha1[20];
 	struct untracked_cache *untracked;
+	uint64_t fsmonitor_last_update;
+	struct ewah_bitmap *fsmonitor_dirty;
 };
 
 extern struct index_state the_index;
@@ -766,6 +770,7 @@ extern int precomposed_unicode;
 extern int protect_hfs;
 extern int protect_ntfs;
 extern int git_db_env, git_index_env, git_graft_env, git_common_dir_env;
+extern int core_fsmonitor;
 
 /*
  * Include broken refs in all ref iterations, which will
diff --git a/config.c b/config.c
index bb4d735701..1a8108636d 100644
--- a/config.c
+++ b/config.c
@@ -1232,6 +1232,11 @@ static int git_default_core_config(const char *var, const char *value)
 		return 0;
 	}
 
+	if (!strcmp(var, "core.fsmonitor")) {
+		core_fsmonitor = git_config_bool(var, value);
+		return 0;
+	}
+
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/dir.c b/dir.c
index 1b5558fdf9..37f1c580a5 100644
--- a/dir.c
+++ b/dir.c
@@ -16,6 +16,7 @@
 #include "utf8.h"
 #include "varint.h"
 #include "ewah/ewok.h"
+#include "fsmonitor.h"
 
 /*
  * Tells read_directory_recursive how a file or directory should be treated.
@@ -1652,6 +1653,18 @@ static int valid_cached_dir(struct dir_struct *dir,
 	if (!untracked)
 		return 0;
 
+	refresh_by_fsmonitor(&the_index);
+	if (dir->untracked->use_fsmonitor) {
+		/*
+		 * With fsmonitor, we can trust the untracked cache's
+		 * valid field.
+		 */
+		if (untracked->valid)
+			goto skip_stat;
+		else
+			invalidate_directory(dir->untracked, untracked);
+	}
+
 	if (stat(path->len ? path->buf : ".", &st)) {
 		invalidate_directory(dir->untracked, untracked);
 		memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
@@ -1665,6 +1678,7 @@ static int valid_cached_dir(struct dir_struct *dir,
 		return 0;
 	}
 
+skip_stat:
 	if (untracked->check_only != !!check_only) {
 		invalidate_directory(dir->untracked, untracked);
 		return 0;
diff --git a/dir.h b/dir.h
index 9e387551bd..ff6a00abcc 100644
--- a/dir.h
+++ b/dir.h
@@ -139,6 +139,8 @@ struct untracked_cache {
 	int gitignore_invalidated;
 	int dir_invalidated;
 	int dir_opened;
+	/* fsmonitor invalidation data */
+	unsigned int use_fsmonitor : 1;
 };
 
 struct dir_struct {
diff --git a/entry.c b/entry.c
index d2b512da90..c2d3c1079c 100644
--- a/entry.c
+++ b/entry.c
@@ -221,6 +221,7 @@ static int write_entry(struct cache_entry *ce,
 			lstat(ce->name, &st);
 		fill_stat_cache_info(ce, &st);
 		ce->ce_flags |= CE_UPDATE_IN_BASE;
+		ce->ce_flags |= CE_FSMONITOR_DIRTY;
 		state->istate->cache_changed |= CE_ENTRY_CHANGED;
 	}
 	return 0;
diff --git a/environment.c b/environment.c
index 560408953c..1afabbae8c 100644
--- a/environment.c
+++ b/environment.c
@@ -64,6 +64,7 @@ int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */
 unsigned long pack_size_limit_cfg;
 enum hide_dotfiles_type hide_dotfiles = HIDE_DOTFILES_DOTGITONLY;
 enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET;
+int core_fsmonitor;
 
 #ifndef PROTECT_HFS_DEFAULT
 #define PROTECT_HFS_DEFAULT 0
diff --git a/fsmonitor.c b/fsmonitor.c
new file mode 100644
index 0000000000..3ce262d47d
--- /dev/null
+++ b/fsmonitor.c
@@ -0,0 +1,238 @@
+#include "cache.h"
+#include "dir.h"
+#include "ewah/ewok.h"
+#include "run-command.h"
+#include "strbuf.h"
+#include "fsmonitor.h"
+
+#define INDEX_EXTENSION_VERSION	1
+#define HOOK_INTERFACE_VERSION		1
+
+static struct untracked_cache_dir *find_untracked_cache_dir(
+	struct untracked_cache *uc, struct untracked_cache_dir *ucd,
+	const char *name)
+{
+	const char *end;
+	struct untracked_cache_dir *dir = ucd;
+
+	if (!*name)
+		return dir;
+
+	end = strchr(name, '/');
+	if (end) {
+		dir = lookup_untracked(uc, ucd, name, end - name);
+		if (dir)
+			return find_untracked_cache_dir(uc, dir, end + 1);
+	}
+
+	return dir;
+}
+
+/* This function will be passed to ewah_each_bit() */
+static void mark_no_fsmonitor(size_t pos, void *is)
+{
+	struct index_state *istate = is;
+	struct untracked_cache_dir *dir;
+	struct cache_entry *ce = istate->cache[pos];
+
+	assert(pos < istate->cache_nr);
+	ce->ce_flags |= CE_FSMONITOR_DIRTY;
+
+	if (!istate->untracked || !istate->untracked->root)
+		return;
+
+	dir = find_untracked_cache_dir(istate->untracked, istate->untracked->root, ce->name);
+	if (dir)
+		dir->valid = 0;
+}
+
+int read_fsmonitor_extension(struct index_state *istate, const void *data,
+	unsigned long sz)
+{
+	const char *index = data;
+	uint32_t hdr_version;
+	uint32_t ewah_size;
+	int ret;
+
+	if (sz < sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint32_t))
+		return error("corrupt fsmonitor extension (too short)");
+
+	hdr_version = get_be32(index);
+	index += sizeof(uint32_t);
+	if (hdr_version != INDEX_EXTENSION_VERSION)
+		return error("bad fsmonitor version %d", hdr_version);
+
+	istate->fsmonitor_last_update = get_be64(index);
+	index += sizeof(uint64_t);
+
+	ewah_size = get_be32(index);
+	index += sizeof(uint32_t);
+
+	istate->fsmonitor_dirty = ewah_new();
+	ret = ewah_read_mmap(istate->fsmonitor_dirty, index, ewah_size);
+	if (ret != ewah_size) {
+		ewah_free(istate->fsmonitor_dirty);
+		istate->fsmonitor_dirty = NULL;
+		return error("failed to parse ewah bitmap reading fsmonitor index extension");
+	}
+
+	return 0;
+}
+
+void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
+{
+	uint32_t hdr_version;
+	uint64_t tm;
+	struct ewah_bitmap *bitmap;
+	int i;
+	uint32_t ewah_start;
+	uint32_t ewah_size = 0;
+	int fixup = 0;
+
+	hdr_version = htonl(INDEX_EXTENSION_VERSION);
+	strbuf_add(sb, &hdr_version, sizeof(uint32_t));
+
+	tm = htonll((uint64_t)istate->fsmonitor_last_update);
+	strbuf_add(sb, &tm, sizeof(uint64_t));
+	fixup = sb->len;
+	strbuf_add(sb, &ewah_size, sizeof(uint32_t)); /* we'll fix this up later */
+
+	ewah_start = sb->len;
+	bitmap = ewah_new();
+	for (i = 0; i < istate->cache_nr; i++)
+		if (istate->cache[i]->ce_flags & CE_FSMONITOR_DIRTY)
+			ewah_set(bitmap, i);
+	ewah_serialize_strbuf(bitmap, sb);
+	ewah_free(bitmap);
+
+	/* fix up size field */
+	ewah_size = htonl(sb->len - ewah_start);
+	memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t));
+}
+
+static void mark_file_dirty(struct index_state *istate, const char *name)
+{
+	struct untracked_cache_dir *dir;
+	int pos;
+
+	/* find it in the index and mark that entry as dirty */
+	pos = index_name_pos(istate, name, strlen(name));
+	if (pos >= 0)
+		istate->cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
+
+	/*
+	 * Find the corresponding directory in the untracked cache
+	 * and mark it as invalid
+	 */
+	if (!istate->untracked || !istate->untracked->root)
+		return;
+
+	dir = find_untracked_cache_dir(istate->untracked, istate->untracked->root, name);
+	if (dir)
+		dir->valid = 0;
+}
+
+/*
+ * Call the query-fsmonitor hook passing the time of the last saved results.
+ */
+static int query_fsmonitor(int version, uint64_t last_update, struct strbuf *query_result)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	char ver[64];
+	char date[64];
+	const char *argv[4];
+
+	if (!(argv[0] = find_hook("query-fsmonitor")))
+		return -1;
+
+	snprintf(ver, sizeof(version), "%d", version);
+	snprintf(date, sizeof(date), "%" PRIuMAX, (uintmax_t)last_update);
+	argv[1] = ver;
+	argv[2] = date;
+	argv[3] = NULL;
+	cp.argv = argv;
+	cp.out = -1;
+
+	return capture_command(&cp, query_result, 1024);
+}
+
+void process_fsmonitor_extension(struct index_state *istate)
+{
+	if (!istate->fsmonitor_dirty)
+		return;
+
+	ewah_each_bit(istate->fsmonitor_dirty, mark_no_fsmonitor, istate);
+	ewah_free(istate->fsmonitor_dirty);
+	istate->fsmonitor_dirty = NULL;
+}
+
+void refresh_by_fsmonitor(struct index_state *istate)
+{
+	static int has_run_once = 0;
+	struct strbuf query_result = STRBUF_INIT;
+	int query_success = 0;
+	size_t bol = 0; /* beginning of line */
+	uint64_t last_update;
+	char *buf, *entry;
+	int i;
+
+	if (!core_fsmonitor || has_run_once)
+		return;
+	has_run_once = 1;
+
+	/*
+	 * This could be racy so save the date/time now and the hook
+	 * should be inclusive to ensure we don't miss potential changes.
+	 */
+	last_update = getnanotime();
+
+	/*
+	 * If we have a last update time, call query-monitor for the set of
+	 * changes since that time.
+	 */
+	if (istate->fsmonitor_last_update)
+		query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION,
+							istate->fsmonitor_last_update, &query_result);
+
+	if (query_success) {
+		/* Mark all entries returned by the monitor as dirty */
+		buf = entry = query_result.buf;
+		for (i = 0; i < query_result.len; i++) {
+			if (buf[i] != '\0')
+				continue;
+			mark_file_dirty(istate, buf + bol);
+			bol = i + 1;
+		}
+		if (bol < query_result.len)
+			mark_file_dirty(istate, buf + bol);
+
+		/* Mark all clean entries up-to-date */
+		for (i = 0; i < istate->cache_nr; i++) {
+			struct cache_entry *ce = istate->cache[i];
+			if (ce_stage(ce) || (ce->ce_flags & CE_FSMONITOR_DIRTY))
+				continue;
+			ce_mark_uptodate(ce);
+		}
+
+		/*
+		 * Now that we've marked the invalid entries in the
+		 * untracked-cache itself, we can mark the untracked cache for
+		 * fsmonitor usage.
+		 */
+		if (istate->untracked)
+			istate->untracked->use_fsmonitor = 1;
+	} else {
+		/* if we can't update the cache, fall back to checking them all */
+		for (i = 0; i < istate->cache_nr; i++)
+			istate->cache[i]->ce_flags |= CE_FSMONITOR_DIRTY;
+
+		/* mark the untracked cache as unusable for fsmonitor */
+		if (istate->untracked)
+			istate->untracked->use_fsmonitor = 0;
+	}
+	strbuf_release(&query_result);
+
+	/* Now that we've updated istate, save the last_update time */
+	istate->fsmonitor_last_update = last_update;
+	istate->cache_changed |= FSMONITOR_CHANGED;
+}
diff --git a/fsmonitor.h b/fsmonitor.h
new file mode 100644
index 0000000000..57b061688f
--- /dev/null
+++ b/fsmonitor.h
@@ -0,0 +1,9 @@
+#ifndef FSMONITOR_H
+#define FSMONITOR_H
+
+int read_fsmonitor_extension(struct index_state *istate, const void *data, unsigned long sz);
+void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate);
+void process_fsmonitor_extension(struct index_state *istate);
+void refresh_by_fsmonitor(struct index_state *istate);
+
+#endif
diff --git a/read-cache.c b/read-cache.c
index 3339de8124..5b52f08db2 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -18,6 +18,7 @@
 #include "varint.h"
 #include "split-index.h"
 #include "utf8.h"
+#include "fsmonitor.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -37,11 +38,12 @@
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b	  /* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452	  /* "UNTR" */
+#define CACHE_EXT_FSMONITOR 0x46534D4E	  /* "FSMN" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
+		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED | FSMONITOR_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
@@ -61,6 +63,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
 	free(old);
 	set_index_entry(istate, nr, ce);
 	ce->ce_flags |= CE_UPDATE_IN_BASE;
+	ce->ce_flags |= CE_FSMONITOR_DIRTY;
 	istate->cache_changed |= CE_ENTRY_CHANGED;
 }
 
@@ -777,6 +780,7 @@ int chmod_index_entry(struct index_state *istate, struct cache_entry *ce,
 	}
 	cache_tree_invalidate_path(istate, ce->name);
 	ce->ce_flags |= CE_UPDATE_IN_BASE;
+	ce->ce_flags |= CE_FSMONITOR_DIRTY;
 	istate->cache_changed |= CE_ENTRY_CHANGED;
 
 	return 0;
@@ -1344,6 +1348,8 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 	const char *added_fmt;
 	const char *unmerged_fmt;
 
+	refresh_by_fsmonitor(istate);
+
 	modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n");
 	deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n");
 	typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
@@ -1380,8 +1386,11 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 			continue;
 
 		new = refresh_cache_ent(istate, ce, options, &cache_errno, &changed);
-		if (new == ce)
+		if (new == ce) {
+			ce->ce_flags &= ~CE_FSMONITOR_DIRTY;
 			continue;
+		}
+
 		if (!new) {
 			const char *fmt;
 
@@ -1391,6 +1400,7 @@ int refresh_index(struct index_state *istate, unsigned int flags,
 				 */
 				ce->ce_flags &= ~CE_VALID;
 				ce->ce_flags |= CE_UPDATE_IN_BASE;
+				ce->ce_flags |= CE_FSMONITOR_DIRTY;
 				istate->cache_changed |= CE_ENTRY_CHANGED;
 			}
 			if (quiet)
@@ -1549,6 +1559,9 @@ static int read_index_extension(struct index_state *istate,
 	case CACHE_EXT_UNTRACKED:
 		istate->untracked = read_untracked_extension(data, sz);
 		break;
+	case CACHE_EXT_FSMONITOR:
+		read_fsmonitor_extension(istate, data, sz);
+		break;
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1721,6 +1734,7 @@ static void post_read_index_from(struct index_state *istate)
 	check_ce_order(istate);
 	tweak_untracked_cache(istate);
 	tweak_split_index(istate);
+	process_fsmonitor_extension(istate);
 }
 
 /* remember to discard_cache() before reading a different cache! */
@@ -2300,6 +2314,16 @@ static int do_write_index(struct index_state *istate, int newfd,
 		if (err)
 			return -1;
 	}
+	if (!strip_extensions && istate->fsmonitor_last_update) {
+		struct strbuf sb = STRBUF_INIT;
+
+		write_fsmonitor_extension(&sb, istate);
+		err = write_index_ext_header(&c, newfd, CACHE_EXT_FSMONITOR, sb.len) < 0
+			|| ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
 
 	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
 		return -1;
diff --git a/unpack-trees.c b/unpack-trees.c
index aa15111fef..259e6960b9 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -412,6 +412,7 @@ static int apply_sparse_checkout(struct index_state *istate,
 		ce->ce_flags &= ~CE_SKIP_WORKTREE;
 	if (was_skip_worktree != ce_skip_worktree(ce)) {
 		ce->ce_flags |= CE_UPDATE_IN_BASE;
+		ce->ce_flags |= CE_FSMONITOR_DIRTY;
 		istate->cache_changed |= CE_ENTRY_CHANGED;
 	}
 
-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v4 4/6] fsmonitor: add test cases for fsmonitor extension
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
                   ` (2 preceding siblings ...)
  2017-06-01 15:51 ` [PATCH v4 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files Ben Peart
@ 2017-06-01 15:51 ` Ben Peart
  2017-06-01 15:51 ` [PATCH v4 5/6] fsmonitor: add documentation for the " Ben Peart
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:51 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

Add test cases that ensure status results are correct when using the new
fsmonitor extension.  Test untracked, modified, and new files by
ensuring the results are identical to when not using the extension.

Add a test to ensure updates to the index properly mark corresponding
entries in the index extension as dirty so that the status is correct
after commands that modify the index but don't trigger changes in the
working directory.

Add a test that verifies that if the fsmonitor extension doesn't tell
git about a change, it doesn't discover it on its own.  This ensures
git is honoring the extension and that we get the performance benefits
desired.

All test hooks output a marker file that is used to ensure the hook
was actually used to generate the test results.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---
 t/t7519-status-fsmonitor.sh | 173 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100755 t/t7519-status-fsmonitor.sh

diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
new file mode 100755
index 0000000000..458eabe6dc
--- /dev/null
+++ b/t/t7519-status-fsmonitor.sh
@@ -0,0 +1,173 @@
+#!/bin/sh
+
+test_description='git status with file system watcher'
+
+. ./test-lib.sh
+
+clean_repo () {
+	git reset --hard HEAD &&
+	git clean -fd &&
+	rm -f marker
+}
+
+dirty_repo () {
+	: >untracked &&
+	: >dir1/untracked &&
+	: >dir2/untracked &&
+	echo 1 >modified &&
+	echo 2 >dir1/modified &&
+	echo 3 >dir2/modified &&
+	echo 4 >new &&
+	echo 5 >dir1/new &&
+	echo 6 >dir2/new &&
+	git add new &&
+	git add dir1/new &&
+	git add dir2/new
+}
+
+# The test query-fsmonitor hook proc will output a marker file we can use to
+# ensure the hook was actually used to generate the correct results.
+
+# fsmonitor works correctly with or without the untracked cache
+# but if it is available, we'll turn it on to ensure we test that
+# codepath as well.
+
+test_lazy_prereq UNTRACKED_CACHE '
+	{ git update-index --test-untracked-cache; ret=$?; } &&
+	test $ret -ne 1
+'
+
+if test_have_prereq UNTRACKED_CACHE; then
+	git config core.untrackedcache true
+else
+	git config core.untrackedcache false
+fi
+
+test_expect_success 'setup' '
+	mkdir -p .git/hooks &&
+	: >tracked &&
+	: >modified &&
+	mkdir dir1 &&
+	: >dir1/tracked &&
+	: >dir1/modified &&
+	mkdir dir2 &&
+	: >dir2/tracked &&
+	: >dir2/modified &&
+	git add . &&
+	test_tick &&
+	git commit -m initial &&
+	git config core.fsmonitor true &&
+	cat >.gitignore <<-\EOF
+	.gitignore
+	expect*
+	output*
+	marker*
+	EOF
+'
+
+# Ensure commands that call refresh_index() to move the index back in time
+# properly invalidate the fsmonitor cache
+
+test_expect_success 'refresh_index() invalidates fsmonitor cache' '
+	git status &&
+	test_path_is_missing marker &&
+	dirty_repo &&
+	write_script .git/hooks/query-fsmonitor<<-\EOF &&
+	:>marker
+	EOF
+	git add . &&
+	git commit -m "to reset" &&
+	git status &&
+	test_path_is_file marker &&
+	git reset HEAD~1 &&
+	rm -f marker &&
+	git status >output &&
+	test_path_is_file marker &&
+	git -c core.fsmonitor=false status >expect &&
+	test_i18ncmp expect output
+'
+
+# Now make sure it's actually skipping the check for modified and untracked
+# files unless it is told about them.  Note, after "git reset --hard HEAD" no
+# extensions exist other than 'TREE' so do a "git status" to get the extension
+# written before testing the results.
+
+test_expect_success "status doesn't detect unreported modifications" '
+	write_script .git/hooks/query-fsmonitor<<-\EOF &&
+	:>marker
+	EOF
+	clean_repo &&
+	git status &&
+	test_path_is_missing marker &&
+	: >untracked &&
+	echo 2 >dir1/modified &&
+	git status >output &&
+	test_path_is_file marker &&
+	test_i18ngrep ! "Changes not staged for commit:" output &&
+	test_i18ngrep ! "Untracked files:" output &&
+	write_script .git/hooks/query-fsmonitor<<-\EOF &&
+	:>marker
+	printf "untracked\0"
+	printf "dir1/modified\0"
+	EOF
+	rm -f marker &&
+	git status >output &&
+	test_path_is_file marker &&
+	test_i18ngrep "Changes not staged for commit:" output &&
+	test_i18ngrep "Untracked files:" output
+'
+
+# Status is well tested elsewhere so we'll just ensure that the results are
+# the same when using core.fsmonitor. First call after turning on the option
+# does a complete scan so we need to do two calls to ensure we test the new
+# codepath.
+
+test_expect_success 'status with core.untrackedcache false' '
+	git config core.untrackedcache false &&
+	write_script .git/hooks/query-fsmonitor<<-\EOF &&
+	if [ $1 -ne 1 ]
+	then
+		echo -e "Unsupported query-fsmonitor hook version.\n" >&2
+		exit 1;
+	fi
+	: >marker
+	printf "untracked\0"
+	printf "dir1/untracked\0"
+	printf "dir2/untracked\0"
+	printf "modified\0"
+	printf "dir1/modified\0"
+	printf "dir2/modified\0"
+	printf "new\0""
+	printf "dir1/new\0"
+	printf "dir2/new\0"
+	EOF
+	clean_repo &&
+	dirty_repo &&
+	git -c core.fsmonitor=false status >expect &&
+	clean_repo &&
+	git status &&
+	test_path_is_missing marker &&
+	dirty_repo &&
+	git status >output &&
+	test_path_is_file marker &&
+	test_i18ncmp expect output
+'
+
+if ! test_have_prereq UNTRACKED_CACHE; then
+	skip_all='This system does not support untracked cache'
+	test_done
+fi
+
+test_expect_success 'status with core.untrackedcache true' '
+	git config core.untrackedcache true &&
+	git -c core.fsmonitor=false status >expect &&
+	clean_repo &&
+	git status &&
+	test_path_is_missing marker &&
+	dirty_repo &&
+	git status >output &&
+	test_path_is_file marker &&
+	test_i18ncmp expect output
+'
+
+test_done
-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v4 5/6] fsmonitor: add documentation for the fsmonitor extension.
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
                   ` (3 preceding siblings ...)
  2017-06-01 15:51 ` [PATCH v4 4/6] fsmonitor: add test cases for fsmonitor extension Ben Peart
@ 2017-06-01 15:51 ` Ben Peart
  2017-06-01 15:51 ` [PATCH v4 6/6] fsmonitor: add a sample query-fsmonitor hook script for Watchman Ben Peart
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:51 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

This includes the core.fsmonitor setting, the query-fsmonitor hook,
and the fsmonitor index extension.

Signed-off-by: Ben Peart <benpeart@microsoft.com>
---
 Documentation/config.txt                 |  7 +++++++
 Documentation/githooks.txt               | 23 +++++++++++++++++++++++
 Documentation/technical/index-format.txt | 19 +++++++++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index e0b9fd0bc3..5211388167 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -389,6 +389,13 @@ core.protectNTFS::
 	8.3 "short" names.
 	Defaults to `true` on Windows, and `false` elsewhere.
 
+core.fsmonitor::
+	If set to true, call the query-fsmonitor hook proc which will
+	identify all files that may have had changes since the last
+	request. This information is used to speed up operations like
+	'git commit' and 'git status' by limiting what git must scan to
+	detect changes.
+
 core.trustctime::
 	If false, the ctime differences between the index and the
 	working tree are ignored; useful when the inode change time
diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt
index 706091a569..48127e8729 100644
--- a/Documentation/githooks.txt
+++ b/Documentation/githooks.txt
@@ -448,6 +448,29 @@ The commits are guaranteed to be listed in the order that they were
 processed by rebase.
 
 
+[[query-fsmonitor]]
+query-fsmonitor
+~~~~~~~~~~~~~~~
+
+This hook is invoked when the configuration option core.fsmonitor is
+set and git needs to identify changed or untracked files.  It takes
+two arguments, a version (currently 1) and the time in elapsed
+nanoseconds since midnight, January 1, 1970.
+
+The hook should output to stdout the list of all files in the working
+directory that may have changed since the requested time.  The logic
+should be inclusive so that it does not miss any potential changes.
+The paths should be relative to the root of the working directory
+and be separated by a single NUL.
+
+Git will limit what files it checks for changes as well as which
+directories are checked for untracked files based on the path names
+given.
+
+The exit status determines whether git will use the data from the
+hook to limit its search.  On error, it will fall back to verifying
+all files and folders.
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index ade0b0c445..7aeeea6f94 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -295,3 +295,22 @@ The remaining data of each directory block is grouped by type:
     in the previous ewah bitmap.
 
   - One NUL.
+
+== File System Monitor cache
+
+  The file system monitor cache tracks files for which the query-fsmonitor
+  hook has told us about changes.  The signature for this extension is
+  { 'F', 'S', 'M', 'N' }.
+
+  The extension starts with
+
+  - 32-bit version number: the current supported version is 1.
+
+  - 64-bit time: the extension data reflects all changes through the given
+	time which is stored as the nanoseconds elapsed since midnight,
+	January 1, 1970.
+
+  - 32-bit bitmap size: the size of the CE_FSMONITOR_DIRTY bitmap.
+
+  - An ewah bitmap, the n-th bit indicates whether the n-th index entry
+    is CE_FSMONITOR_DIRTY.
-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH v4 6/6] fsmonitor: add a sample query-fsmonitor hook script for Watchman
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
                   ` (4 preceding siblings ...)
  2017-06-01 15:51 ` [PATCH v4 5/6] fsmonitor: add documentation for the " Ben Peart
@ 2017-06-01 15:51 ` Ben Peart
  2017-06-07 21:38   ` Ævar Arnfjörð Bjarmason
  2017-06-01 19:57 ` [PATCH v4 0/6] Fast git status via a file system watcher Ævar Arnfjörð Bjarmason
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 29+ messages in thread
From: Ben Peart @ 2017-06-01 15:51 UTC (permalink / raw)
  To: git
  Cc: gitster, benpeart, pclouds, johannes.schindelin, David.Turner,
	peff, christian.couder, avarab

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 2901 bytes --]

This hook script integrates the new fsmonitor capabilities of git with
the cross platform Watchman file watching service. To use the script:

Download and install Watchman from https://facebook.github.io/watchman/
and instruct Watchman to watch your working directory for changes
('watchman watch-project /usr/src/git').

Rename the sample integration hook from query-fsmonitor.sample to
query-fsmonitor.

Configure git to use the extension ('git config core.fsmonitor true')
and optionally turn on the untracked cache for optimal performance
('git config core.untrackedcache true').

Signed-off-by: Ben Peart <benpeart@microsoft.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 templates/hooks--query-fsmonitor.sample | 60 +++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100755 templates/hooks--query-fsmonitor.sample

diff --git a/templates/hooks--query-fsmonitor.sample b/templates/hooks--query-fsmonitor.sample
new file mode 100755
index 0000000000..941c4c5b57
--- /dev/null
+++ b/templates/hooks--query-fsmonitor.sample
@@ -0,0 +1,60 @@
+#!/bin/sh
+#
+# An example hook script to integrate Watchman
+# (https://facebook.github.io/watchman/) with git to provide fast
+# git status.
+#
+# The hook is passed a version (currently 1) and a time in nanoseconds
+# formatted as a string and outputs to stdout all files that have been
+# modified since the given time. Paths must be relative to the root of
+# the working tree and separated by a single NUL.
+#
+# To enable this hook, rename this file to "query-fsmonitor"
+
+# check the hook interface version
+if [ $1 -eq 1 ]
+then
+	# convert nanoseconds to seconds
+	time_t=$(($2/1000000000))
+else
+	echo -e "Unsupported query-fsmonitor hook version.\nFalling back to scanning...\n" >&2
+	exit 1;
+fi
+
+# Convert unix style paths to escaped Windows style paths
+case "$(uname -s)" in
+MINGW*|MSYS_NT*)
+  GIT_WORK_TREE="$(cygpath -aw "$PWD" | sed 's,\\,\\\\,g')"
+  ;;
+*)
+  GIT_WORK_TREE="$PWD"
+  ;;
+esac
+
+# Query Watchman for all the changes since the requested time
+echo "[\"query\", \"$GIT_WORK_TREE\", {\"since\": $time_t, \"fields\":[\"name\"]}]" | \
+	watchman -j |
+	perl -0666 -e '
+		use strict;
+		use warnings;
+
+		my $stdin = <>;
+		die "Watchman: command returned no output.\nFalling back to scanning...\n" if $stdin eq "";
+		die "Watchman: command returned invalid output: $stdin\nFalling back to scanning...\n" unless $stdin =~ /^\{/;
+
+		my $json_pkg;
+		eval {
+			require JSON::XS;
+			$json_pkg = "JSON::XS";
+			1;
+		} or do {
+			require JSON::PP;
+			$json_pkg = "JSON::PP";
+		};
+
+		my $o = $json_pkg->new->utf8->decode($stdin);
+		die "Watchman: $o->{error}.\nFalling back to scanning...\n" if $o->{error};
+
+		local $, = "\0";
+		print @{$o->{files}};
+	'
-- 
2.13.0.windows.1.9.gc201c67b71


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
                   ` (5 preceding siblings ...)
  2017-06-01 15:51 ` [PATCH v4 6/6] fsmonitor: add a sample query-fsmonitor hook script for Watchman Ben Peart
@ 2017-06-01 19:57 ` Ævar Arnfjörð Bjarmason
  2017-06-01 21:06   ` Ben Peart
  2017-06-01 20:51 ` Ævar Arnfjörð Bjarmason
  2017-06-02  1:56 ` [PATCH v4 0/6] Fast git status via a file system watcher Junio C Hamano
  8 siblings, 1 reply; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-01 19:57 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder

On Thu, Jun 1, 2017 at 5:50 PM, Ben Peart <peartben@gmail.com> wrote:
> Changes from V3 include:
>  - update test script based on feedback
>  - update template hook proc with better post-processing code and make
>    it executable

Thanks, exciting stuff, do you have this pushed somewhere? I didn't
spot it it in your github repo. I had some issues applying this on top
of master @ 0339965c70, on 5/6 I got

    $ git am /tmp/original_msg.txt
    Applying: fsmonitor: add documentation for the fsmonitor extension.
    error: patch failed: Documentation/githooks.txt:448
    error: Documentation/githooks.txt: patch does not apply
    Patch failed at 0001 fsmonitor: add documentation for the
fsmonitor extension.
    The copy of the patch that failed is found in: .git/rebase-apply/patch
    When you have resolved this problem, run "git am --continue".
    If you prefer to skip this patch, run "git am --skip" instead.
    To restore the original branch and stop patching, run "git am --abort".

But it worked with patch, weirdly enough:

    $ patch -p1 </tmp/original_msg.txt
    (Stripping trailing CRs from patch; use --binary to disable.)
    patching file Documentation/config.txt
    Hunk #1 succeeded at 410 (offset 21 lines).
    (Stripping trailing CRs from patch; use --binary to disable.)
    patching file Documentation/githooks.txt
    Hunk #1 succeeded at 456 with fuzz 1 (offset 8 lines).
    (Stripping trailing CRs from patch; use --binary to disable.)
    patching file Documentation/technical/index-format.txt

The 6/6 patch failed due to an unknown charset y, you have
"Content-Type: text/plain; charset=y" in the header, worked after
manually munging it to "UTF-8", although it gave a warning...

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
                   ` (6 preceding siblings ...)
  2017-06-01 19:57 ` [PATCH v4 0/6] Fast git status via a file system watcher Ævar Arnfjörð Bjarmason
@ 2017-06-01 20:51 ` Ævar Arnfjörð Bjarmason
  2017-06-01 21:13   ` Ævar Arnfjörð Bjarmason
  2017-06-02  1:56 ` [PATCH v4 0/6] Fast git status via a file system watcher Junio C Hamano
  8 siblings, 1 reply; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-01 20:51 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder

On Thu, Jun 1, 2017 at 5:50 PM, Ben Peart <peartben@gmail.com> wrote:
> Changes from V3 include:
>  - update test script based on feedback
>  - update template hook proc with better post-processing code and make
>    it executable

I have watchman running finally, so aside from issues applying this I
can finally test this. I set up a watch of linux.git:

$ watchman watch $PWD
$ watchman watch-list|jq '.roots[]'
"/home/avar/g/linux"

And first, for simplicity, I'll be turning core.fsmonitore on later:

$ for c in fsmonitor untrackedCache splitIndex; do git config core.$c
false; done

On a hot fs cache running "git status" takes me 80ms, but if I flush caches:

# free && sync && echo 3 > /proc/sys/vm/drop_caches && free

Running "git status" takes me 4s. Now, right after flushing those caches:

$ date +%s
1496349235
$ time (echo '["query", "/home/avar/g/linux", {"since": 1496349235,
"fields":["name"]}]' | watchman -j) | jq '.files[]'

real    0m0.664s
user    0m0.000s
sys     0m0.004s
$ touch foo bar
$ time (echo '["query", "/home/avar/g/linux", {"since": 1496349235,
"fields":["name"]}]' | watchman -j) | jq '.files[]'
"bar"
"foo"

real    0m0.044s
user    0m0.000s
sys     0m0.000s

Without the monitor running "git status' takes me ~2.5s, i.e. from cold cache:

$ time GIT_TRACE=1 ~/g/git/git-status
20:34:49.154731 git.c:369               trace: built-in: git 'status'
On branch master
Your branch is up-to-date with 'origin/master'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)

        bar
        foo

nothing added to commit but untracked files present (use "git add" to track)

real    0m2.546s
user    0m0.060s
sys     0m0.228s

Now, I would expect the case where the working tree isn't in the fs
cache to be lightning fast, since the watchman command is really fast
(flushed the cache again, turned on the fsmonitor):

$ date +%s
1496349523
$ time (echo '["query", "/home/avar/g/linux", {"since": 1496349523,
"fields":["name"]}]' | watchman -j) | jq '.files[]'

real    0m0.529s
user    0m0.000s
sys     0m0.004s
$ touch foo bar
$ time (echo '["query", "/home/avar/g/linux", {"since": 1496349523,
"fields":["name"]}]' | watchman -j) | jq '.files[]'
"bar"
"foo"

real    0m0.312s
user    0m0.000s
sys     0m0.000s

But if I run "git status" (and I instrumented the hook to dump the
changed files) it takes a long time (those 10s are just the disk being
slow, but it should be ~0s, right?):

$ time GIT_TRACE=1 ~/g/git/git-status
20:39:11.250128 git.c:369               trace: built-in: git 'status'
20:39:14.586720 run-command.c:626       trace: run_command:
'.git/hooks/query-fsmonitor' '1' '1496349494197821000'
Watchman says these changed: bar foo hi there .git .git/index.lock .git/index
On branch master
Your branch is up-to-date with 'origin/master'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)

        bar
        foo

nothing added to commit but untracked files present (use "git add" to track)

real    0m10.791s
user    0m0.108s
sys     0m0.228s

If I re-run that it takes ~160-200ms with the hook, 80-120ms without
it, so in this case once the data is in the fscache watchman is
actually slower.

But manually running watchman shows that it's really fast, usually
returning within 3ms, I flush the fscache in the middle of this,
that's the 600ms time, that could be some global kernel lock or
something due to the odd manual proc operation, I haven't actually
tested this on a system under memory pressure:

$ for i in {1..10}; do (time (printf '["query", "/home/avar/g/linux",
{"since": %s, "fields":["name"]}]' $(($(date +%s)-3)) | watchman -j |
jq '.files[]')) 2>&1 | grep -e '"' -e ^real && touch $i && sleep 1;
done
real    0m0.004s
"1"
real    0m0.003s
"2"
"1"
real    0m0.605s
"3"
real    0m0.003s
"4"
"3"
real    0m0.003s
"5"
"4"
real    0m0.003s
"6"
"5"
real    0m0.003s
"7"
"6"
real    0m0.003s
"8"
"7"
real    0m0.003s
"9"
"8"
real    0m0.003s

So something really odd is going on here. This should be speeding up
"git status" a lot, even with a hot fs cache doing stat on all of
linux.git takes a lot longer than 3ms, but for some reason it's
slower.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 19:57 ` [PATCH v4 0/6] Fast git status via a file system watcher Ævar Arnfjörð Bjarmason
@ 2017-06-01 21:06   ` Ben Peart
  2017-06-01 21:12     ` Ævar Arnfjörð Bjarmason
  2017-06-01 21:13     ` Stefan Beller
  0 siblings, 2 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-01 21:06 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder



On 6/1/2017 3:57 PM, Ævar Arnfjörð Bjarmason wrote:
> On Thu, Jun 1, 2017 at 5:50 PM, Ben Peart <peartben@gmail.com> wrote:
>> Changes from V3 include:
>>   - update test script based on feedback
>>   - update template hook proc with better post-processing code and make
>>     it executable
> 
> Thanks, exciting stuff, do you have this pushed somewhere? I didn't
> spot it it in your github repo. I had some issues applying this on top
> of master @ 0339965c70, on 5/6 I got
> 

I just pushed this to github at 
https://github.com/benpeart/git-for-windows/tree/fsmonitor

>      $ git am /tmp/original_msg.txt
>      Applying: fsmonitor: add documentation for the fsmonitor extension.
>      error: patch failed: Documentation/githooks.txt:448
>      error: Documentation/githooks.txt: patch does not apply
>      Patch failed at 0001 fsmonitor: add documentation for the
> fsmonitor extension.
>      The copy of the patch that failed is found in: .git/rebase-apply/patch
>      When you have resolved this problem, run "git am --continue".
>      If you prefer to skip this patch, run "git am --skip" instead.
>      To restore the original branch and stop patching, run "git am --abort".
> 

Sorry, no idea on why this didn't work.  The patch was formatted with 
git format-patch but it's possible I've got something wrong.

> But it worked with patch, weirdly enough:
> 
>      $ patch -p1 </tmp/original_msg.txt
>      (Stripping trailing CRs from patch; use --binary to disable.)
>      patching file Documentation/config.txt
>      Hunk #1 succeeded at 410 (offset 21 lines).
>      (Stripping trailing CRs from patch; use --binary to disable.)
>      patching file Documentation/githooks.txt
>      Hunk #1 succeeded at 456 with fuzz 1 (offset 8 lines).
>      (Stripping trailing CRs from patch; use --binary to disable.)
>      patching file Documentation/technical/index-format.txt
> 
> The 6/6 patch failed due to an unknown charset y, you have
> "Content-Type: text/plain; charset=y" in the header, worked after
> manually munging it to "UTF-8", although it gave a warning...
> 

The only thing I see different about this patch is the special 
characters of your name in the sign-off line.  The call to git 
send-email prompted me about encoding - I wonder if my answer was 
incorrect?  Given you've probably dealt with your name in git patches 
before :), what should my answer be?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 21:06   ` Ben Peart
@ 2017-06-01 21:12     ` Ævar Arnfjörð Bjarmason
  2017-06-01 21:13     ` Stefan Beller
  1 sibling, 0 replies; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-01 21:12 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder

On Thu, Jun 1, 2017 at 11:06 PM, Ben Peart <peartben@gmail.com> wrote:
> On 6/1/2017 3:57 PM, Ævar Arnfjörð Bjarmason wrote:
>>
>> On Thu, Jun 1, 2017 at 5:50 PM, Ben Peart <peartben@gmail.com> wrote:
>>>
>>> Changes from V3 include:
>>>   - update test script based on feedback
>>>   - update template hook proc with better post-processing code and make
>>>     it executable
>>
>>
>> Thanks, exciting stuff, do you have this pushed somewhere? I didn't
>> spot it it in your github repo. I had some issues applying this on top
>> of master @ 0339965c70, on 5/6 I got
>>
>
> I just pushed this to github at
> https://github.com/benpeart/git-for-windows/tree/fsmonitor
>
>>      $ git am /tmp/original_msg.txt
>>      Applying: fsmonitor: add documentation for the fsmonitor extension.
>>      error: patch failed: Documentation/githooks.txt:448
>>      error: Documentation/githooks.txt: patch does not apply
>>      Patch failed at 0001 fsmonitor: add documentation for the
>> fsmonitor extension.
>>      The copy of the patch that failed is found in:
>> .git/rebase-apply/patch
>>      When you have resolved this problem, run "git am --continue".
>>      If you prefer to skip this patch, run "git am --skip" instead.
>>      To restore the original branch and stop patching, run "git am
>> --abort".
>>
>
> Sorry, no idea on why this didn't work.  The patch was formatted with git
> format-patch but it's possible I've got something wrong.
No idea what's going on there, anyway I can grab it from your github
url, thanks!
>> But it worked with patch, weirdly enough:
>>
>>      $ patch -p1 </tmp/original_msg.txt
>>      (Stripping trailing CRs from patch; use --binary to disable.)
>>      patching file Documentation/config.txt
>>      Hunk #1 succeeded at 410 (offset 21 lines).
>>      (Stripping trailing CRs from patch; use --binary to disable.)
>>      patching file Documentation/githooks.txt
>>      Hunk #1 succeeded at 456 with fuzz 1 (offset 8 lines).
>>      (Stripping trailing CRs from patch; use --binary to disable.)
>>      patching file Documentation/technical/index-format.txt
>>
>> The 6/6 patch failed due to an unknown charset y, you have
>> "Content-Type: text/plain; charset=y" in the header, worked after
>> manually munging it to "UTF-8", although it gave a warning...
>>
>
> The only thing I see different about this patch is the special characters of
> your name in the sign-off line.  The call to git send-email prompted me
> about encoding - I wonder if my answer was incorrect?  Given you've probably
> dealt with your name in git patches before :), what should my answer be?

Hah! I didn't think that the "y" could be the result of "yes" in some
interactive dialog. It never prompts me when I send patches, it just
works, and with UTF-8 it seems to have been applied correctly.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 21:06   ` Ben Peart
  2017-06-01 21:12     ` Ævar Arnfjörð Bjarmason
@ 2017-06-01 21:13     ` Stefan Beller
  2017-06-01 21:26       ` Jeff King
  1 sibling, 1 reply; 29+ messages in thread
From: Stefan Beller @ 2017-06-01 21:13 UTC (permalink / raw)
  To: Ben Peart
  Cc: Ævar Arnfjörð Bjarmason, Git Mailing List,
	Junio C Hamano, Ben Peart, Nguyễn Thái Ngọc Duy,
	Johannes Schindelin, David Turner, Jeff King, Christian Couder

>
>>      $ git am /tmp/original_msg.txt
>>      Applying: fsmonitor: add documentation for the fsmonitor extension.
>>      error: patch failed: Documentation/githooks.txt:448
>>      error: Documentation/githooks.txt: patch does not apply
>>      Patch failed at 0001 fsmonitor: add documentation for the
>> fsmonitor extension.
>>      The copy of the patch that failed is found in:
>> .git/rebase-apply/patch
>>      When you have resolved this problem, run "git am --continue".
>>      If you prefer to skip this patch, run "git am --skip" instead.
>>      To restore the original branch and stop patching, run "git am
>> --abort".

Try again with -3. (We should make that the default for am, maybe?)
It helped me for most of the conflicts that I saw.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 20:51 ` Ævar Arnfjörð Bjarmason
@ 2017-06-01 21:13   ` Ævar Arnfjörð Bjarmason
  2017-06-02  0:40     ` Ben Peart
  0 siblings, 1 reply; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-01 21:13 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder

On Thu, Jun 1, 2017 at 10:51 PM, Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
> On Thu, Jun 1, 2017 at 5:50 PM, Ben Peart <peartben@gmail.com> wrote:
>> Changes from V3 include:
>>  - update test script based on feedback
>>  - update template hook proc with better post-processing code and make
>>    it executable
>
> I have watchman running finally, so aside from issues applying this I
> can finally test this. I set up a watch of linux.git:
>
> $ watchman watch $PWD
> $ watchman watch-list|jq '.roots[]'
> "/home/avar/g/linux"
>
> And first, for simplicity, I'll be turning core.fsmonitore on later:
>
> $ for c in fsmonitor untrackedCache splitIndex; do git config core.$c
> false; done
>
> On a hot fs cache running "git status" takes me 80ms, but if I flush caches:
>
> # free && sync && echo 3 > /proc/sys/vm/drop_caches && free
>
> Running "git status" takes me 4s. Now, right after flushing those caches:
>
> $ date +%s
> 1496349235
> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349235,
> "fields":["name"]}]' | watchman -j) | jq '.files[]'
>
> real    0m0.664s
> user    0m0.000s
> sys     0m0.004s
> $ touch foo bar
> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349235,
> "fields":["name"]}]' | watchman -j) | jq '.files[]'
> "bar"
> "foo"
>
> real    0m0.044s
> user    0m0.000s
> sys     0m0.000s
>
> Without the monitor running "git status' takes me ~2.5s, i.e. from cold cache:
>
> $ time GIT_TRACE=1 ~/g/git/git-status
> 20:34:49.154731 git.c:369               trace: built-in: git 'status'
> On branch master
> Your branch is up-to-date with 'origin/master'.
>
> Untracked files:
>   (use "git add <file>..." to include in what will be committed)
>
>         bar
>         foo
>
> nothing added to commit but untracked files present (use "git add" to track)
>
> real    0m2.546s
> user    0m0.060s
> sys     0m0.228s
>
> Now, I would expect the case where the working tree isn't in the fs
> cache to be lightning fast, since the watchman command is really fast
> (flushed the cache again, turned on the fsmonitor):
>
> $ date +%s
> 1496349523
> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349523,
> "fields":["name"]}]' | watchman -j) | jq '.files[]'
>
> real    0m0.529s
> user    0m0.000s
> sys     0m0.004s
> $ touch foo bar
> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349523,
> "fields":["name"]}]' | watchman -j) | jq '.files[]'
> "bar"
> "foo"
>
> real    0m0.312s
> user    0m0.000s
> sys     0m0.000s
>
> But if I run "git status" (and I instrumented the hook to dump the
> changed files) it takes a long time (those 10s are just the disk being
> slow, but it should be ~0s, right?):
>
> $ time GIT_TRACE=1 ~/g/git/git-status
> 20:39:11.250128 git.c:369               trace: built-in: git 'status'
> 20:39:14.586720 run-command.c:626       trace: run_command:
> '.git/hooks/query-fsmonitor' '1' '1496349494197821000'
> Watchman says these changed: bar foo hi there .git .git/index.lock .git/index
> On branch master
> Your branch is up-to-date with 'origin/master'.
>
> Untracked files:
>   (use "git add <file>..." to include in what will be committed)
>
>         bar
>         foo
>
> nothing added to commit but untracked files present (use "git add" to track)
>
> real    0m10.791s
> user    0m0.108s
> sys     0m0.228s
>
> If I re-run that it takes ~160-200ms with the hook, 80-120ms without
> it, so in this case once the data is in the fscache watchman is
> actually slower.
>
> But manually running watchman shows that it's really fast, usually
> returning within 3ms, I flush the fscache in the middle of this,
> that's the 600ms time, that could be some global kernel lock or
> something due to the odd manual proc operation, I haven't actually
> tested this on a system under memory pressure:
>
> $ for i in {1..10}; do (time (printf '["query", "/home/avar/g/linux",
> {"since": %s, "fields":["name"]}]' $(($(date +%s)-3)) | watchman -j |
> jq '.files[]')) 2>&1 | grep -e '"' -e ^real && touch $i && sleep 1;
> done
> real    0m0.004s
> "1"
> real    0m0.003s
> "2"
> "1"
> real    0m0.605s
> "3"
> real    0m0.003s
> "4"
> "3"
> real    0m0.003s
> "5"
> "4"
> real    0m0.003s
> "6"
> "5"
> real    0m0.003s
> "7"
> "6"
> real    0m0.003s
> "8"
> "7"
> real    0m0.003s
> "9"
> "8"
> real    0m0.003s
>
> So something really odd is going on here. This should be speeding up
> "git status" a lot, even with a hot fs cache doing stat on all of
> linux.git takes a lot longer than 3ms, but for some reason it's
> slower.

Dug a bit further, with cold cache and no fsmonitor, gprof output from
-O0 compiled git:

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total
 time   seconds   seconds    calls  ms/call  ms/call  name
 37.50      0.03     0.03   123886     0.00     0.00  memihash
 12.50      0.04     0.01   284727     0.00     0.00  match_basename
 12.50      0.05     0.01    73989     0.00     0.00  match_pathname
 12.50      0.06     0.01    59924     0.00     0.00  hashmap_add
 12.50      0.07     0.01    59847     0.00     0.00  same_name
 12.50      0.08     0.01    59844     0.00     0.00  hash_index_entry
  0.00      0.08     0.00   598446     0.00     0.00  git_bswap32
  0.00      0.08     0.00   259150     0.00     0.00  fspathncmp
  0.00      0.08     0.00   178731     0.00     0.00  match_pathspec
  0.00      0.08     0.00   178655     0.00     0.00  do_match_pathspec
And then cold cache with fsmonitor:

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total
 time   seconds   seconds    calls  ms/call  ms/call  name
 66.67      0.04     0.04    96696     0.00     0.00  blk_SHA1_Block
 16.67      0.05     0.01    59844     0.00     0.00  cache_entry_from_ondisk
 16.67      0.06     0.01    58869     0.00     0.00  longest_path_match
  0.00      0.06     0.00  1547157     0.00     0.00  git_bswap32
  0.00      0.06     0.00  1196893     0.00     0.00  git_bswap32
  0.00      0.06     0.00   284727     0.00     0.00  match_basename
  0.00      0.06     0.00   259150     0.00     0.00  fspathncmp
  0.00      0.06     0.00   178561     0.00     0.00  do_match_pathspec
  0.00      0.06     0.00   178543     0.00     0.00  match_pathspec
  0.00      0.06     0.00   145141     0.00     0.00  memory_limit_check

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 21:13     ` Stefan Beller
@ 2017-06-01 21:26       ` Jeff King
  0 siblings, 0 replies; 29+ messages in thread
From: Jeff King @ 2017-06-01 21:26 UTC (permalink / raw)
  To: Stefan Beller
  Cc: Ben Peart, Ævar Arnfjörð Bjarmason,
	Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Christian Couder

On Thu, Jun 01, 2017 at 02:13:10PM -0700, Stefan Beller wrote:

> >
> >>      $ git am /tmp/original_msg.txt
> >>      Applying: fsmonitor: add documentation for the fsmonitor extension.
> >>      error: patch failed: Documentation/githooks.txt:448
> >>      error: Documentation/githooks.txt: patch does not apply
> >>      Patch failed at 0001 fsmonitor: add documentation for the
> >> fsmonitor extension.
> >>      The copy of the patch that failed is found in:
> >> .git/rebase-apply/patch
> >>      When you have resolved this problem, run "git am --continue".
> >>      If you prefer to skip this patch, run "git am --skip" instead.
> >>      To restore the original branch and stop patching, run "git am
> >> --abort".
> 
> Try again with -3. (We should make that the default for am, maybe?)
> It helped me for most of the conflicts that I saw.

Yeah, I was going to suggest the same thing. "git apply" is much pickier
than "patch" about fuzzing the hunks (intentionally so, because the
results can sometimes end up not what the sender intended). But in most
cases "-3" can apply the patch without losing safety (unless the sender
built off a branch that you don't even have).

There's some more discussion of --3way as the default in

  http://public-inbox.org/git/CAKwvOdn9j=_Ob=xq4ucN6Ar1G537zNiU9ox4iF6o1qO7kPY41A@mail.gmail.com/T/#u

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 21:13   ` Ævar Arnfjörð Bjarmason
@ 2017-06-02  0:40     ` Ben Peart
  2017-06-02 10:28       ` [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 29+ messages in thread
From: Ben Peart @ 2017-06-02  0:40 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder

Any chance you can provide me with a bash script that contains the exact 
sequence of commands you are running to get this result?  I've been 
trying to replicate it using your notes but have not been able to.  I'd 
like to see if it is a repo difference, a platform difference, a command 
sequence difference (or something else entirely :)).

Thanks,

Ben

On 6/1/2017 5:13 PM, Ævar Arnfjörð Bjarmason wrote:
> On Thu, Jun 1, 2017 at 10:51 PM, Ævar Arnfjörð Bjarmason
> <avarab@gmail.com> wrote:
>> On Thu, Jun 1, 2017 at 5:50 PM, Ben Peart <peartben@gmail.com> wrote:
>>> Changes from V3 include:
>>>   - update test script based on feedback
>>>   - update template hook proc with better post-processing code and make
>>>     it executable
>>
>> I have watchman running finally, so aside from issues applying this I
>> can finally test this. I set up a watch of linux.git:
>>
>> $ watchman watch $PWD
>> $ watchman watch-list|jq '.roots[]'
>> "/home/avar/g/linux"
>>
>> And first, for simplicity, I'll be turning core.fsmonitore on later:
>>
>> $ for c in fsmonitor untrackedCache splitIndex; do git config core.$c
>> false; done
>>
>> On a hot fs cache running "git status" takes me 80ms, but if I flush caches:
>>
>> # free && sync && echo 3 > /proc/sys/vm/drop_caches && free
>>
>> Running "git status" takes me 4s. Now, right after flushing those caches:
>>
>> $ date +%s
>> 1496349235
>> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349235,
>> "fields":["name"]}]' | watchman -j) | jq '.files[]'
>>
>> real    0m0.664s
>> user    0m0.000s
>> sys     0m0.004s
>> $ touch foo bar
>> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349235,
>> "fields":["name"]}]' | watchman -j) | jq '.files[]'
>> "bar"
>> "foo"
>>
>> real    0m0.044s
>> user    0m0.000s
>> sys     0m0.000s
>>
>> Without the monitor running "git status' takes me ~2.5s, i.e. from cold cache:
>>
>> $ time GIT_TRACE=1 ~/g/git/git-status
>> 20:34:49.154731 git.c:369               trace: built-in: git 'status'
>> On branch master
>> Your branch is up-to-date with 'origin/master'.
>>
>> Untracked files:
>>    (use "git add <file>..." to include in what will be committed)
>>
>>          bar
>>          foo
>>
>> nothing added to commit but untracked files present (use "git add" to track)
>>
>> real    0m2.546s
>> user    0m0.060s
>> sys     0m0.228s
>>
>> Now, I would expect the case where the working tree isn't in the fs
>> cache to be lightning fast, since the watchman command is really fast
>> (flushed the cache again, turned on the fsmonitor):
>>
>> $ date +%s
>> 1496349523
>> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349523,
>> "fields":["name"]}]' | watchman -j) | jq '.files[]'
>>
>> real    0m0.529s
>> user    0m0.000s
>> sys     0m0.004s
>> $ touch foo bar
>> $ time (echo '["query", "/home/avar/g/linux", {"since": 1496349523,
>> "fields":["name"]}]' | watchman -j) | jq '.files[]'
>> "bar"
>> "foo"
>>
>> real    0m0.312s
>> user    0m0.000s
>> sys     0m0.000s
>>
>> But if I run "git status" (and I instrumented the hook to dump the
>> changed files) it takes a long time (those 10s are just the disk being
>> slow, but it should be ~0s, right?):
>>
>> $ time GIT_TRACE=1 ~/g/git/git-status
>> 20:39:11.250128 git.c:369               trace: built-in: git 'status'
>> 20:39:14.586720 run-command.c:626       trace: run_command:
>> '.git/hooks/query-fsmonitor' '1' '1496349494197821000'
>> Watchman says these changed: bar foo hi there .git .git/index.lock .git/index
>> On branch master
>> Your branch is up-to-date with 'origin/master'.
>>
>> Untracked files:
>>    (use "git add <file>..." to include in what will be committed)
>>
>>          bar
>>          foo
>>
>> nothing added to commit but untracked files present (use "git add" to track)
>>
>> real    0m10.791s
>> user    0m0.108s
>> sys     0m0.228s
>>
>> If I re-run that it takes ~160-200ms with the hook, 80-120ms without
>> it, so in this case once the data is in the fscache watchman is
>> actually slower.
>>
>> But manually running watchman shows that it's really fast, usually
>> returning within 3ms, I flush the fscache in the middle of this,
>> that's the 600ms time, that could be some global kernel lock or
>> something due to the odd manual proc operation, I haven't actually
>> tested this on a system under memory pressure:
>>
>> $ for i in {1..10}; do (time (printf '["query", "/home/avar/g/linux",
>> {"since": %s, "fields":["name"]}]' $(($(date +%s)-3)) | watchman -j |
>> jq '.files[]')) 2>&1 | grep -e '"' -e ^real && touch $i && sleep 1;
>> done
>> real    0m0.004s
>> "1"
>> real    0m0.003s
>> "2"
>> "1"
>> real    0m0.605s
>> "3"
>> real    0m0.003s
>> "4"
>> "3"
>> real    0m0.003s
>> "5"
>> "4"
>> real    0m0.003s
>> "6"
>> "5"
>> real    0m0.003s
>> "7"
>> "6"
>> real    0m0.003s
>> "8"
>> "7"
>> real    0m0.003s
>> "9"
>> "8"
>> real    0m0.003s
>>
>> So something really odd is going on here. This should be speeding up
>> "git status" a lot, even with a hot fs cache doing stat on all of
>> linux.git takes a lot longer than 3ms, but for some reason it's
>> slower.
> 
> Dug a bit further, with cold cache and no fsmonitor, gprof output from
> -O0 compiled git:
> 
> Each sample counts as 0.01 seconds.
>    %   cumulative   self              self     total
>   time   seconds   seconds    calls  ms/call  ms/call  name
>   37.50      0.03     0.03   123886     0.00     0.00  memihash
>   12.50      0.04     0.01   284727     0.00     0.00  match_basename
>   12.50      0.05     0.01    73989     0.00     0.00  match_pathname
>   12.50      0.06     0.01    59924     0.00     0.00  hashmap_add
>   12.50      0.07     0.01    59847     0.00     0.00  same_name
>   12.50      0.08     0.01    59844     0.00     0.00  hash_index_entry
>    0.00      0.08     0.00   598446     0.00     0.00  git_bswap32
>    0.00      0.08     0.00   259150     0.00     0.00  fspathncmp
>    0.00      0.08     0.00   178731     0.00     0.00  match_pathspec
>    0.00      0.08     0.00   178655     0.00     0.00  do_match_pathspec
> And then cold cache with fsmonitor:
> 
> Each sample counts as 0.01 seconds.
>    %   cumulative   self              self     total
>   time   seconds   seconds    calls  ms/call  ms/call  name
>   66.67      0.04     0.04    96696     0.00     0.00  blk_SHA1_Block
>   16.67      0.05     0.01    59844     0.00     0.00  cache_entry_from_ondisk
>   16.67      0.06     0.01    58869     0.00     0.00  longest_path_match
>    0.00      0.06     0.00  1547157     0.00     0.00  git_bswap32
>    0.00      0.06     0.00  1196893     0.00     0.00  git_bswap32
>    0.00      0.06     0.00   284727     0.00     0.00  match_basename
>    0.00      0.06     0.00   259150     0.00     0.00  fspathncmp
>    0.00      0.06     0.00   178561     0.00     0.00  do_match_pathspec
>    0.00      0.06     0.00   178543     0.00     0.00  match_pathspec
>    0.00      0.06     0.00   145141     0.00     0.00  memory_limit_check
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 0/6] Fast git status via a file system watcher
  2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
                   ` (7 preceding siblings ...)
  2017-06-01 20:51 ` Ævar Arnfjörð Bjarmason
@ 2017-06-02  1:56 ` Junio C Hamano
  8 siblings, 0 replies; 29+ messages in thread
From: Junio C Hamano @ 2017-06-02  1:56 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, benpeart, pclouds, johannes.schindelin, David.Turner, peff,
	christian.couder, avarab

Ben Peart <peartben@gmail.com> writes:

> Changes from V3 include:
>  - update test script based on feedback
>  - update template hook proc with better post-processing code and make
>    it executable

I take it that the first three patches are the same as before,
so is the "add documentation" one [5/6].

Will replace; Ævar seems to have some interesting numbers, so I'll
wait a bit more to see how the discussion goes.

Thanks.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02  0:40     ` Ben Peart
@ 2017-06-02 10:28       ` Ævar Arnfjörð Bjarmason
  2017-06-02 21:44         ` David Turner
                           ` (2 more replies)
  0 siblings, 3 replies; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-02 10:28 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Ben Peart, Nguyễn Thái Ngọc Duy,
	Johannes Schindelin, David Turner, Jeff King, Christian Couder,
	Ævar Arnfjörð Bjarmason

Add a performance test for the new core.fsmonitor facility using the
sample query-fsmonitor hook.

This is WIP code for the reasons explained in the setup comments,
unfortunately the perf code doesn't easily allow you to run different
setup code for different versions you're testing. This test will stop
working if the fsmonitor is merged into the master branch.

Output against linxu.git:

    $ GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8' ./run origin/master avar/fsmonitor ./p7519-fsmonitor.sh
    [...]
    Test                          origin/master     avar/fsmonitor
    -----------------------------------------------------------------------
    7519.2: status (first)        0.08(0.04+0.09)   0.12(0.07+0.10) +50.0%
    7519.3: status (subsequent)   0.08(0.04+0.09)   0.12(0.06+0.11) +50.0%
    7519.4: status -uno           0.02(0.02+0.05)   0.06(0.05+0.06) +200.0%
    7519.5: status -uall          0.08(0.06+0.07)   0.12(0.07+0.10) +50.0%

And against a larger in-house monorepo I have here, with the same
options (except the repo path):

    Test                          origin/master     avar/fsmonitor
    -----------------------------------------------------------------------
    7519.2: status (first)        0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
    7519.3: status (subsequent)   0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
    7519.4: status -uno           0.04(0.03+0.10)   0.22(0.08+0.12) +450.0%
    7519.5: status -uall          0.20(0.13+0.16)   0.27(0.18+0.19) +35.0%

Against linux.git with a hack to flush the FS cache (on Linux) before
running the first 'git status', only running one test so the result
isn't discarded as the slowest of N:

    $ GIT_PERF_REPEAT_COUNT=1 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null && make -j8' ./run origin/master avar/fsmonitor ./p7519-fsmonitor.sh
    [...]
    Test                          origin/master     avar/fsmonitor
    ------------------------------------------------------------------------
    7519.2: status (first)        0.30(0.18+0.10)   8.26(0.22+0.10) +2653.3%
    7519.3: status (subsequent)   0.08(0.04+0.08)   0.81(0.09+0.07) +912.5%
    7519.4: status -uno           0.02(0.01+0.06)   0.08(0.04+0.07) +300.0%
    7519.5: status -uall          0.08(0.06+0.07)   0.15(0.08+0.09) +87.5%

Now obviously due to 1 run that has a lot of noise, but I would expect
that first invocation to be blindingly fast since watchman has info on
what files were modified since the cache was flushed.

The same on the large monorepo noted above:

    Test                          origin/master     avar/fsmonitor
    -----------------------------------------------------------------------
    7519.2: status (first)        0.59(0.28+0.24)   0.93(0.35+0.19) +57.6%
    7519.3: status (subsequent)   0.20(0.10+0.19)   0.28(0.16+0.20) +40.0%
    7519.4: status -uno           0.04(0.04+0.09)   0.11(0.08+0.12) +175.0%
    7519.5: status -uall          0.29(0.11+0.18)   0.40(0.16+0.19) +37.9%

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---


On Fri, Jun 2, 2017 at 2:40 AM, Ben Peart <peartben@gmail.com> wrote:
> Any chance you can provide me with a bash script that contains the exact
> sequence of commands you are running to get this result?  I've been trying
> to replicate it using your notes but have not been able to.  I'd like to see
> if it is a repo difference, a platform difference, a command sequence
> difference (or something else entirely :)).

I can do better than that, here's a new perf test on top of this
series which demonstates the issue. I've only tested this on Linux
4.9.0 with watchman 4.9.0 compiled from git (yes, they're
coincidentally the same version).

A good addition to this would be `printf <fmt for date N sec in the
past> | watchman -j` as noted in my earlier mail, but I ran out of
time.

You can also set any combination of GIT_PERF_7519_UNTRACKED_CACHE &
GIT_PERF_7519_SPLIT_INDEX to play with turning that on. I haven't
tested all combinations of that, but e.g. testing with untrackedCache
didn't give results that looked different from the performance
regressions noted above.

Aside from performance, I think a very good addition to stress-test
this series would be a patch to t/test-lib*sh guarded by some env flag
to do a similar watchman watch-del/watch/watch-list dance as the one
I'm doing here in the setup, and setting up the hook / config.

That would allow testing the entire git test suite with this feature,
to find any subtle bugs this might have introduced in git-status.

 t/perf/p7519-fsmonitor.sh | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 t/perf/p7519-fsmonitor.sh

diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh
new file mode 100755
index 0000000000..b838a0ff14
--- /dev/null
+++ b/t/perf/p7519-fsmonitor.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+
+test_description="Test core.fsmonitor"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+test_expect_success 'setup' '
+	# Maybe set untrackedCache & splitIndex depending on the
+	# environment, defaulting to false.
+	if test -n "$GIT_PERF_7519_UNTRACKED_CACHE"
+	then
+		git config core.untrackedCache true
+	else
+		git config core.untrackedCache false
+	fi &&
+	if test -n "$GIT_PERF_7519_SPLIT_INDEX"
+	then
+		git config core.splitIndex true
+	else
+		git config core.splitIndex false
+	fi &&
+
+	# Relies on core.fsmonitor not being merged into master. Needs
+	# better per-test ways to disable it if it gets merged.
+	git config core.fsmonitor true &&
+
+	# Hook scaffolding
+	mkdir .git/hooks &&
+	cp ../../../templates/hooks--query-fsmonitor.sample .git/hooks/query-fsmonitor &&
+
+	# Setup watchman & ensure it is actually watching
+	watchman watch-del "$PWD" >/dev/null 2>&1 &&
+	watchman watch "$PWD" >/dev/null 2>&1 &&
+	watchman watch-list | grep -q -F "$PWD"
+'
+
+# Setting:
+#
+#    GIT_PERF_REPEAT_COUNT=1 GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 | sudo tee /proc/sys/vm/drop_caches && make -j8'
+#
+# Can be used as a hack to performance test 'git status' on a cold fs
+# cache with an existing watchman watching the directory, which should
+# be blindingly fast, compared to amazingly slow without watchman.
+test_perf 'status (first)'       'git status'
+
+
+# The same git-status once the fs cache has been warmed, if using the
+# GIT_PERF_MAKE_COMMAND above. Otherwise the same as above.
+test_perf 'status (subsequent)'  'git status'
+
+# Let's see if -uno & -uall make any difference
+test_perf 'status -uno'          'git status -uno'
+test_perf 'status -uall'         'git status -uall'
+
+test_done
-- 
2.13.0.506.g27d5fe0cd


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* RE: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 10:28       ` [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor Ævar Arnfjörð Bjarmason
@ 2017-06-02 21:44         ` David Turner
  2017-06-03 18:08           ` Ævar Arnfjörð Bjarmason
  2017-06-05 14:27           ` Ben Peart
  2017-06-02 22:05         ` Ben Peart
  2017-06-04  1:59         ` Junio C Hamano
  2 siblings, 2 replies; 29+ messages in thread
From: David Turner @ 2017-06-02 21:44 UTC (permalink / raw)
  To: 'Ævar Arnfjörð Bjarmason',
	git@vger.kernel.org
  Cc: Junio C Hamano, Ben Peart, Nguyễn Thái Ngọc Duy,
	Johannes Schindelin, Jeff King, Christian Couder

BTW, a medium-sized (~250k files across 40k dirs) synthetic repo is available over bittorrent at:
http://bitmover.com/2015-04-03-1M-git-bare.tar.bz2.torrent

I tried Ævar's perf test with that (on a beefy laptop with SSD), and got significantly slower results with bp/fsmonitor:
Test                          origin/master     bp/fsmonitor           
-----------------------------------------------------------------------
7519.2: status (first)        0.32(0.23+0.39)   0.32(0.26+0.36) +0.0%  
7519.3: status (subsequent)   0.18(0.14+0.34)   0.32(0.24+0.37) +77.8% 
7519.4: status -uno           0.11(0.08+0.32)   0.24(0.18+0.34) +118.2%
7519.5: status -uall          0.49(0.22+0.56)   0.62(0.36+0.55) +26.5% 
7519.2: status (first)        0.32(0.23+0.39)   0.32(0.26+0.36) +0.0%  
7519.3: status (subsequent)   0.18(0.14+0.34)   0.32(0.24+0.37) +77.8% 
7519.4: status -uno           0.11(0.08+0.32)   0.24(0.18+0.34) +118.2%
7519.5: status -uall          0.49(0.22+0.56)   0.62(0.36+0.55) +26.5% 

I have not yet looked into why this is.

> -----Original Message-----
> From: Ævar Arnfjörð Bjarmason [mailto:avarab@gmail.com]
> Sent: Friday, June 2, 2017 6:29 AM
> To: git@vger.kernel.org
> Cc: Junio C Hamano <gitster@pobox.com>; Ben Peart
> <peartben@gmail.com>; Nguyễn Thái Ngọc Duy <pclouds@gmail.com>;
> Johannes Schindelin <johannes.schindelin@gmx.de>; David Turner
> <David.Turner@twosigma.com>; Jeff King <peff@peff.net>; Christian
> Couder <christian.couder@gmail.com>; Ævar Arnfjörð Bjarmason
> <avarab@gmail.com>
> Subject: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
> 
> Add a performance test for the new core.fsmonitor facility using the sample
> query-fsmonitor hook.
> 
> This is WIP code for the reasons explained in the setup comments,
> unfortunately the perf code doesn't easily allow you to run different setup
> code for different versions you're testing. This test will stop working if the
> fsmonitor is merged into the master branch.
> 
> Output against linxu.git:
> 
>     $ GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux
> GIT_PERF_MAKE_OPTS='-j8' ./run origin/master avar/fsmonitor ./p7519-
> fsmonitor.sh
>     [...]
>     Test                          origin/master     avar/fsmonitor
>     -----------------------------------------------------------------------
>     7519.2: status (first)        0.08(0.04+0.09)   0.12(0.07+0.10) +50.0%
>     7519.3: status (subsequent)   0.08(0.04+0.09)   0.12(0.06+0.11) +50.0%
>     7519.4: status -uno           0.02(0.02+0.05)   0.06(0.05+0.06) +200.0%
>     7519.5: status -uall          0.08(0.06+0.07)   0.12(0.07+0.10) +50.0%
> 
> And against a larger in-house monorepo I have here, with the same options
> (except the repo path):
> 
>     Test                          origin/master     avar/fsmonitor
>     -----------------------------------------------------------------------
>     7519.2: status (first)        0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
>     7519.3: status (subsequent)   0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
>     7519.4: status -uno           0.04(0.03+0.10)   0.22(0.08+0.12) +450.0%
>     7519.5: status -uall          0.20(0.13+0.16)   0.27(0.18+0.19) +35.0%
> 
> Against linux.git with a hack to flush the FS cache (on Linux) before running
> the first 'git status', only running one test so the result isn't discarded as the
> slowest of N:
> 
>     $ GIT_PERF_REPEAT_COUNT=1 GIT_PERF_LARGE_REPO=~/g/linux
> GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 | sudo tee
> /proc/sys/vm/drop_caches >/dev/null && make -j8' ./run origin/master
> avar/fsmonitor ./p7519-fsmonitor.sh
>     [...]
>     Test                          origin/master     avar/fsmonitor
>     ------------------------------------------------------------------------
>     7519.2: status (first)        0.30(0.18+0.10)   8.26(0.22+0.10) +2653.3%
>     7519.3: status (subsequent)   0.08(0.04+0.08)   0.81(0.09+0.07) +912.5%
>     7519.4: status -uno           0.02(0.01+0.06)   0.08(0.04+0.07) +300.0%
>     7519.5: status -uall          0.08(0.06+0.07)   0.15(0.08+0.09) +87.5%
> 
> Now obviously due to 1 run that has a lot of noise, but I would expect that
> first invocation to be blindingly fast since watchman has info on what files
> were modified since the cache was flushed.
> 
> The same on the large monorepo noted above:
> 
>     Test                          origin/master     avar/fsmonitor
>     -----------------------------------------------------------------------
>     7519.2: status (first)        0.59(0.28+0.24)   0.93(0.35+0.19) +57.6%
>     7519.3: status (subsequent)   0.20(0.10+0.19)   0.28(0.16+0.20) +40.0%
>     7519.4: status -uno           0.04(0.04+0.09)   0.11(0.08+0.12) +175.0%
>     7519.5: status -uall          0.29(0.11+0.18)   0.40(0.16+0.19) +37.9%
> 
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> 
> 
> On Fri, Jun 2, 2017 at 2:40 AM, Ben Peart <peartben@gmail.com> wrote:
> > Any chance you can provide me with a bash script that contains the
> > exact sequence of commands you are running to get this result?  I've
> > been trying to replicate it using your notes but have not been able
> > to.  I'd like to see if it is a repo difference, a platform
> > difference, a command sequence difference (or something else entirely
> :)).
> 
> I can do better than that, here's a new perf test on top of this series which
> demonstates the issue. I've only tested this on Linux
> 4.9.0 with watchman 4.9.0 compiled from git (yes, they're coincidentally the
> same version).
> 
> A good addition to this would be `printf <fmt for date N sec in the
> past> | watchman -j` as noted in my earlier mail, but I ran out of
> time.
> 
> You can also set any combination of GIT_PERF_7519_UNTRACKED_CACHE &
> GIT_PERF_7519_SPLIT_INDEX to play with turning that on. I haven't tested all
> combinations of that, but e.g. testing with untrackedCache didn't give results
> that looked different from the performance regressions noted above.
> 
> Aside from performance, I think a very good addition to stress-test this series
> would be a patch to t/test-lib*sh guarded by some env flag to do a similar
> watchman watch-del/watch/watch-list dance as the one I'm doing here in
> the setup, and setting up the hook / config.
> 
> That would allow testing the entire git test suite with this feature, to find any
> subtle bugs this might have introduced in git-status.
> 
>  t/perf/p7519-fsmonitor.sh | 58
> +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
>  create mode 100755 t/perf/p7519-fsmonitor.sh
> 
> diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh new file
> mode 100755 index 0000000000..b838a0ff14
> --- /dev/null
> +++ b/t/perf/p7519-fsmonitor.sh
> @@ -0,0 +1,58 @@
> +#!/bin/sh
> +
> +test_description="Test core.fsmonitor"
> +
> +. ./perf-lib.sh
> +
> +test_perf_large_repo
> +test_checkout_worktree
> +
> +test_expect_success 'setup' '
> +	# Maybe set untrackedCache & splitIndex depending on the
> +	# environment, defaulting to false.
> +	if test -n "$GIT_PERF_7519_UNTRACKED_CACHE"
> +	then
> +		git config core.untrackedCache true
> +	else
> +		git config core.untrackedCache false
> +	fi &&
> +	if test -n "$GIT_PERF_7519_SPLIT_INDEX"
> +	then
> +		git config core.splitIndex true
> +	else
> +		git config core.splitIndex false
> +	fi &&
> +
> +	# Relies on core.fsmonitor not being merged into master. Needs
> +	# better per-test ways to disable it if it gets merged.
> +	git config core.fsmonitor true &&
> +
> +	# Hook scaffolding
> +	mkdir .git/hooks &&
> +	cp ../../../templates/hooks--query-fsmonitor.sample
> +.git/hooks/query-fsmonitor &&
> +
> +	# Setup watchman & ensure it is actually watching
> +	watchman watch-del "$PWD" >/dev/null 2>&1 &&
> +	watchman watch "$PWD" >/dev/null 2>&1 &&
> +	watchman watch-list | grep -q -F "$PWD"
> +'
> +
> +# Setting:
> +#
> +#    GIT_PERF_REPEAT_COUNT=1 GIT_PERF_MAKE_COMMAND='sudo sync
> && echo 3 | sudo tee /proc/sys/vm/drop_caches && make -j8'
> +#
> +# Can be used as a hack to performance test 'git status' on a cold fs #
> +cache with an existing watchman watching the directory, which should #
> +be blindingly fast, compared to amazingly slow without watchman.
> +test_perf 'status (first)'       'git status'
> +
> +
> +# The same git-status once the fs cache has been warmed, if using the #
> +GIT_PERF_MAKE_COMMAND above. Otherwise the same as above.
> +test_perf 'status (subsequent)'  'git status'
> +
> +# Let's see if -uno & -uall make any difference
> +test_perf 'status -uno'          'git status -uno'
> +test_perf 'status -uall'         'git status -uall'
> +
> +test_done
> --
> 2.13.0.506.g27d5fe0cd


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 10:28       ` [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor Ævar Arnfjörð Bjarmason
  2017-06-02 21:44         ` David Turner
@ 2017-06-02 22:05         ` Ben Peart
  2017-06-02 23:06           ` Ævar Arnfjörð Bjarmason
  2017-06-04  1:59         ` Junio C Hamano
  2 siblings, 1 reply; 29+ messages in thread
From: Ben Peart @ 2017-06-02 22:05 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason, git
  Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy,
	Johannes Schindelin, David Turner, Jeff King, Christian Couder,
	benpeart



On 6/2/2017 6:28 AM, Ævar Arnfjörð Bjarmason wrote:
> Add a performance test for the new core.fsmonitor facility using the
> sample query-fsmonitor hook.
> 
> This is WIP code for the reasons explained in the setup comments,
> unfortunately the perf code doesn't easily allow you to run different
> setup code for different versions you're testing. This test will stop
> working if the fsmonitor is merged into the master branch.
> 
> Output against linxu.git:
> 
>      $ GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_OPTS='-j8' ./run origin/master avar/fsmonitor ./p7519-fsmonitor.sh
>      [...]
>      Test                          origin/master     avar/fsmonitor
>      -----------------------------------------------------------------------
>      7519.2: status (first)        0.08(0.04+0.09)   0.12(0.07+0.10) +50.0%
>      7519.3: status (subsequent)   0.08(0.04+0.09)   0.12(0.06+0.11) +50.0%
>      7519.4: status -uno           0.02(0.02+0.05)   0.06(0.05+0.06) +200.0%
>      7519.5: status -uall          0.08(0.06+0.07)   0.12(0.07+0.10) +50.0%
> 

With regular status times this low, the overhead of calling the hook + 
watchman + perl results in slower overall times as I noted in my initial 
cover letter.  If status calls are already this fast, fsmonitor + 
watchman isn't needed and obviously doesn't help.

This does highlight an optimization I could add.  Even though -uno is 
passed, the fsmonitor code currently still goes through the work of 
marking the untracked entries as dirty.  I'll look at optimizing that 
out to speed status up when using that option.

> And against a larger in-house monorepo I have here, with the same
> options (except the repo path):
> 
>      Test                          origin/master     avar/fsmonitor
>      -----------------------------------------------------------------------
>      7519.2: status (first)        0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
>      7519.3: status (subsequent)   0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
>      7519.4: status -uno           0.04(0.03+0.10)   0.22(0.08+0.12) +450.0%
>      7519.5: status -uall          0.20(0.13+0.16)   0.27(0.18+0.19) +35.0%
> 
> Against linux.git with a hack to flush the FS cache (on Linux) before
> running the first 'git status', only running one test so the result
> isn't discarded as the slowest of N:
> 

I don't know know about on Linux but with Windows, when you flush the 
file system cache via unmount/mount, it causes Watchman to do a full 
scan with the next query.  This has a significant negative performance 
impact on the next status call as it returns the set of _all_ files 
which git then uses to find and mark all index and untracked cache 
entries as fsmonitor dirty then does a complete scan.  This combination 
makes the first status after these events slower than without Watchman.

I'm currently working with the Watchman team to return a code indicating 
we should just scan everything ourselves to avoid the extra overhead, 
but that solution is TBD.  Given it only happens the first time a query 
is done on a new clone or the first time after watchman is started, I 
didn't intend to hold up the patch series for it but will submit an 
enhanced version once a solution is available.  The enhanced version 
will then be the same performance (for the first status call) as when 
not running with fsmonitor - not faster - as the state is unknown so 
must be gathered from the working directory.

>      $ GIT_PERF_REPEAT_COUNT=1 GIT_PERF_LARGE_REPO=~/g/linux GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null && make -j8' ./run origin/master avar/fsmonitor ./p7519-fsmonitor.sh
>      [...]
>      Test                          origin/master     avar/fsmonitor
>      ------------------------------------------------------------------------
>      7519.2: status (first)        0.30(0.18+0.10)   8.26(0.22+0.10) +2653.3%
>      7519.3: status (subsequent)   0.08(0.04+0.08)   0.81(0.09+0.07) +912.5%
>      7519.4: status -uno           0.02(0.01+0.06)   0.08(0.04+0.07) +300.0%
>      7519.5: status -uall          0.08(0.06+0.07)   0.15(0.08+0.09) +87.5%
> 
> Now obviously due to 1 run that has a lot of noise, but I would expect
> that first invocation to be blindingly fast since watchman has info on
> what files were modified since the cache was flushed.
> 

Every (first) run of the performance test will be very expensive for the 
reasons outlined above.  This is clearest when you only have 1 run as it 
doesn't get masked by the faster 2nd+ runs.  That first run will never 
be "blindingly fast" as git has to gather the initial state and save out 
the cache - it's all the subsequent calls that will be faster.

> The same on the large monorepo noted above:
> 
>      Test                          origin/master     avar/fsmonitor
>      -----------------------------------------------------------------------
>      7519.2: status (first)        0.59(0.28+0.24)   0.93(0.35+0.19) +57.6%
>      7519.3: status (subsequent)   0.20(0.10+0.19)   0.28(0.16+0.20) +40.0%
>      7519.4: status -uno           0.04(0.04+0.09)   0.11(0.08+0.12) +175.0%
>      7519.5: status -uall          0.29(0.11+0.18)   0.40(0.16+0.19) +37.9%
> 
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
> ---
> 

Same issue here - the overhead of the hook + watchman + perl outweighs 
any savings for these status calls that are already so fast.

There is certainly noise from run to run but on my machine, the minimum 
time I saw with fsmonitor was in the .25+ range.  Where fsmonitor really 
becomes useful is when status times get over a second and in extreme 
cases where it gets to a minute or more.
> On Fri, Jun 2, 2017 at 2:40 AM, Ben Peart <peartben@gmail.com> wrote:
>> Any chance you can provide me with a bash script that contains the exact
>> sequence of commands you are running to get this result?  I've been trying
>> to replicate it using your notes but have not been able to.  I'd like to see
>> if it is a repo difference, a platform difference, a command sequence
>> difference (or something else entirely :)).
> 
> I can do better than that, here's a new perf test on top of this
> series which demonstates the issue. I've only tested this on Linux
> 4.9.0 with watchman 4.9.0 compiled from git (yes, they're
> coincidentally the same version).
> 

Thank you!  Great addition.  I've included an updated version below.

> A good addition to this would be `printf <fmt for date N sec in the
> past> | watchman -j` as noted in my earlier mail, but I ran out of
> time.
> 
> You can also set any combination of GIT_PERF_7519_UNTRACKED_CACHE &
> GIT_PERF_7519_SPLIT_INDEX to play with turning that on. I haven't
> tested all combinations of that, but e.g. testing with untrackedCache
> didn't give results that looked different from the performance
> regressions noted above.
> 

Makes sense, given how the performance framework creates the test repo, 
there typically aren't untracked files to optimize.  I updated the perf 
test to turn it on anyway for platforms where it is available.

> Aside from performance, I think a very good addition to stress-test
> this series would be a patch to t/test-lib*sh guarded by some env flag
> to do a similar watchman watch-del/watch/watch-list dance as the one
> I'm doing here in the setup, and setting up the hook / config.
> That would allow testing the entire git test suite with this feature,
> to find any subtle bugs this might have introduced in git-status.
> 

Another good idea.  I had started that but ran into bugs with Watchman 
on Windows (https://github.com/facebook/watchman/issues/461) where 
running the git test suite actually causes Watchman to stop monitoring 
folders.  Once I can get past that, I'll give it another go.



diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh
new file mode 100755
index 0000000000..e658254ecf
--- /dev/null
+++ b/t/perf/p7519-fsmonitor.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+test_description="Test core.fsmonitor"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+# fsmonitor works correctly with or without the untracked cache
+# but if it is available, we'll turn it on to ensure we test that
+# codepath as well.
+
+test_lazy_prereq UNTRACKED_CACHE '
+       { git update-index --test-untracked-cache; ret=$?; } &&
+       test $ret -ne 1
+'
+
+if test_have_prereq UNTRACKED_CACHE; then
+       git config core.untrackedcache true
+else
+       git config core.untrackedcache false
+fi
+
+test_expect_success 'setup' '
+       # set splitIndex depending on the environment, defaulting to false.
+       if test -n "$GIT_PERF_7519_SPLIT_INDEX"
+       then
+               git config core.splitIndex true
+       else
+               git config core.splitIndex false
+       fi &&
+
+       # Hook scaffolding
+       mkdir .git/hooks &&
+       cp ../../../templates/hooks--query-fsmonitor.sample 
.git/hooks/query-fsmonitor &&
+
+       watchman watch "$PWD" >/dev/null 2>&1 &&
+       watchman watch-list | grep -q -F "$PWD"
+'
+
+# Setting:
+#
+#    GIT_PERF_REPEAT_COUNT=1 GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 
| sudo tee /proc/sys/vm/dro
p_caches && make -j8'
+#
+# Can be used as a hack to performance test 'git status' on a cold fs
+# cache with an existing watchman watching the directory, which should
+# be blindingly fast, compared to amazingly slow without watchman.
+
+# Run git status without using fsmonitor as a baseline.
+test_perf 'status (without fsmonitor)' 'git -c core.fsmonitor=false status'
+
+# The first time you query Watchman after the daemon starts or the first
+# call after adding a new folder to watch, it scans the folder and returns
+# _all_ files which makes the initial status call slower than without.
+# Working with Watchman team to return a code indicating we should just
+# scan everything ourselves to avoid the extra overhead, solution TBD.
+test_perf 'status (populate cache)' 'git -c core.fsmonitor=true status'
+
+# Now Watchman has done the initial scan and the fsmonitor index extension
+# is populated so we'll see the performance win.
+test_perf 'status (with fsmonitor)' 'git -c core.fsmonitor=true status'
+
+# Let's see if -uno & -uall make any difference
+test_perf 'status -uno'          'git -c core.fsmonitor=true status -uno'
+test_perf 'status -uall'         'git -c core.fsmonitor=true status -uall'
+
+test_done



^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 22:05         ` Ben Peart
@ 2017-06-02 23:06           ` Ævar Arnfjörð Bjarmason
  2017-06-07 19:51             ` Ben Peart
  0 siblings, 1 reply; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-02 23:06 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder, Ben Peart

On Sat, Jun 3, 2017 at 12:05 AM, Ben Peart <peartben@gmail.com> wrote:
>
>
> On 6/2/2017 6:28 AM, Ævar Arnfjörð Bjarmason wrote:
>>
>> Add a performance test for the new core.fsmonitor facility using the
>> sample query-fsmonitor hook.
>>
>> This is WIP code for the reasons explained in the setup comments,
>> unfortunately the perf code doesn't easily allow you to run different
>> setup code for different versions you're testing. This test will stop
>> working if the fsmonitor is merged into the master branch.
>>
>> Output against linxu.git:
>>
>>      $ GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux
>> GIT_PERF_MAKE_OPTS='-j8' ./run origin/master avar/fsmonitor
>> ./p7519-fsmonitor.sh
>>      [...]
>>      Test                          origin/master     avar/fsmonitor
>>
>> -----------------------------------------------------------------------
>>      7519.2: status (first)        0.08(0.04+0.09)   0.12(0.07+0.10)
>> +50.0%
>>      7519.3: status (subsequent)   0.08(0.04+0.09)   0.12(0.06+0.11)
>> +50.0%
>>      7519.4: status -uno           0.02(0.02+0.05)   0.06(0.05+0.06)
>> +200.0%
>>      7519.5: status -uall          0.08(0.06+0.07)   0.12(0.07+0.10)
>> +50.0%
>>
>
> With regular status times this low, the overhead of calling the hook +
> watchman + perl results in slower overall times as I noted in my initial
> cover letter.  If status calls are already this fast, fsmonitor + watchman
> isn't needed and obviously doesn't help.
>
> This does highlight an optimization I could add.  Even though -uno is
> passed, the fsmonitor code currently still goes through the work of marking
> the untracked entries as dirty.  I'll look at optimizing that out to speed
> status up when using that option.
>
>> And against a larger in-house monorepo I have here, with the same
>> options (except the repo path):
>>
>>      Test                          origin/master     avar/fsmonitor
>>
>> -----------------------------------------------------------------------
>>      7519.2: status (first)        0.20(0.11+0.18)   0.27(0.15+0.21)
>> +35.0%
>>      7519.3: status (subsequent)   0.20(0.11+0.18)   0.27(0.15+0.21)
>> +35.0%
>>      7519.4: status -uno           0.04(0.03+0.10)   0.22(0.08+0.12)
>> +450.0%
>>      7519.5: status -uall          0.20(0.13+0.16)   0.27(0.18+0.19)
>> +35.0%
>>
>> Against linux.git with a hack to flush the FS cache (on Linux) before
>> running the first 'git status', only running one test so the result
>> isn't discarded as the slowest of N:
>>
>
> I don't know know about on Linux but with Windows, when you flush the file
> system cache via unmount/mount, it causes Watchman to do a full scan with
> the next query.  This has a significant negative performance impact on the
> next status call as it returns the set of _all_ files which git then uses to
> find and mark all index and untracked cache entries as fsmonitor dirty then
> does a complete scan.  This combination makes the first status after these
> events slower than without Watchman.

I'm not sure, but I don't think this is a thing at all on Linux. How
inotify works is unrelated to whether some part of the filesystem
happens to be cached, and the echo+proc command I have is just
flushing all those pages. That I'm flushing all the pages at once
should be as much of a non-event as one page being claimed from the
fscache for use by the RAM allocation of some application.

I suspect Windows somehow conflates its fs watching implementation
with what on Linux is the inotify queue the application has to
consume, if that queue isn't consumed watchman will need to re-scan.

Or maybe my mental model of this whole thing is wrong and this also
happens on Linux...

> I'm currently working with the Watchman team to return a code indicating we
> should just scan everything ourselves to avoid the extra overhead, but that
> solution is TBD.  Given it only happens the first time a query is done on a
> new clone or the first time after watchman is started, I didn't intend to
> hold up the patch series for it but will submit an enhanced version once a
> solution is available.  The enhanced version will then be the same
> performance (for the first status call) as when not running with fsmonitor -
> not faster - as the state is unknown so must be gathered from the working
> directory.

I don't have time to update the perf test now or dig into it, but most
of what you're describing in this mail doesn't at all match with the
ad-hoc tests I ran in
https://public-inbox.org/git/CACBZZX5e58bWuf3NdDYTxu2KyZj29hHONzN=rp-7vXd8nURyWQ@mail.gmail.com/

There (at the very end of the E-Mail) I'm running watchman in a tight
loop while I flush the entire fs cache, its runtime is never longer
than 600ms, with 3ms being the norm.

I.e. flushing the cache doesn't slow things down much at all compared
to how long a "git status" takes from cold cache. Something else must
be going on, and the smoking gun is the gprof output I posted in the
follow-up E-Mail:
https://public-inbox.org/git/CACBZZX4eZ3G8LQ8O+_BkbkJ-ZXTOkUi9cW=QKYjfHKtmA3pgrA@mail.gmail.com/

There with the fsmonitor we end up calling blk_SHA1_Block ~100K times
during "status", but IIRC (I don't have the output in front of me,
this is from memory) something like twenty times without the
fsmonitor.

It can't be a coincidence that with the fscache:

$ pwd; git ls-files | wc -l
/home/avar/g/linux
59844

And you can see that in the fsmonitor "git status" we make exactly
that many calls to cache_entry_from_ondisk(), but those calls don't
show up at all in the non-fscache codepath.

So, again, I haven't dug and really must step away from the computer
now, but this really looks like the fscache saves us the recursive
readdir() / lstat() etc, but in return we somehow fall though to a
codepath where we re-read the entire on-disk state back into the
index, which we don't do in the non-fscache codepath.

>>      $ GIT_PERF_REPEAT_COUNT=1 GIT_PERF_LARGE_REPO=~/g/linux
>> GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 | sudo tee
>> /proc/sys/vm/drop_caches >/dev/null && make -j8' ./run origin/master
>> avar/fsmonitor ./p7519-fsmonitor.sh
>>      [...]
>>      Test                          origin/master     avar/fsmonitor
>>
>> ------------------------------------------------------------------------
>>      7519.2: status (first)        0.30(0.18+0.10)   8.26(0.22+0.10)
>> +2653.3%
>>      7519.3: status (subsequent)   0.08(0.04+0.08)   0.81(0.09+0.07)
>> +912.5%
>>      7519.4: status -uno           0.02(0.01+0.06)   0.08(0.04+0.07)
>> +300.0%
>>      7519.5: status -uall          0.08(0.06+0.07)   0.15(0.08+0.09)
>> +87.5%
>>
>> Now obviously due to 1 run that has a lot of noise, but I would expect
>> that first invocation to be blindingly fast since watchman has info on
>> what files were modified since the cache was flushed.
>>
>
> Every (first) run of the performance test will be very expensive for the
> reasons outlined above.  This is clearest when you only have 1 run as it
> doesn't get masked by the faster 2nd+ runs.  That first run will never be
> "blindingly fast" as git has to gather the initial state and save out the
> cache - it's all the subsequent calls that will be faster.
>
>> The same on the large monorepo noted above:
>>
>>      Test                          origin/master     avar/fsmonitor
>>
>> -----------------------------------------------------------------------
>>      7519.2: status (first)        0.59(0.28+0.24)   0.93(0.35+0.19)
>> +57.6%
>>      7519.3: status (subsequent)   0.20(0.10+0.19)   0.28(0.16+0.20)
>> +40.0%
>>      7519.4: status -uno           0.04(0.04+0.09)   0.11(0.08+0.12)
>> +175.0%
>>      7519.5: status -uall          0.29(0.11+0.18)   0.40(0.16+0.19)
>> +37.9%
>>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>>
>
> Same issue here - the overhead of the hook + watchman + perl outweighs any
> savings for these status calls that are already so fast.
>
> There is certainly noise from run to run but on my machine, the minimum time
> I saw with fsmonitor was in the .25+ range.  Where fsmonitor really becomes
> useful is when status times get over a second and in extreme cases where it
> gets to a minute or more.
>>
>> On Fri, Jun 2, 2017 at 2:40 AM, Ben Peart <peartben@gmail.com> wrote:
>>>
>>> Any chance you can provide me with a bash script that contains the exact
>>> sequence of commands you are running to get this result?  I've been
>>> trying
>>> to replicate it using your notes but have not been able to.  I'd like to
>>> see
>>> if it is a repo difference, a platform difference, a command sequence
>>> difference (or something else entirely :)).
>>
>>
>> I can do better than that, here's a new perf test on top of this
>> series which demonstates the issue. I've only tested this on Linux
>> 4.9.0 with watchman 4.9.0 compiled from git (yes, they're
>> coincidentally the same version).
>>
>
> Thank you!  Great addition.  I've included an updated version below.
>
>> A good addition to this would be `printf <fmt for date N sec in the
>> past> | watchman -j` as noted in my earlier mail, but I ran out of
>> time.
>>
>> You can also set any combination of GIT_PERF_7519_UNTRACKED_CACHE &
>> GIT_PERF_7519_SPLIT_INDEX to play with turning that on. I haven't
>> tested all combinations of that, but e.g. testing with untrackedCache
>> didn't give results that looked different from the performance
>> regressions noted above.
>>
>
> Makes sense, given how the performance framework creates the test repo,
> there typically aren't untracked files to optimize.  I updated the perf test
> to turn it on anyway for platforms where it is available.

The unfortunately named core.untrackedCache option has almost nothing
to do with untracked files per-se. It's about caching mtimes of
directories on the fs to avoid looking at them the next time, here on
a linux.git with no untracked files whatsoever:

    $ strace git -c core.untrackedCache=false status 2>&1 | wc -l
    22780
    $ strace git -c core.untrackedCache=true status 2>&1 | wc -l
    5105

I.e. syscalls are cut down by 4x. This will/can matter in an isolated
performance test like this.

It should really have been called core.mtimeCache or something like that...

>> Aside from performance, I think a very good addition to stress-test
>> this series would be a patch to t/test-lib*sh guarded by some env flag
>> to do a similar watchman watch-del/watch/watch-list dance as the one
>> I'm doing here in the setup, and setting up the hook / config.
>> That would allow testing the entire git test suite with this feature,
>> to find any subtle bugs this might have introduced in git-status.
>>
>
> Another good idea.  I had started that but ran into bugs with Watchman on
> Windows (https://github.com/facebook/watchman/issues/461) where running the
> git test suite actually causes Watchman to stop monitoring folders.  Once I
> can get past that, I'll give it another go.
>
>
>
> diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh
> new file mode 100755
> index 0000000000..e658254ecf
> --- /dev/null
> +++ b/t/perf/p7519-fsmonitor.sh
> @@ -0,0 +1,68 @@
> +#!/bin/sh
> +
> +test_description="Test core.fsmonitor"
> +
> +. ./perf-lib.sh
> +
> +test_perf_large_repo
> +test_checkout_worktree
> +
> +# fsmonitor works correctly with or without the untracked cache
> +# but if it is available, we'll turn it on to ensure we test that
> +# codepath as well.
> +
> +test_lazy_prereq UNTRACKED_CACHE '
> +       { git update-index --test-untracked-cache; ret=$?; } &&
> +       test $ret -ne 1
> +'
> +
> +if test_have_prereq UNTRACKED_CACHE; then
> +       git config core.untrackedcache true
> +else
> +       git config core.untrackedcache false
> +fi
> +
> +test_expect_success 'setup' '
> +       # set splitIndex depending on the environment, defaulting to false.
> +       if test -n "$GIT_PERF_7519_SPLIT_INDEX"
> +       then
> +               git config core.splitIndex true
> +       else
> +               git config core.splitIndex false
> +       fi &&
> +
> +       # Hook scaffolding
> +       mkdir .git/hooks &&
> +       cp ../../../templates/hooks--query-fsmonitor.sample
> .git/hooks/query-fsmonitor &&
> +
> +       watchman watch "$PWD" >/dev/null 2>&1 &&
> +       watchman watch-list | grep -q -F "$PWD"
> +'
> +
> +# Setting:
> +#
> +#    GIT_PERF_REPEAT_COUNT=1 GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 |
> sudo tee /proc/sys/vm/dro
> p_caches && make -j8'
> +#
> +# Can be used as a hack to performance test 'git status' on a cold fs
> +# cache with an existing watchman watching the directory, which should
> +# be blindingly fast, compared to amazingly slow without watchman.
> +
> +# Run git status without using fsmonitor as a baseline.
> +test_perf 'status (without fsmonitor)' 'git -c core.fsmonitor=false status'
> +
> +# The first time you query Watchman after the daemon starts or the first
> +# call after adding a new folder to watch, it scans the folder and returns
> +# _all_ files which makes the initial status call slower than without.
> +# Working with Watchman team to return a code indicating we should just
> +# scan everything ourselves to avoid the extra overhead, solution TBD.
> +test_perf 'status (populate cache)' 'git -c core.fsmonitor=true status'
> +
> +# Now Watchman has done the initial scan and the fsmonitor index extension
> +# is populated so we'll see the performance win.
> +test_perf 'status (with fsmonitor)' 'git -c core.fsmonitor=true status'
> +
> +# Let's see if -uno & -uall make any difference
> +test_perf 'status -uno'          'git -c core.fsmonitor=true status -uno'
> +test_perf 'status -uall'         'git -c core.fsmonitor=true status -uall'
> +
> +test_done
>
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 21:44         ` David Turner
@ 2017-06-03 18:08           ` Ævar Arnfjörð Bjarmason
  2017-06-05 14:27           ` Ben Peart
  1 sibling, 0 replies; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-03 18:08 UTC (permalink / raw)
  To: David Turner
  Cc: git@vger.kernel.org, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	Jeff King, Christian Couder

On Fri, Jun 2, 2017 at 11:44 PM, David Turner <David.Turner@twosigma.com> wrote:
> BTW, a medium-sized (~250k files across 40k dirs) synthetic repo is available over bittorrent at:
> http://bitmover.com/2015-04-03-1M-git-bare.tar.bz2.torrent

Can you seed this please? I've been trying to download that since
yesterday, still at 0%.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 10:28       ` [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor Ævar Arnfjörð Bjarmason
  2017-06-02 21:44         ` David Turner
  2017-06-02 22:05         ` Ben Peart
@ 2017-06-04  1:59         ` Junio C Hamano
  2017-06-04  7:46           ` Ævar Arnfjörð Bjarmason
  2 siblings, 1 reply; 29+ messages in thread
From: Junio C Hamano @ 2017-06-04  1:59 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: git, Ben Peart, Nguyễn Thái Ngọc Duy,
	Johannes Schindelin, David Turner, Jeff King, Christian Couder

Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:

> This is WIP code for the reasons explained in the setup comments,
> unfortunately the perf code doesn't easily allow you to run different
> setup code for different versions you're testing. This test will stop
> working if the fsmonitor is merged into the master branch.
> ...
> +
> +	# Relies on core.fsmonitor not being merged into master. Needs
> +	# better per-test ways to disable it if it gets merged.
> +	git config core.fsmonitor true &&

Will stop working and relies on not merged can be read but I cannot
read "why" explained, and I cannot quite guess what the reason is.

If the code to read the configuration is not there, setting this
would not have any effect.  If the code is there, setting this would
have effect (either talking fsmonitor helps or it hurts).

And I do not think we'd ever see a version of Git that always relies
on talking to fsmonitor, i.e. "git config core.fsmonitor false" is not
a way to disable it, so...

Puzzled.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-04  1:59         ` Junio C Hamano
@ 2017-06-04  7:46           ` Ævar Arnfjörð Bjarmason
  2017-06-04  8:21             ` Jeff King
  0 siblings, 1 reply; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-04  7:46 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git Mailing List, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder, Thomas Rast

On Sun, Jun 4, 2017 at 3:59 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Ævar Arnfjörð Bjarmason  <avarab@gmail.com> writes:
>
>> This is WIP code for the reasons explained in the setup comments,
>> unfortunately the perf code doesn't easily allow you to run different
>> setup code for different versions you're testing. This test will stop
>> working if the fsmonitor is merged into the master branch.
>> ...
>> +
>> +     # Relies on core.fsmonitor not being merged into master. Needs
>> +     # better per-test ways to disable it if it gets merged.
>> +     git config core.fsmonitor true &&
>
> Will stop working and relies on not merged can be read but I cannot
> read "why" explained, and I cannot quite guess what the reason is.
>
> If the code to read the configuration is not there, setting this
> would not have any effect.  If the code is there, setting this would
> have effect (either talking fsmonitor helps or it hurts).
>
> And I do not think we'd ever see a version of Git that always relies
> on talking to fsmonitor, i.e. "git config core.fsmonitor false" is not
> a way to disable it, so...
>
> Puzzled.

Sorry about the unclear brevity.

What I'm referring to is not a limitation of git (we'll always be able
to turn off core.fsmonitor), but a limitation of the perf framework.
There's no way to run a test like this:

    ./run master next -- p*.sh

And have some information passed to the test to apply different
runtime options to the test depending on master or next, or be to test
master twice, once with the fsmonitor, once without, which this
hypothetical feature would do:

    ./run master:"GIT_PERF_7519_NO_FSMONITOR=Y" master -- p*.sh

So right now the test works because there's no core.fsmonitor in
master, so turning it on all the time only impacts avar/fsmonitor, not
master.

I started trying to add this to the perf framework the other day but
ran out of time, the option should also be passed down early enough to
be intercepted by the GIT_PERF_MAKE_COMMAND, so you could do e.g.:

    GIT_PERF_MAKE_COMMAND='make CFLAGS="$F"' \
        ./run v2.13.0:"F=-O0" v2.13.0:"F=-O1" v2.13.0:"F=-O2"
v2.13.0:"F=-O3" -- p*.sh

To test the same revision with different compilation flags.

A change like this would break the ability to enact certain perf
optimizations, right now we unpack the revision(s) you specify into
<sha1-it-points-to>, and e.g. make use of the fact that that directory
is already unpacked so we don't need to unpack it again.

If there was no way to pass a flag to specific revisions being tested,
then perf could just optimize:

    ./run v2.12.0 v2.13.0 b06d364310 -- p*.sh

To skip the b06d364310 run entirely since it's the same as v2.13.0. I
think breaking this minor assumption in the framework is fine, but
it's worth noting that it's an assumption we couldn't make anymore.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-04  7:46           ` Ævar Arnfjörð Bjarmason
@ 2017-06-04  8:21             ` Jeff King
  0 siblings, 0 replies; 29+ messages in thread
From: Jeff King @ 2017-06-04  8:21 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Junio C Hamano, Git Mailing List, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Christian Couder, Thomas Rast

On Sun, Jun 04, 2017 at 09:46:19AM +0200, Ævar Arnfjörð Bjarmason wrote:

> What I'm referring to is not a limitation of git (we'll always be able
> to turn off core.fsmonitor), but a limitation of the perf framework.
> There's no way to run a test like this:
> 
>     ./run master next -- p*.sh
> 
> And have some information passed to the test to apply different
> runtime options to the test depending on master or next, or be to test
> master twice, once with the fsmonitor, once without, which this
> hypothetical feature would do:
> 
>     ./run master:"GIT_PERF_7519_NO_FSMONITOR=Y" master -- p*.sh

Yeah, the perf framework was originally designed to find regressions
between versions of Git. It's really bad at comparing results between
any other dimensions. You can test different repository sizes by
tweaking the environment, but it can't aggregate or compare results
between those runs. Likewise, you can have two tests in a script which
time Git with and without certain options set, but there's no way to
compare the results of those tests.

It would be nice if the perf framework was aware of all of
these dimensions, stored each result as a tuple of all of the properties
(rather than just the version), and then let you group the results along
any dimension (I suspect there are cases where multi-dimensional
summaries would be useful, but that could come later).

For some dimensions you'd probably want support in the perf scripts
themselves to run a test with two variants. E.g., something like:

  test_perf_group 'fsmonitor' \
	'false' 'git config core.fsmonitor false' \
	 'true' 'git config core.fsmonitor true'
  test_perf 'status' 'git status'
  test_perf_group_end

where that would run the "git status" test for each of the named setups,
and store the timing results under "($VERSION, fsmonitor=false)", and so
on.

That's less flexible than specifying it to "./run" (which would let you
run just one tuple if you chose). But it also relieves the burden from
the user of figuring out which dimensions are interesting to tweak.

-Peff

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 21:44         ` David Turner
  2017-06-03 18:08           ` Ævar Arnfjörð Bjarmason
@ 2017-06-05 14:27           ` Ben Peart
  1 sibling, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-05 14:27 UTC (permalink / raw)
  To: David Turner, 'Ævar Arnfjörð Bjarmason',
	git@vger.kernel.org
  Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy,
	Johannes Schindelin, Jeff King, Christian Couder



On 6/2/2017 5:44 PM, David Turner wrote:
> BTW, a medium-sized (~250k files across 40k dirs) synthetic repo is available over bittorrent at:
> http://bitmover.com/2015-04-03-1M-git-bare.tar.bz2.torrent
> 
> I tried Ævar's perf test with that (on a beefy laptop with SSD), and got significantly slower results with bp/fsmonitor:
> Test                          origin/master     bp/fsmonitor
> -----------------------------------------------------------------------
> 7519.2: status (first)        0.32(0.23+0.39)   0.32(0.26+0.36) +0.0%
> 7519.3: status (subsequent)   0.18(0.14+0.34)   0.32(0.24+0.37) +77.8%
> 7519.4: status -uno           0.11(0.08+0.32)   0.24(0.18+0.34) +118.2%
> 7519.5: status -uall          0.49(0.22+0.56)   0.62(0.36+0.55) +26.5%
> 7519.2: status (first)        0.32(0.23+0.39)   0.32(0.26+0.36) +0.0%
> 7519.3: status (subsequent)   0.18(0.14+0.34)   0.32(0.24+0.37) +77.8%
> 7519.4: status -uno           0.11(0.08+0.32)   0.24(0.18+0.34) +118.2%
> 7519.5: status -uall          0.49(0.22+0.56)   0.62(0.36+0.55) +26.5%
> 
> I have not yet looked into why this is.
> 

I was very focused on getting minute long status calls down to seconds 
and multiple seconds down to sub-second.  The greatest benefits are when 
the file system cache doesn't already have all the file information 
cached and the current perf test doesn't test that case - just the warm 
cache test which has the least benefit.

That said, status times shouldn't be getting worse and this has 
highlighted that they are.  I've found one reason (the current patch 
series always flags the index as dirty so it gets written out every 
time). I've got a fix that only flags it dirty when the extension is 
turned on or off or when it actually finds an entry that has become 
dirty.  This helps but there is more going on than that.

I'm looking into why the minimum status time with fsmonitor turned on 
and a warm cache seems to be ~30ms.  More to come...

>> -----Original Message-----
>> From: Ævar Arnfjörð Bjarmason [mailto:avarab@gmail.com]
>> Sent: Friday, June 2, 2017 6:29 AM
>> To: git@vger.kernel.org
>> Cc: Junio C Hamano <gitster@pobox.com>; Ben Peart
>> <peartben@gmail.com>; Nguyễn Thái Ngọc Duy <pclouds@gmail.com>;
>> Johannes Schindelin <johannes.schindelin@gmx.de>; David Turner
>> <David.Turner@twosigma.com>; Jeff King <peff@peff.net>; Christian
>> Couder <christian.couder@gmail.com>; Ævar Arnfjörð Bjarmason
>> <avarab@gmail.com>
>> Subject: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
>>
>> Add a performance test for the new core.fsmonitor facility using the sample
>> query-fsmonitor hook.
>>
>> This is WIP code for the reasons explained in the setup comments,
>> unfortunately the perf code doesn't easily allow you to run different setup
>> code for different versions you're testing. This test will stop working if the
>> fsmonitor is merged into the master branch.
>>
>> Output against linxu.git:
>>
>>      $ GIT_PERF_REPEAT_COUNT=10 GIT_PERF_LARGE_REPO=~/g/linux
>> GIT_PERF_MAKE_OPTS='-j8' ./run origin/master avar/fsmonitor ./p7519-
>> fsmonitor.sh
>>      [...]
>>      Test                          origin/master     avar/fsmonitor
>>      -----------------------------------------------------------------------
>>      7519.2: status (first)        0.08(0.04+0.09)   0.12(0.07+0.10) +50.0%
>>      7519.3: status (subsequent)   0.08(0.04+0.09)   0.12(0.06+0.11) +50.0%
>>      7519.4: status -uno           0.02(0.02+0.05)   0.06(0.05+0.06) +200.0%
>>      7519.5: status -uall          0.08(0.06+0.07)   0.12(0.07+0.10) +50.0%
>>
>> And against a larger in-house monorepo I have here, with the same options
>> (except the repo path):
>>
>>      Test                          origin/master     avar/fsmonitor
>>      -----------------------------------------------------------------------
>>      7519.2: status (first)        0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
>>      7519.3: status (subsequent)   0.20(0.11+0.18)   0.27(0.15+0.21) +35.0%
>>      7519.4: status -uno           0.04(0.03+0.10)   0.22(0.08+0.12) +450.0%
>>      7519.5: status -uall          0.20(0.13+0.16)   0.27(0.18+0.19) +35.0%
>>
>> Against linux.git with a hack to flush the FS cache (on Linux) before running
>> the first 'git status', only running one test so the result isn't discarded as the
>> slowest of N:
>>
>>      $ GIT_PERF_REPEAT_COUNT=1 GIT_PERF_LARGE_REPO=~/g/linux
>> GIT_PERF_MAKE_COMMAND='sudo sync && echo 3 | sudo tee
>> /proc/sys/vm/drop_caches >/dev/null && make -j8' ./run origin/master
>> avar/fsmonitor ./p7519-fsmonitor.sh
>>      [...]
>>      Test                          origin/master     avar/fsmonitor
>>      ------------------------------------------------------------------------
>>      7519.2: status (first)        0.30(0.18+0.10)   8.26(0.22+0.10) +2653.3%
>>      7519.3: status (subsequent)   0.08(0.04+0.08)   0.81(0.09+0.07) +912.5%
>>      7519.4: status -uno           0.02(0.01+0.06)   0.08(0.04+0.07) +300.0%
>>      7519.5: status -uall          0.08(0.06+0.07)   0.15(0.08+0.09) +87.5%
>>
>> Now obviously due to 1 run that has a lot of noise, but I would expect that
>> first invocation to be blindingly fast since watchman has info on what files
>> were modified since the cache was flushed.
>>
>> The same on the large monorepo noted above:
>>
>>      Test                          origin/master     avar/fsmonitor
>>      -----------------------------------------------------------------------
>>      7519.2: status (first)        0.59(0.28+0.24)   0.93(0.35+0.19) +57.6%
>>      7519.3: status (subsequent)   0.20(0.10+0.19)   0.28(0.16+0.20) +40.0%
>>      7519.4: status -uno           0.04(0.04+0.09)   0.11(0.08+0.12) +175.0%
>>      7519.5: status -uall          0.29(0.11+0.18)   0.40(0.16+0.19) +37.9%
>>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
>> ---
>>
>>
>> On Fri, Jun 2, 2017 at 2:40 AM, Ben Peart <peartben@gmail.com> wrote:
>>> Any chance you can provide me with a bash script that contains the
>>> exact sequence of commands you are running to get this result?  I've
>>> been trying to replicate it using your notes but have not been able
>>> to.  I'd like to see if it is a repo difference, a platform
>>> difference, a command sequence difference (or something else entirely
>> :)).
>>
>> I can do better than that, here's a new perf test on top of this series which
>> demonstates the issue. I've only tested this on Linux
>> 4.9.0 with watchman 4.9.0 compiled from git (yes, they're coincidentally the
>> same version).
>>
>> A good addition to this would be `printf <fmt for date N sec in the
>> past> | watchman -j` as noted in my earlier mail, but I ran out of
>> time.
>>
>> You can also set any combination of GIT_PERF_7519_UNTRACKED_CACHE &
>> GIT_PERF_7519_SPLIT_INDEX to play with turning that on. I haven't tested all
>> combinations of that, but e.g. testing with untrackedCache didn't give results
>> that looked different from the performance regressions noted above.
>>
>> Aside from performance, I think a very good addition to stress-test this series
>> would be a patch to t/test-lib*sh guarded by some env flag to do a similar
>> watchman watch-del/watch/watch-list dance as the one I'm doing here in
>> the setup, and setting up the hook / config.
>>
>> That would allow testing the entire git test suite with this feature, to find any
>> subtle bugs this might have introduced in git-status.
>>
>>   t/perf/p7519-fsmonitor.sh | 58
>> +++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 58 insertions(+)
>>   create mode 100755 t/perf/p7519-fsmonitor.sh
>>
>> diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh new file
>> mode 100755 index 0000000000..b838a0ff14
>> --- /dev/null
>> +++ b/t/perf/p7519-fsmonitor.sh
>> @@ -0,0 +1,58 @@
>> +#!/bin/sh
>> +
>> +test_description="Test core.fsmonitor"
>> +
>> +. ./perf-lib.sh
>> +
>> +test_perf_large_repo
>> +test_checkout_worktree
>> +
>> +test_expect_success 'setup' '
>> +	# Maybe set untrackedCache & splitIndex depending on the
>> +	# environment, defaulting to false.
>> +	if test -n "$GIT_PERF_7519_UNTRACKED_CACHE"
>> +	then
>> +		git config core.untrackedCache true
>> +	else
>> +		git config core.untrackedCache false
>> +	fi &&
>> +	if test -n "$GIT_PERF_7519_SPLIT_INDEX"
>> +	then
>> +		git config core.splitIndex true
>> +	else
>> +		git config core.splitIndex false
>> +	fi &&
>> +
>> +	# Relies on core.fsmonitor not being merged into master. Needs
>> +	# better per-test ways to disable it if it gets merged.
>> +	git config core.fsmonitor true &&
>> +
>> +	# Hook scaffolding
>> +	mkdir .git/hooks &&
>> +	cp ../../../templates/hooks--query-fsmonitor.sample
>> +.git/hooks/query-fsmonitor &&
>> +
>> +	# Setup watchman & ensure it is actually watching
>> +	watchman watch-del "$PWD" >/dev/null 2>&1 &&
>> +	watchman watch "$PWD" >/dev/null 2>&1 &&
>> +	watchman watch-list | grep -q -F "$PWD"
>> +'
>> +
>> +# Setting:
>> +#
>> +#    GIT_PERF_REPEAT_COUNT=1 GIT_PERF_MAKE_COMMAND='sudo sync
>> && echo 3 | sudo tee /proc/sys/vm/drop_caches && make -j8'
>> +#
>> +# Can be used as a hack to performance test 'git status' on a cold fs #
>> +cache with an existing watchman watching the directory, which should #
>> +be blindingly fast, compared to amazingly slow without watchman.
>> +test_perf 'status (first)'       'git status'
>> +
>> +
>> +# The same git-status once the fs cache has been warmed, if using the #
>> +GIT_PERF_MAKE_COMMAND above. Otherwise the same as above.
>> +test_perf 'status (subsequent)'  'git status'
>> +
>> +# Let's see if -uno & -uall make any difference
>> +test_perf 'status -uno'          'git status -uno'
>> +test_perf 'status -uall'         'git status -uall'
>> +
>> +test_done
>> --
>> 2.13.0.506.g27d5fe0cd
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-02 23:06           ` Ævar Arnfjörð Bjarmason
@ 2017-06-07 19:51             ` Ben Peart
  2017-06-07 21:46               ` Ævar Arnfjörð Bjarmason
  0 siblings, 1 reply; 29+ messages in thread
From: Ben Peart @ 2017-06-07 19:51 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Git Mailing List, Junio C Hamano,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder, Ben Peart

On 6/2/2017 7:06 PM, Ævar Arnfjörð Bjarmason wrote:
> 
> I don't have time to update the perf test now or dig into it, but most
> of what you're describing in this mail doesn't at all match with the
> ad-hoc tests I ran in
> https://public-inbox.org/git/CACBZZX5e58bWuf3NdDYTxu2KyZj29hHONzN=rp-7vXd8nURyWQ@mail.gmail.com/
> 
> There (at the very end of the E-Mail) I'm running watchman in a tight
> loop while I flush the entire fs cache, its runtime is never longer
> than 600ms, with 3ms being the norm.

I added a perf trace around the entire query-fsmonitor hook proc (patch 
below) to measure the total actual impact of running the hook script + 
querying watchman + parsing the output with perl + passing the result 
back to git. On my machine, the total cost of the hook runs between 130 
ms and 180 ms when there are zero changes to report (ie best case).

With short status times, the overhead of watchman simply outweighs any 
gains in performance - especially when you have a warm file system cache 
as that cancels out the biggest win of avoiding the IO associated with 
scanning the working directory.

diff --git a/fsmonitor.c b/fsmonitor.c
index 763a8a3a3f..cb47f31863 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -210,9 +210,11 @@ void refresh_by_fsmonitor(struct index_state *istate)
          * If we have a last update time, call query-monitor for the set of
          * changes since that time.
          */
-       if (istate->fsmonitor_last_update)
+       if (istate->fsmonitor_last_update) {
                 query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION,
                         istate->fsmonitor_last_update, &query_result);
+               trace_performance_since(last_update, "query-fsmonitor");
+       }

         if (query_success) {
                 /* Mark all entries returned by the monitor as dirty */

> 
> I.e. flushing the cache doesn't slow things down much at all compared
> to how long a "git status" takes from cold cache. Something else must
> be going on, and the smoking gun is the gprof output I posted in the
> follow-up E-Mail:
> https://public-inbox.org/git/CACBZZX4eZ3G8LQ8O+_BkbkJ-ZXTOkUi9cW=QKYjfHKtmA3pgrA@mail.gmail.com/
> 
> There with the fsmonitor we end up calling blk_SHA1_Block ~100K times
> during "status", but IIRC (I don't have the output in front of me,
> this is from memory) something like twenty times without the
> fsmonitor.
> 
> It can't be a coincidence that with the fscache:
> 
> $ pwd; git ls-files | wc -l
> /home/avar/g/linux
> 59844
> 
> And you can see that in the fsmonitor "git status" we make exactly
> that many calls to cache_entry_from_ondisk(), but those calls don't
> show up at all in the non-fscache codepath.
> 

I don't see how the gprof numbers for the non-fsmonitor case can be 
correct.  It appears they don't contain any calls related to loading the 
index while the fsmonitor gprof numbers do.  Here is a typical call stack:

git.exe!cache_entry_from_ondisk()
git.exe!create_from_disk()
git.exe!do_read_index()
git.exe!read_index_from()
git.exe!read_index()

During read_index(), cache_entry_from_ondisk() gets called for every 
item in the index (which explains the 59K calls).  How can the 
non-fsmonitor codepath not be loading the index?

> So, again, I haven't dug and really must step away from the computer
> now, but this really looks like the fscache saves us the recursive
> readdir() / lstat() etc, but in return we somehow fall though to a
> codepath where we re-read the entire on-disk state back into the
> index, which we don't do in the non-fscache codepath.
> 

I've run multiple profiles and compared them with fsmonitor on and off 
and have been unable to find any performance regression caused by 
fsmonitor (other than flagging the index as dirty at times when it isn't 
required which I have fixed for the next patch series).

I have done many performance runs and when I subtract the _actual_ time 
spent in the hook from the overall command time, it comes in at slightly 
less time than when status is run with fsmonitor off.  This also leads 
me to believe there is no regression with fsmonitor on.

All this leads me back to my original conclusion: the reason status is 
slower in these specific cases is because the overhead of calling the 
hook exceeds the savings gained. If your status calls are taking less 
than a second, it just doesn't make sense to add the complexity and 
overhead of calling a file system watcher.

I'm working on an updated perf test that will demonstrate the best case 
scenario (warm watchman, cold file system cache) in addition to the 
worst case (cold watchman, warm file system cache).  The reality is that 
in normal use cases, perf will be between the two. I'll add that to the 
next iteration of the patch series.

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v4 6/6] fsmonitor: add a sample query-fsmonitor hook script for Watchman
  2017-06-01 15:51 ` [PATCH v4 6/6] fsmonitor: add a sample query-fsmonitor hook script for Watchman Ben Peart
@ 2017-06-07 21:38   ` Ævar Arnfjörð Bjarmason
  0 siblings, 0 replies; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-07 21:38 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano, Ben Peart,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder

On Thu, Jun 1, 2017 at 5:51 PM, Ben Peart <peartben@gmail.com> wrote:
> +if [ $1 -eq 1 ]

Tiny nit: Needs quoting:

    $ .git/hooks/query-fsmonitor
    .git/hooks/query-fsmonitor: 15: [: -eq: unexpected operator

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-07 19:51             ` Ben Peart
@ 2017-06-07 21:46               ` Ævar Arnfjörð Bjarmason
  2017-06-08  1:57                 ` Ben Peart
  0 siblings, 1 reply; 29+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2017-06-07 21:46 UTC (permalink / raw)
  To: Ben Peart
  Cc: Git Mailing List, Junio C Hamano,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder, Ben Peart

On Wed, Jun 7, 2017 at 9:51 PM, Ben Peart <peartben@gmail.com> wrote:
>
>
> On 6/2/2017 7:06 PM, Ævar Arnfjörð Bjarmason wrote:
>>
>>
>> I don't have time to update the perf test now or dig into it, but most
>> of what you're describing in this mail doesn't at all match with the
>> ad-hoc tests I ran in
>>
>> https://public-inbox.org/git/CACBZZX5e58bWuf3NdDYTxu2KyZj29hHONzN=rp-7vXd8nURyWQ@mail.gmail.com/
>>
>> There (at the very end of the E-Mail) I'm running watchman in a tight
>> loop while I flush the entire fs cache, its runtime is never longer
>> than 600ms, with 3ms being the norm.
>
>
> I added a perf trace around the entire query-fsmonitor hook proc (patch
> below) to measure the total actual impact of running the hook script +
> querying watchman + parsing the output with perl + passing the result back
> to git. On my machine, the total cost of the hook runs between 130 ms and
> 180 ms when there are zero changes to report (ie best case).
>
> With short status times, the overhead of watchman simply outweighs any gains
> in performance - especially when you have a warm file system cache as that
> cancels out the biggest win of avoiding the IO associated with scanning the
> working directory.
>
>
> diff --git a/fsmonitor.c b/fsmonitor.c
> index 763a8a3a3f..cb47f31863 100644
> --- a/fsmonitor.c
> +++ b/fsmonitor.c
> @@ -210,9 +210,11 @@ void refresh_by_fsmonitor(struct index_state *istate)
>          * If we have a last update time, call query-monitor for the set of
>          * changes since that time.
>          */
> -       if (istate->fsmonitor_last_update)
> +       if (istate->fsmonitor_last_update) {
>                 query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION,
>                         istate->fsmonitor_last_update, &query_result);
> +               trace_performance_since(last_update, "query-fsmonitor");
> +       }
>
>         if (query_success) {
>                 /* Mark all entries returned by the monitor as dirty */
>
>
>
>>
>> I.e. flushing the cache doesn't slow things down much at all compared
>> to how long a "git status" takes from cold cache. Something else must
>> be going on, and the smoking gun is the gprof output I posted in the
>> follow-up E-Mail:
>>
>> https://public-inbox.org/git/CACBZZX4eZ3G8LQ8O+_BkbkJ-ZXTOkUi9cW=QKYjfHKtmA3pgrA@mail.gmail.com/
>>
>> There with the fsmonitor we end up calling blk_SHA1_Block ~100K times
>> during "status", but IIRC (I don't have the output in front of me,
>> this is from memory) something like twenty times without the
>> fsmonitor.
>>
>> It can't be a coincidence that with the fscache:
>>
>> $ pwd; git ls-files | wc -l
>> /home/avar/g/linux
>> 59844
>>
>> And you can see that in the fsmonitor "git status" we make exactly
>> that many calls to cache_entry_from_ondisk(), but those calls don't
>> show up at all in the non-fscache codepath.
>>
>
> I don't see how the gprof numbers for the non-fsmonitor case can be correct.
> It appears they don't contain any calls related to loading the index while
> the fsmonitor gprof numbers do.  Here is a typical call stack:
>
> git.exe!cache_entry_from_ondisk()
> git.exe!create_from_disk()
> git.exe!do_read_index()
> git.exe!read_index_from()
> git.exe!read_index()
>
> During read_index(), cache_entry_from_ondisk() gets called for every item in
> the index (which explains the 59K calls).  How can the non-fsmonitor
> codepath not be loading the index?
>
>> So, again, I haven't dug and really must step away from the computer
>> now, but this really looks like the fscache saves us the recursive
>> readdir() / lstat() etc, but in return we somehow fall though to a
>> codepath where we re-read the entire on-disk state back into the
>> index, which we don't do in the non-fscache codepath.
>>
>
> I've run multiple profiles and compared them with fsmonitor on and off and
> have been unable to find any performance regression caused by fsmonitor
> (other than flagging the index as dirty at times when it isn't required
> which I have fixed for the next patch series).
>
> I have done many performance runs and when I subtract the _actual_ time
> spent in the hook from the overall command time, it comes in at slightly
> less time than when status is run with fsmonitor off.  This also leads me to
> believe there is no regression with fsmonitor on.
>
> All this leads me back to my original conclusion: the reason status is
> slower in these specific cases is because the overhead of calling the hook
> exceeds the savings gained. If your status calls are taking less than a
> second, it just doesn't make sense to add the complexity and overhead of
> calling a file system watcher.
>
> I'm working on an updated perf test that will demonstrate the best case
> scenario (warm watchman, cold file system cache) in addition to the worst
> case (cold watchman, warm file system cache).  The reality is that in normal
> use cases, perf will be between the two. I'll add that to the next iteration
> of the patch series.

I'll try to dig further once we have the next submission + that perf test.

On Linux the time spent calling the hook itself is minimal:

    $ touch foo; time .git/hooks/query-fsmonitor 1 $(($(date +%s) *
1000000000 - 10))
    Watchman says these changed: foo
    foo
    real    0m0.009s
    user    0m0.004s
    sys     0m0.000s

So I'm fairly sure something else entirely is going on, but anyway, we
can look at that later with the next submission.

Aside from that, I wonder on Windows how much speedier this could be
for you if the entire hook was written in perl instead of a
shellscript that calls perl. I could help with that if you'd like.

You can likely replace that call to uname with reading the $^O variable.

What you're doing shelling out to cygpath can probably be done by
loading the File::Spec library, but I'm not sure.

The echo / watchman / perl would need to be an IPC::Open3 invocation I think.

I think it being in shellscript is fine, just a suggestion in case
having it all in one process (aside from the watchman shell-out) would
help or Windows.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor
  2017-06-07 21:46               ` Ævar Arnfjörð Bjarmason
@ 2017-06-08  1:57                 ` Ben Peart
  0 siblings, 0 replies; 29+ messages in thread
From: Ben Peart @ 2017-06-08  1:57 UTC (permalink / raw)
  To: Ævar Arnfjörð Bjarmason
  Cc: Git Mailing List, Junio C Hamano,
	Nguyễn Thái Ngọc Duy, Johannes Schindelin,
	David Turner, Jeff King, Christian Couder, Ben Peart



On 6/7/2017 5:46 PM, Ævar Arnfjörð Bjarmason wrote:
> On Wed, Jun 7, 2017 at 9:51 PM, Ben Peart <peartben@gmail.com> wrote:
>>
>>
>> On 6/2/2017 7:06 PM, Ævar Arnfjörð Bjarmason wrote:
>>>
>>>
>>> I don't have time to update the perf test now or dig into it, but most
>>> of what you're describing in this mail doesn't at all match with the
>>> ad-hoc tests I ran in
>>>
>>> https://public-inbox.org/git/CACBZZX5e58bWuf3NdDYTxu2KyZj29hHONzN=rp-7vXd8nURyWQ@mail.gmail.com/
>>>
>>> There (at the very end of the E-Mail) I'm running watchman in a tight
>>> loop while I flush the entire fs cache, its runtime is never longer
>>> than 600ms, with 3ms being the norm.
>>
>>
>> I added a perf trace around the entire query-fsmonitor hook proc (patch
>> below) to measure the total actual impact of running the hook script +
>> querying watchman + parsing the output with perl + passing the result back
>> to git. On my machine, the total cost of the hook runs between 130 ms and
>> 180 ms when there are zero changes to report (ie best case).
>>
>> With short status times, the overhead of watchman simply outweighs any gains
>> in performance - especially when you have a warm file system cache as that
>> cancels out the biggest win of avoiding the IO associated with scanning the
>> working directory.
>>
>>
>> diff --git a/fsmonitor.c b/fsmonitor.c
>> index 763a8a3a3f..cb47f31863 100644
>> --- a/fsmonitor.c
>> +++ b/fsmonitor.c
>> @@ -210,9 +210,11 @@ void refresh_by_fsmonitor(struct index_state *istate)
>>           * If we have a last update time, call query-monitor for the set of
>>           * changes since that time.
>>           */
>> -       if (istate->fsmonitor_last_update)
>> +       if (istate->fsmonitor_last_update) {
>>                  query_success = !query_fsmonitor(HOOK_INTERFACE_VERSION,
>>                          istate->fsmonitor_last_update, &query_result);
>> +               trace_performance_since(last_update, "query-fsmonitor");
>> +       }
>>
>>          if (query_success) {
>>                  /* Mark all entries returned by the monitor as dirty */
>>
>>
>>
>>>
>>> I.e. flushing the cache doesn't slow things down much at all compared
>>> to how long a "git status" takes from cold cache. Something else must
>>> be going on, and the smoking gun is the gprof output I posted in the
>>> follow-up E-Mail:
>>>
>>> https://public-inbox.org/git/CACBZZX4eZ3G8LQ8O+_BkbkJ-ZXTOkUi9cW=QKYjfHKtmA3pgrA@mail.gmail.com/
>>>
>>> There with the fsmonitor we end up calling blk_SHA1_Block ~100K times
>>> during "status", but IIRC (I don't have the output in front of me,
>>> this is from memory) something like twenty times without the
>>> fsmonitor.
>>>
>>> It can't be a coincidence that with the fscache:
>>>
>>> $ pwd; git ls-files | wc -l
>>> /home/avar/g/linux
>>> 59844
>>>
>>> And you can see that in the fsmonitor "git status" we make exactly
>>> that many calls to cache_entry_from_ondisk(), but those calls don't
>>> show up at all in the non-fscache codepath.
>>>
>>
>> I don't see how the gprof numbers for the non-fsmonitor case can be correct.
>> It appears they don't contain any calls related to loading the index while
>> the fsmonitor gprof numbers do.  Here is a typical call stack:
>>
>> git.exe!cache_entry_from_ondisk()
>> git.exe!create_from_disk()
>> git.exe!do_read_index()
>> git.exe!read_index_from()
>> git.exe!read_index()
>>
>> During read_index(), cache_entry_from_ondisk() gets called for every item in
>> the index (which explains the 59K calls).  How can the non-fsmonitor
>> codepath not be loading the index?
>>
>>> So, again, I haven't dug and really must step away from the computer
>>> now, but this really looks like the fscache saves us the recursive
>>> readdir() / lstat() etc, but in return we somehow fall though to a
>>> codepath where we re-read the entire on-disk state back into the
>>> index, which we don't do in the non-fscache codepath.
>>>
>>
>> I've run multiple profiles and compared them with fsmonitor on and off and
>> have been unable to find any performance regression caused by fsmonitor
>> (other than flagging the index as dirty at times when it isn't required
>> which I have fixed for the next patch series).
>>
>> I have done many performance runs and when I subtract the _actual_ time
>> spent in the hook from the overall command time, it comes in at slightly
>> less time than when status is run with fsmonitor off.  This also leads me to
>> believe there is no regression with fsmonitor on.
>>
>> All this leads me back to my original conclusion: the reason status is
>> slower in these specific cases is because the overhead of calling the hook
>> exceeds the savings gained. If your status calls are taking less than a
>> second, it just doesn't make sense to add the complexity and overhead of
>> calling a file system watcher.
>>
>> I'm working on an updated perf test that will demonstrate the best case
>> scenario (warm watchman, cold file system cache) in addition to the worst
>> case (cold watchman, warm file system cache).  The reality is that in normal
>> use cases, perf will be between the two. I'll add that to the next iteration
>> of the patch series.
> 
> I'll try to dig further once we have the next submission + that perf test.
> 
> On Linux the time spent calling the hook itself is minimal:
> 
>      $ touch foo; time .git/hooks/query-fsmonitor 1 $(($(date +%s) *
> 1000000000 - 10))
>      Watchman says these changed: foo
>      foo
>      real    0m0.009s
>      user    0m0.004s
>      sys     0m0.000s
> 

Wow, what a difference:

$ touch foo; time .git/hooks/query-fsmonitor 1 $(($(date +%s) *
 > 1000000000 - 10))
.git/hooks/query-fsmonitor 1 1496885562999999990
.gitfoo
real    0m0.170s
user    0m0.045s
sys     0m0.091s


> So I'm fairly sure something else entirely is going on, but anyway, we
> can look at that later with the next submission.
> 
> Aside from that, I wonder on Windows how much speedier this could be
> for you if the entire hook was written in perl instead of a
> shellscript that calls perl. I could help with that if you'd like.
> 

Anything we can do to speed this up and have fewer moving pieces is all 
good.  I'm not a perl expert so I'd very much appreciate your expertise. 
  I have a couple of notes from Wez (author of Watchman) on things we 
could do to better optimize certain scenarios I'll pass along as well.

> You can likely replace that call to uname with reading the $^O variable.
> 
> What you're doing shelling out to cygpath can probably be done by
> loading the File::Spec library, but I'm not sure.
> 
> The echo / watchman / perl would need to be an IPC::Open3 invocation I think.
> 
> I think it being in shellscript is fine, just a suggestion in case
> having it all in one process (aside from the watchman shell-out) would
> help or Windows.
> 

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2017-06-08  1:57 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-01 15:50 [PATCH v4 0/6] Fast git status via a file system watcher Ben Peart
2017-06-01 15:51 ` [PATCH v4 1/6] bswap: add 64 bit endianness helper get_be64 Ben Peart
2017-06-01 15:51 ` [PATCH v4 2/6] dir: make lookup_untracked() available outside of dir.c Ben Peart
2017-06-01 15:51 ` [PATCH v4 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files Ben Peart
2017-06-01 15:51 ` [PATCH v4 4/6] fsmonitor: add test cases for fsmonitor extension Ben Peart
2017-06-01 15:51 ` [PATCH v4 5/6] fsmonitor: add documentation for the " Ben Peart
2017-06-01 15:51 ` [PATCH v4 6/6] fsmonitor: add a sample query-fsmonitor hook script for Watchman Ben Peart
2017-06-07 21:38   ` Ævar Arnfjörð Bjarmason
2017-06-01 19:57 ` [PATCH v4 0/6] Fast git status via a file system watcher Ævar Arnfjörð Bjarmason
2017-06-01 21:06   ` Ben Peart
2017-06-01 21:12     ` Ævar Arnfjörð Bjarmason
2017-06-01 21:13     ` Stefan Beller
2017-06-01 21:26       ` Jeff King
2017-06-01 20:51 ` Ævar Arnfjörð Bjarmason
2017-06-01 21:13   ` Ævar Arnfjörð Bjarmason
2017-06-02  0:40     ` Ben Peart
2017-06-02 10:28       ` [WIP/PATCH 7/6] perf: add a performance test for core.fsmonitor Ævar Arnfjörð Bjarmason
2017-06-02 21:44         ` David Turner
2017-06-03 18:08           ` Ævar Arnfjörð Bjarmason
2017-06-05 14:27           ` Ben Peart
2017-06-02 22:05         ` Ben Peart
2017-06-02 23:06           ` Ævar Arnfjörð Bjarmason
2017-06-07 19:51             ` Ben Peart
2017-06-07 21:46               ` Ævar Arnfjörð Bjarmason
2017-06-08  1:57                 ` Ben Peart
2017-06-04  1:59         ` Junio C Hamano
2017-06-04  7:46           ` Ævar Arnfjörð Bjarmason
2017-06-04  8:21             ` Jeff King
2017-06-02  1:56 ` [PATCH v4 0/6] Fast git status via a file system watcher Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).