git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org, pclouds@gmail.com
Subject: [PATCH 13/19] index-helper: use watchman to avoid refreshing index with lstat()
Date: Wed,  9 Mar 2016 13:36:16 -0500	[thread overview]
Message-ID: <1457548582-28302-14-git-send-email-dturner@twopensource.com> (raw)
In-Reply-To: <1457548582-28302-1-git-send-email-dturner@twopensource.com>

From: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>

Watchman is hidden behind index-helper. Before git tries to read the
index from shm, it notifies index-helper with SIGUSR1 and sleeps,
waiting for index-helper to prepare the shm. index-helper then contacts
watchman, updates the 'WAMA' extension, puts it in a separate shm, and
wakes git up with SIGHUP.

Git uses this extension to avoid calling lstat() on unchanged entries.
Git only trusts the 'WAMA' extension when it is received from the
separate shm, not from disk. Unmarked entries are "clean". Marked
entries are dirty from watchman's point of view. If Git finds out that
some entries are 'watchman-dirty' but are really unchanged (e.g. the
file was changed, then reverted back), it will clear the marking in
'WAMA' before writing it down.

Hiding watchman behind index-helper means you need both daemons. You
can't run watchman alone. Not so good. But on the other hand, the 'git'
binary is not linked to the watchman/json libraries, which is good for
packaging. The core git package will run fine without watchman-related
packages. Users who need watchman can install git-index-helper and
its dependencies.

Another reason for tying watchman to index-helper is that, when used
with the untracked cache, we need to keep track of the $GIT_WORK_TREE
file listing. That kind of list can be kept in index-helper.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Makefile       |  5 ++++
 cache.h        |  2 ++
 index-helper.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 read-cache.c   | 43 +++++++++++++++++++++++++++---
 4 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/Makefile b/Makefile
index e51331c..d79fc0c 100644
--- a/Makefile
+++ b/Makefile
@@ -450,6 +450,7 @@ MSGFMT = msgfmt
 CURL_CONFIG = curl-config
 PTHREAD_LIBS = -lpthread
 PTHREAD_CFLAGS =
+WATCHMAN_LIBS =
 GCOV = gcov
 
 export TCL_PATH TCLTK_PATH
@@ -1419,6 +1420,7 @@ endif
 ifdef USE_WATCHMAN
 	LIB_H += watchman-support.h
 	LIB_OBJS += watchman-support.o
+	WATCHMAN_LIBS = -lwatchman
 	BASIC_CFLAGS += -DUSE_WATCHMAN
 endif
 
@@ -2032,6 +2034,9 @@ git-remote-testsvn$X: remote-testsvn.o GIT-LDFLAGS $(GITLIBS) $(VCSSVN_LIB)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) \
 	$(VCSSVN_LIB)
 
+git-index-helper$X: index-helper.o GIT-LDFLAGS $(GITLIBS)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) $(WATCHMAN_LIBS)
+
 $(REMOTE_CURL_ALIASES): $(REMOTE_CURL_PRIMARY)
 	$(QUIET_LNCP)$(RM) $@ && \
 	ln $< $@ 2>/dev/null || \
diff --git a/cache.h b/cache.h
index bf20652..272c928 100644
--- a/cache.h
+++ b/cache.h
@@ -558,6 +558,7 @@ extern int daemonize(int *);
 
 /* Initialize and use the cache information */
 struct lock_file;
+extern int verify_index(const struct index_state *);
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const struct pathspec *pathspec);
 extern int do_read_index(struct index_state *istate, const char *path,
@@ -565,6 +566,7 @@ extern int do_read_index(struct index_state *istate, const char *path,
 extern int read_index_from(struct index_state *, const char *path);
 extern int is_index_unborn(struct index_state *);
 extern int read_index_unmerged(struct index_state *);
+extern void write_watchman_ext(struct strbuf *sb, struct index_state* istate);
 #define COMMIT_LOCK		(1 << 0)
 #define CLOSE_LOCK		(1 << 1)
 #define REFRESH_DAEMON		(1 << 2)
diff --git a/index-helper.c b/index-helper.c
index cf26da7..7e7ce9b 100644
--- a/index-helper.c
+++ b/index-helper.c
@@ -5,15 +5,18 @@
 #include "split-index.h"
 #include "shm.h"
 #include "lockfile.h"
+#include "watchman-support.h"
 
 struct shm {
 	unsigned char sha1[20];
 	void *shm;
 	size_t size;
+	pid_t pid;
 };
 
 static struct shm shm_index;
 static struct shm shm_base_index;
+static struct shm shm_watchman;
 static int daemonized, to_verify = 1;
 
 static void release_index_shm(struct shm *is)
@@ -25,10 +28,21 @@ static void release_index_shm(struct shm *is)
 	is->shm = NULL;
 }
 
+static void release_watchman_shm(struct shm *is)
+{
+	if (!is->shm)
+		return;
+	munmap(is->shm, is->size);
+	git_shm_unlink("git-watchman-%s-%" PRIuMAX,
+		       sha1_to_hex(is->sha1), (uintmax_t)is->pid);
+	is->shm = NULL;
+}
+
 static void cleanup_shm(void)
 {
 	release_index_shm(&shm_index);
 	release_index_shm(&shm_base_index);
+	release_watchman_shm(&shm_watchman);
 }
 
 static void cleanup(void)
@@ -120,13 +134,15 @@ static void share_the_index(void)
 	if (the_index.split_index && the_index.split_index->base)
 		share_index(the_index.split_index->base, &shm_base_index);
 	share_index(&the_index, &shm_index);
-	if (to_verify && !verify_shm())
+	if (to_verify && !verify_shm()) {
 		cleanup_shm();
-	discard_index(&the_index);
+		discard_index(&the_index);
+	}
 }
 
 static void refresh(int sig)
 {
+	discard_index(&the_index);
 	the_index.keep_mmap = 1;
 	the_index.to_shm    = 1;
 	if (read_cache() < 0)
@@ -136,7 +152,55 @@ static void refresh(int sig)
 
 #ifdef HAVE_SHM
 
-static void do_nothing(int sig)
+#ifdef USE_WATCHMAN
+static void share_watchman(struct index_state *istate,
+			   struct shm *is, pid_t pid)
+{
+	struct strbuf sb = STRBUF_INIT;
+	void *shm;
+
+	write_watchman_ext(&sb, istate);
+	if (git_shm_map(O_CREAT | O_EXCL | O_RDWR, 0700, sb.len + 20,
+			&shm, PROT_READ | PROT_WRITE, MAP_SHARED,
+			"git-watchman-%s-%" PRIuMAX,
+			sha1_to_hex(istate->sha1), (uintmax_t)pid) == sb.len + 20) {
+		is->size = sb.len + 20;
+		is->shm = shm;
+		is->pid = pid;
+		hashcpy(is->sha1, istate->sha1);
+
+		memcpy(shm, sb.buf, sb.len);
+		hashcpy((unsigned char *)shm + is->size - 20, is->sha1);
+	}
+	strbuf_release(&sb);
+}
+
+static void prepare_with_watchman(pid_t pid)
+{
+	/*
+	 * with the help of watchman, maybe we could detect if
+	 * $GIT_DIR/index is updated..
+	 */
+	if (!verify_index(&the_index))
+		refresh(0);
+
+	if (check_watchman(&the_index))
+		return;
+
+	share_watchman(&the_index, &shm_watchman, pid);
+}
+
+static void prepare_index(int sig, siginfo_t *si, void *context)
+{
+	release_watchman_shm(&shm_watchman);
+	if (the_index.last_update)
+		prepare_with_watchman(si->si_pid);
+	kill(si->si_pid, SIGHUP); /* stop the waiting in poke_daemon() */
+}
+
+#else
+
+static void prepare_index(int sig, siginfo_t *si, void *context)
 {
 	/*
 	 * what we need is the signal received and interrupts
@@ -145,11 +209,21 @@ static void do_nothing(int sig)
 	 */
 }
 
+#endif
+
 static void loop(const char *pid_file, int idle_in_seconds)
 {
+	struct sigaction sa;
+
 	sigchain_pop(SIGHUP);	/* pushed by sigchain_push_common */
 	sigchain_push(SIGHUP, refresh);
-	sigchain_push(SIGUSR1, do_nothing);
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = prepare_index;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO;
+	sigaction(SIGUSR1, &sa, NULL);
+
 	refresh(0);
 	while (sleep(idle_in_seconds))
 		; /* do nothing, all is handled by signal handlers already */
@@ -245,6 +319,8 @@ int main(int argc, char **argv)
 				       LOCK_DIE_ON_ERROR);
 #ifdef GIT_WINDOWS_NATIVE
 	strbuf_addstr(&sb, "HWND");
+#elif defined(USE_WATCHMAN)
+	strbuf_addch(&sb, 'W');	/* see poke_daemon() */
 #endif
 	strbuf_addf(&sb, "%" PRIuMAX, (uintmax_t) getpid());
 	write_in_full(fd, sb.buf, sb.len);
diff --git a/read-cache.c b/read-cache.c
index 57c5df9..78f5f0e 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1405,7 +1405,7 @@ static int read_watchman_ext(struct index_state *istate, const void *data,
 	return 0;
 }
 
-static void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
+void write_watchman_ext(struct strbuf *sb, struct index_state* istate)
 {
 	struct ewah_bitmap *bitmap;
 	int i;
@@ -1722,6 +1722,39 @@ static int try_shm(struct index_state *istate)
 	return 0;
 }
 
+static void refresh_by_watchman(struct index_state *istate)
+{
+	void *shm = NULL;
+	int length;
+	int i;
+
+	length = git_shm_map(O_RDONLY, 0700, -1, &shm,
+			     PROT_READ, MAP_SHARED,
+			     "git-watchman-%s-%" PRIuMAX,
+			     sha1_to_hex(istate->sha1),
+			     (uintmax_t)getpid());
+
+	if (length <= 20 ||
+	    hashcmp(istate->sha1, (unsigned char *)shm + length - 20) ||
+	    /*
+	     * No need to clear CE_WATCHMAN_DIRTY set by 'WAMA' on
+	     * disk. Watchman can only set more, not clear any, so
+	     * this is OR mask.
+	     */
+	    read_watchman_ext(istate, shm, length - 20))
+		goto done;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		if (ce_stage(ce) || (ce->ce_flags & CE_WATCHMAN_DIRTY))
+			continue;
+		ce_mark_uptodate(ce);
+	}
+done:
+	if (shm)
+		munmap(shm, length);
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -1842,7 +1875,7 @@ int read_index_from(struct index_state *istate, const char *path)
 	split_index = istate->split_index;
 	if (!split_index || is_null_sha1(split_index->base_sha1)) {
 		post_read_index_from(istate);
-		return ret;
+		goto done;
 	}
 
 	if (split_index->base)
@@ -1863,6 +1896,10 @@ int read_index_from(struct index_state *istate, const char *path)
 		    sha1_to_hex(split_index->base->sha1));
 	merge_base_index(istate);
 	post_read_index_from(istate);
+
+done:
+	if (ret > 0 && istate->from_shm && istate->last_update)
+		refresh_by_watchman(istate);
 	return ret;
 }
 
@@ -2164,7 +2201,7 @@ out:
 	return 0;
 }
 
-static int verify_index(const struct index_state *istate)
+int verify_index(const struct index_state *istate)
 {
 	return verify_index_from(istate, get_index_file());
 }
-- 
2.4.2.767.g62658d5-twtrsrc

  parent reply	other threads:[~2016-03-09 18:37 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-09 18:36 [PATCH 00/19] index-helper, watchman David Turner
2016-03-09 18:36 ` [PATCH 01/19] trace.c: add GIT_TRACE_PACK_STATS for pack usage statistics David Turner
2016-03-09 22:58   ` Junio C Hamano
2016-03-10  0:05     ` David Turner
2016-03-10 10:59       ` Duy Nguyen
2016-03-09 18:36 ` [PATCH 02/19] read-cache.c: fix constness of verify_hdr() David Turner
2016-03-09 18:36 ` [PATCH 03/19] read-cache: allow to keep mmap'd memory after reading David Turner
2016-03-09 23:02   ` Junio C Hamano
2016-03-10  0:09     ` David Turner
2016-03-09 18:36 ` [PATCH 04/19] index-helper: new daemon for caching index and related stuff David Turner
2016-03-09 23:09   ` Junio C Hamano
2016-03-09 23:21     ` Junio C Hamano
2016-03-10  0:01       ` David Turner
2016-03-10 11:17       ` Duy Nguyen
2016-03-10 20:22         ` David Turner
2016-03-11  1:11           ` Duy Nguyen
2016-03-10  0:18     ` David Turner
2016-03-15 11:56     ` Duy Nguyen
2016-03-15 15:56       ` Junio C Hamano
2016-03-15 11:52   ` Duy Nguyen
2016-03-09 18:36 ` [PATCH 05/19] trace.c: add GIT_TRACE_INDEX_STATS for index statistics David Turner
2016-03-09 18:36 ` [PATCH 06/19] index-helper: add --strict David Turner
2016-03-09 18:36 ` [PATCH 07/19] daemonize(): set a flag before exiting the main process David Turner
2016-03-09 18:36 ` [PATCH 08/19] index-helper: add --detach David Turner
2016-03-09 18:36 ` [PATCH 09/19] index-helper: add Windows support David Turner
2016-03-16 11:42   ` Duy Nguyen
2016-03-17 12:18     ` Johannes Schindelin
2016-03-17 12:59       ` Duy Nguyen
2016-03-09 18:36 ` [PATCH 10/19] read-cache: add watchman 'WAMA' extension David Turner
2016-03-09 18:36 ` [PATCH 11/19] Add watchman support to reduce index refresh cost David Turner
2016-03-09 18:36 ` [PATCH 12/19] read-cache: allow index-helper to prepare shm before git reads it David Turner
2016-03-09 18:36 ` David Turner [this message]
2016-03-09 18:36 ` [PATCH 14/19] update-index: enable/disable watchman support David Turner
2016-03-09 18:36 ` [PATCH 15/19] unpack-trees: preserve index extensions David Turner
2016-03-09 18:36 ` [PATCH 16/19] index-helper: rewrite pidfile after daemonizing David Turner
2016-03-09 18:36 ` [PATCH 17/19] index-helper: process management David Turner
2016-03-09 18:36 ` [PATCH 18/19] index-helper: autorun David Turner
2016-03-15 12:12   ` Duy Nguyen
2016-03-15 14:26     ` Johannes Schindelin
2016-03-16 11:37       ` Duy Nguyen
2016-03-16 18:11       ` David Turner
2016-03-16 18:27         ` Johannes Schindelin
2016-03-17 13:02           ` Duy Nguyen
2016-03-17 14:43             ` Johannes Schindelin
2016-03-17 18:31               ` David Turner
2016-03-18  0:50               ` Duy Nguyen
2016-03-18  7:14                 ` Johannes Schindelin
2016-03-18  7:44                   ` Duy Nguyen
2016-03-18 17:22                     ` David Turner
2016-03-18 23:09                       ` Duy Nguyen
2016-03-18  7:17                 ` Johannes Schindelin
2016-03-18  7:34                   ` Duy Nguyen
2016-03-18 15:57                     ` Johannes Schindelin
2016-03-09 18:36 ` [PATCH 19/19] hack: watchman/untracked cache mashup David Turner
2016-03-15 12:31   ` Duy Nguyen
2016-03-17  0:56     ` David Turner
2016-03-17 13:06       ` Duy Nguyen
2016-03-17 18:08         ` David Turner
2016-03-29 17:09 ` [PATCH 00/19] index-helper, watchman Torsten Bögershausen
2016-03-29 21:51   ` David Turner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1457548582-28302-14-git-send-email-dturner@twopensource.com \
    --to=dturner@twopensource.com \
    --cc=git@vger.kernel.org \
    --cc=pclouds@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).