git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Matheus Tavares <matheus.bernardino@usp.br>
To: git@vger.kernel.org
Cc: "Christian Couder" <christian.couder@gmail.com>,
	"Olga Telezhnaya" <olyatelezhnaya@gmail.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Stefan Beller" <stefanbeller@gmail.com>,
	"Denton Liu" <liu.denton@gmail.com>, "Jeff King" <peff@peff.net>,
	"Junio C Hamano" <gitster@pobox.com>
Subject: [GSoC][PATCH 1/4] object-store: add lock to read_object_file_extended()
Date: Sat, 10 Aug 2019 17:27:27 -0300	[thread overview]
Message-ID: <052de4c139bf4962182e6cb8f4aa315aa6130124.1565468806.git.matheus.bernardino@usp.br> (raw)
In-Reply-To: <cover.1565468806.git.matheus.bernardino@usp.br>

Allow read_object_file_extended() to be called by multiple threads
protecting it with a lock. The lock usage can be toggled with
enable_obj_read_lock() and disable_obj_read_lock().

Probably there are many spots in read_object_file_extended()'s call
chain that could be executed unlocked (and thus, in parallel). But, for
now, we are only interested in allowing parallel access to zlib
inflation. This is one of the sections where object reading spends most
of the time and it's already thread-safe. So, to take advantage of that,
the lock is released when entering it and re-acquired right after. We
may refine the lock to also exploit other possible parallel spots in the
future, but threaded zlib inflation should already give great speedups.

Note that add_delta_base_cache() was also modified to skip adding
already present entries to the cache. This wasn't possible before, but
now it is since phase I and phase III of unpack_entry() may execute
concurrently.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 object-store.h |  4 ++++
 packfile.c     |  7 ++++++
 sha1-file.c    | 61 +++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/object-store.h b/object-store.h
index 7f7b3cdd80..cfc9484995 100644
--- a/object-store.h
+++ b/object-store.h
@@ -159,6 +159,10 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf,
 void *map_loose_object(struct repository *r, const struct object_id *oid,
 		       unsigned long *size);
 
+void enable_obj_read_lock(void);
+void disable_obj_read_lock(void);
+void obj_read_lock(void);
+void obj_read_unlock(void);
 void *read_object_file_extended(struct repository *r,
 				const struct object_id *oid,
 				enum object_type *type,
diff --git a/packfile.c b/packfile.c
index fc43a6c52c..de93dc50e2 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1115,7 +1115,9 @@ unsigned long get_size_from_delta(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
+		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
+		obj_read_lock();
 		curpos += stream.next_in - in;
 	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
 		 stream.total_out < sizeof(delta_head));
@@ -1468,6 +1470,9 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
 	struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
 	struct list_head *lru, *tmp;
 
+	if (get_delta_base_cache_entry(p, base_offset))
+		return;
+
 	delta_base_cached += base_size;
 
 	list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
@@ -1597,7 +1602,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
 	do {
 		in = use_pack(p, w_curs, curpos, &stream.avail_in);
 		stream.next_in = in;
+		obj_read_unlock();
 		st = git_inflate(&stream, Z_FINISH);
+		obj_read_lock();
 		if (!stream.avail_out)
 			break; /* the payload is larger than it should be */
 		curpos += stream.next_in - in;
diff --git a/sha1-file.c b/sha1-file.c
index 84fd02f107..f5ff51aedb 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -1560,16 +1560,54 @@ int pretend_object_file(void *buf, unsigned long len, enum object_type type,
 	return 0;
 }
 
+static pthread_mutex_t obj_read_mutex;
+static int obj_read_use_lock = 0;
+
+/*
+ * Enabling the object read lock allows multiple threads to safely call the
+ * following functions in parallel: repo_read_object_file(), read_object_file()
+ * and read_object_file_extended().
+ */
+void enable_obj_read_lock(void)
+{
+	if (obj_read_use_lock)
+		return;
+
+	obj_read_use_lock = 1;
+	pthread_mutex_init(&obj_read_mutex, NULL);
+}
+
+void disable_obj_read_lock(void)
+{
+	if (!obj_read_use_lock)
+		return;
+
+	obj_read_use_lock = 0;
+	pthread_mutex_destroy(&obj_read_mutex);
+}
+
+void obj_read_lock(void)
+{
+	if(obj_read_use_lock)
+		pthread_mutex_lock(&obj_read_mutex);
+}
+
+void obj_read_unlock(void)
+{
+	if(obj_read_use_lock)
+		pthread_mutex_unlock(&obj_read_mutex);
+}
+
 /*
  * This function dies on corrupt objects; the callers who want to
  * deal with them should arrange to call read_object() and give error
  * messages themselves.
  */
-void *read_object_file_extended(struct repository *r,
-				const struct object_id *oid,
-				enum object_type *type,
-				unsigned long *size,
-				int lookup_replace)
+static void *do_read_object_file_extended(struct repository *r,
+					  const struct object_id *oid,
+					  enum object_type *type,
+					  unsigned long *size,
+					  int lookup_replace)
 {
 	void *data;
 	const struct packed_git *p;
@@ -1602,6 +1640,19 @@ void *read_object_file_extended(struct repository *r,
 	return NULL;
 }
 
+void *read_object_file_extended(struct repository *r,
+				const struct object_id *oid,
+				enum object_type *type,
+				unsigned long *size,
+				int lookup_replace)
+{
+	void *data;
+	obj_read_lock();
+	data = do_read_object_file_extended(r, oid, type, size, lookup_replace);
+	obj_read_unlock();
+	return data;
+}
+
 void *read_object_with_reference(struct repository *r,
 				 const struct object_id *oid,
 				 const char *required_type_name,
-- 
2.22.0


  reply	other threads:[~2019-08-10 20:28 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-10 20:27 [GSoC][PATCH 0/4] grep: re-enable threads when cached, w/ parallel inflation Matheus Tavares
2019-08-10 20:27 ` Matheus Tavares [this message]
2019-08-10 20:27 ` [GSoC][PATCH 2/4] grep: allow locks to be enabled individually Matheus Tavares
2019-08-10 20:27 ` [GSoC][PATCH 3/4] grep: disable grep_read_mutex when possible Matheus Tavares
2019-08-10 20:27 ` [GSoC][PATCH 4/4] grep: re-enable threads in some non-worktree cases Matheus Tavares
2019-09-30  1:50 ` [PATCH v2 00/11] grep: improve threading and fix race conditions Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 01/11] grep: fix race conditions on userdiff calls Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 02/11] grep: fix race conditions at grep_submodule() Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 03/11] grep: fix racy calls in grep_objects() Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 04/11] replace-object: make replace operations thread-safe Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 05/11] object-store: allow threaded access to object reading Matheus Tavares
2019-11-12  2:54     ` Jonathan Tan
2019-11-13  5:20       ` Jeff King
2019-11-14  5:57         ` Matheus Tavares Bernardino
2019-11-14  6:01           ` Jeff King
2019-11-14 18:15             ` Jonathan Tan
2019-11-15  4:12               ` Jeff King
2019-12-19 22:27                 ` Matheus Tavares Bernardino
2020-01-09 22:02                   ` Matheus Tavares Bernardino
2020-01-10 19:07                     ` Christian Couder
2019-09-30  1:50   ` [PATCH v2 06/11] grep: replace grep_read_mutex by internal obj read lock Matheus Tavares
2019-10-01 19:23     ` [PATCH] squash! " Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 07/11] submodule-config: add skip_if_read option to repo_read_gitmodules() Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 08/11] grep: allow submodule functions to run in parallel Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 09/11] grep: protect packed_git [re-]initialization Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 10/11] grep: re-enable threads in non-worktree case Matheus Tavares
2019-09-30  1:50   ` [PATCH v2 11/11] grep: move driver pre-load out of critical section Matheus Tavares
2020-01-16  2:39   ` [PATCH v3 00/12] grep: improve threading and fix race conditions Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 01/12] grep: fix race conditions on userdiff calls Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 02/12] grep: fix race conditions at grep_submodule() Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 03/12] grep: fix racy calls in grep_objects() Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 04/12] replace-object: make replace operations thread-safe Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 05/12] object-store: allow threaded access to object reading Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 06/12] grep: replace grep_read_mutex by internal obj read lock Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 07/12] submodule-config: add skip_if_read option to repo_read_gitmodules() Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 08/12] grep: allow submodule functions to run in parallel Matheus Tavares
2020-01-29 11:26       ` SZEDER Gábor
2020-01-29 18:49         ` Junio C Hamano
2020-01-29 18:57         ` Junio C Hamano
2020-01-29 20:42           ` Matheus Tavares Bernardino
2020-01-30 13:28             ` Philippe Blain
2020-01-16  2:39     ` [PATCH v3 09/12] grep: protect packed_git [re-]initialization Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 10/12] grep: re-enable threads in non-worktree case Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 11/12] grep: move driver pre-load out of critical section Matheus Tavares
2020-01-16  2:40     ` [PATCH v3 12/12] grep: use no. of cores as the default no. of threads Matheus Tavares
2020-01-16 13:11       ` Victor Leschuk
2020-01-16 14:47         ` [PATCH] " Matheus Tavares

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=052de4c139bf4962182e6cb8f4aa315aa6130124.1565468806.git.matheus.bernardino@usp.br \
    --to=matheus.bernardino@usp.br \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=liu.denton@gmail.com \
    --cc=olyatelezhnaya@gmail.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=stefanbeller@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).