From: Matheus Tavares <matheus.bernardino@usp.br>
To: git@vger.kernel.org
Cc: stolee@gmail.com, jeffhost@microsoft.com,
"Junio C Hamano" <gitster@pobox.com>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
"Thomas Gummerer" <t.gummerer@gmail.com>
Subject: [RFC PATCH 20/21] parallel-checkout: create leading dirs in workers
Date: Mon, 10 Aug 2020 18:33:28 -0300 [thread overview]
Message-ID: <d9e8bad5d4e6cf3c44c6e48d28c40536afcba115.1597093021.git.matheus.bernardino@usp.br> (raw)
In-Reply-To: <cover.1597093021.git.matheus.bernardino@usp.br>
Allow the parallel workers to create the leading directories of the
entries being checked out, instead of pre-creating them in the main
process. This optimization should be more effective on file systems with
higher I/O latency.
Part of the process of creating leading dirs is the removal of any
non-directory file that could be in the way. This is currently done
inside entry.c:create_directories(). However, if we were to move this to
the workers as well, we would risk removing a file just written by
another worker, which collided with the one currently being written. In
a worse scenario, we could remove the file right after a worker has
closed it but before it called stat(). To avoid these problems, let's
remove the non-directory files in the main process. And to avoid the
cost of extra lstat() calls in this process, we use
has_dirs_only_path(), which will have the necessary information already
cached from check_path().
Finally, to create the leading dirs in the workers, we could re-use
create_directories(). But, unlike the main process, we wouldn't have the
stat() information cached. Thus, let's use raceproof_create_file(),
which will only stat() the path components after an open() failure,
saving us time when creating subsequent files in the same directory.
Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
entry.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
parallel-checkout.c | 42 ++++++++++++++++++++++++++++++++++++------
2 files changed, 78 insertions(+), 9 deletions(-)
diff --git a/entry.c b/entry.c
index e876adff19..5dfd4d150d 100644
--- a/entry.c
+++ b/entry.c
@@ -57,6 +57,43 @@ static void create_directories(const char *path, int path_len,
free(buf);
}
+static void remove_non_dirs(const char *path, int path_len,
+ const struct checkout *state)
+{
+ char *buf = xmallocz(path_len);
+ int len = 0;
+
+ while (len < path_len) {
+ int ret;
+
+ do {
+ buf[len] = path[len];
+ len++;
+ } while (len < path_len && !is_dir_sep(path[len]));
+ if (len >= path_len)
+ break;
+ buf[len] = 0;
+
+ ret = has_dirs_only_path(buf, len, state->base_dir_len);
+
+ if (ret > 0)
+ continue; /* Is directory. */
+ if (ret < 0)
+ break; /* No entry */
+
+ /* ret == 0: not a directory, let's unlink it. */
+
+ if (!state->force)
+ die("'%s' already exists, and it's not a directory", buf);
+
+ if (unlink(buf))
+ die_errno("cannot unlink '%s'", buf);
+ else
+ break;
+ }
+ free(buf);
+}
+
static void remove_subtree(struct strbuf *path)
{
DIR *dir = opendir(path->buf);
@@ -555,8 +592,6 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
} else if (state->not_new)
return 0;
- create_directories(path.buf, path.len, state);
-
if (nr_checkouts)
(*nr_checkouts)++;
@@ -565,9 +600,13 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
ca = &ca_buf;
}
- if (!enqueue_checkout(ce, ca))
+ if (!enqueue_checkout(ce, ca)) {
+ /* "clean" path so that workers can create leading dirs */
+ remove_non_dirs(path.buf, path.len, state);
return 0;
+ }
+ create_directories(path.buf, path.len, state);
return write_entry(ce, path.buf, ca, state, 0);
}
diff --git a/parallel-checkout.c b/parallel-checkout.c
index 4d72540256..5b73d8fa4b 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -298,20 +298,48 @@ static int close_and_clear(int *fd)
return ret;
}
+struct ci_open_data {
+ int fd;
+ unsigned int mode;
+};
+
+static int ci_open(const char *path, void *cb)
+{
+ struct ci_open_data *data = cb;
+ data->fd = open(path, O_WRONLY | O_CREAT | O_EXCL, data->mode);
+
+ if (data->fd < 0) {
+ /*
+ * EISDIR can only indicate path collisions among the entries
+ * being checked out. We don't need raceproof_create_file() to
+ * try removing empty dirs. Instead, just let the caller know
+ * that the path already exists, so that the collision can be
+ * properly handled later.
+ */
+ if (errno == EISDIR)
+ errno = EEXIST;
+ return 1;
+ }
+
+ return 0;
+}
+
void write_checkout_item(struct checkout *state, struct checkout_item *ci)
{
- unsigned int mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
+ struct ci_open_data open_data;
int fd = -1, fstat_done = 0;
struct strbuf path = STRBUF_INIT;
+ open_data.mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
strbuf_add(&path, state->base_dir, state->base_dir_len);
strbuf_add(&path, ci->ce->name, ci->ce->ce_namelen);
- fd = open(path.buf, O_WRONLY | O_CREAT | O_EXCL, mode);
-
- if (fd < 0) {
- if (errno == EEXIST || errno == EISDIR || errno == ENOENT ||
- errno == ENOTDIR) {
+ /*
+ * The main process already removed any non-directory file that was in
+ * the way. So if we find one, it's a path collision.
+ */
+ if (raceproof_create_file(path.buf, ci_open, &open_data)) {
+ if (errno == EEXIST || errno == ENOTDIR || errno == ENOENT) {
/*
* Errors which probably represent a path collision.
* Suppress the error message and mark the ci to be
@@ -325,6 +353,8 @@ void write_checkout_item(struct checkout *state, struct checkout_item *ci)
goto out;
}
+ fd = open_data.fd;
+
if (write_checkout_item_to_fd(fd, state, ci, path.buf)) {
/* Error was already reported. */
ci->status = CI_FAILED;
--
2.27.0
next prev parent reply other threads:[~2020-08-10 21:36 UTC|newest]
Thread overview: 154+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-10 21:33 [RFC PATCH 00/21] [RFC] Parallel checkout Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 01/21] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 02/21] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 03/21] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 04/21] convert: add conv_attrs classification Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 05/21] entry: extract a header file for entry.c functions Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 06/21] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 07/21] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 08/21] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 09/21] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 10/21] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 11/21] parallel-checkout: make it truly parallel Matheus Tavares
2020-08-19 21:34 ` Jeff Hostetler
2020-08-20 1:33 ` Matheus Tavares Bernardino
2020-08-20 14:39 ` Jeff Hostetler
2020-08-10 21:33 ` [RFC PATCH 12/21] parallel-checkout: add configuration options Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 13/21] parallel-checkout: support progress displaying Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 14/21] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 15/21] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 16/21] checkout-index: add " Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 17/21] parallel-checkout: avoid stat() calls in workers Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 18/21] entry: use is_dir_sep() when checking leading dirs Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 19/21] symlinks: make has_dirs_only_path() track FL_NOENT Matheus Tavares
2020-08-10 21:33 ` Matheus Tavares [this message]
2020-08-10 21:33 ` [RFC PATCH 21/21] parallel-checkout: skip checking the working tree on clone Matheus Tavares
2020-08-12 16:57 ` [RFC PATCH 00/21] [RFC] Parallel checkout Jeff Hostetler
2020-09-22 22:49 ` [PATCH v2 00/19] Parallel Checkout (part I) Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 01/19] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 02/19] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 03/19] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 04/19] convert: add conv_attrs classification Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 05/19] entry: extract a header file for entry.c functions Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 06/19] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 07/19] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 08/19] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-10-01 15:53 ` Jeff Hostetler
2020-10-01 15:59 ` Jeff Hostetler
2020-09-22 22:49 ` [PATCH v2 09/19] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 10/19] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-10-05 6:17 ` [PATCH] parallel-checkout: drop unused checkout state parameter Jeff King
2020-10-05 13:13 ` Matheus Tavares Bernardino
2020-10-05 13:45 ` Jeff King
2020-09-22 22:49 ` [PATCH v2 11/19] parallel-checkout: make it truly parallel Matheus Tavares
2020-09-29 19:52 ` Martin Ågren
2020-09-30 14:02 ` Matheus Tavares Bernardino
2020-09-22 22:49 ` [PATCH v2 12/19] parallel-checkout: support progress displaying Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 13/19] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 14/19] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 15/19] checkout-index: add " Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 16/19] parallel-checkout: add tests for basic operations Matheus Tavares
2020-10-20 1:35 ` Jonathan Nieder
2020-10-20 2:55 ` Taylor Blau
2020-10-20 13:18 ` Matheus Tavares Bernardino
2020-10-20 19:09 ` Junio C Hamano
2020-10-20 3:18 ` Matheus Tavares Bernardino
2020-10-20 4:16 ` Jonathan Nieder
2020-10-20 19:14 ` Junio C Hamano
2020-09-22 22:49 ` [PATCH v2 17/19] parallel-checkout: add tests related to clone collisions Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 18/19] parallel-checkout: add tests related to .gitattributes Matheus Tavares
2020-09-22 22:49 ` [PATCH v2 19/19] ci: run test round with parallel-checkout enabled Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 00/19] Parallel Checkout (part I) Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 01/19] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-10-29 23:40 ` Junio C Hamano
2020-10-30 17:01 ` Matheus Tavares Bernardino
2020-10-30 17:38 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 02/19] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-10-29 23:48 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 03/19] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-10-29 23:51 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 04/19] convert: add conv_attrs classification Matheus Tavares
2020-10-29 23:53 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 05/19] entry: extract a header file for entry.c functions Matheus Tavares
2020-10-30 21:36 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 06/19] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 07/19] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 08/19] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-10-30 21:58 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 09/19] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-10-30 22:02 ` Junio C Hamano
2020-10-29 2:14 ` [PATCH v3 10/19] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-11-02 19:35 ` Junio C Hamano
2020-11-03 3:48 ` Matheus Tavares Bernardino
2020-10-29 2:14 ` [PATCH v3 11/19] parallel-checkout: make it truly parallel Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 12/19] parallel-checkout: support progress displaying Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 13/19] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 14/19] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 15/19] checkout-index: add " Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 16/19] parallel-checkout: add tests for basic operations Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 17/19] parallel-checkout: add tests related to clone collisions Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 18/19] parallel-checkout: add tests related to .gitattributes Matheus Tavares
2020-10-29 2:14 ` [PATCH v3 19/19] ci: run test round with parallel-checkout enabled Matheus Tavares
2020-10-29 19:48 ` [PATCH v3 00/19] Parallel Checkout (part I) Junio C Hamano
2020-10-30 15:58 ` Jeff Hostetler
2020-11-04 20:32 ` [PATCH v4 " Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 01/19] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-12-05 10:40 ` Christian Couder
2020-12-05 21:53 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 02/19] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-12-05 11:10 ` Christian Couder
2020-12-05 22:20 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 03/19] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-12-05 11:45 ` Christian Couder
2020-11-04 20:33 ` [PATCH v4 04/19] convert: add conv_attrs classification Matheus Tavares
2020-12-05 12:07 ` Christian Couder
2020-12-05 22:08 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 05/19] entry: extract a header file for entry.c functions Matheus Tavares
2020-12-06 8:31 ` Christian Couder
2020-11-04 20:33 ` [PATCH v4 06/19] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 07/19] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-12-06 8:53 ` Christian Couder
2020-11-04 20:33 ` [PATCH v4 08/19] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-12-06 9:35 ` Christian Couder
2020-12-07 13:52 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 09/19] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-12-06 10:02 ` Christian Couder
2020-12-07 16:47 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 10/19] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-12-06 11:36 ` Christian Couder
2020-12-07 19:06 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 11/19] parallel-checkout: make it truly parallel Matheus Tavares
2020-12-16 22:31 ` Emily Shaffer
2020-12-17 15:00 ` Matheus Tavares Bernardino
2020-11-04 20:33 ` [PATCH v4 12/19] parallel-checkout: support progress displaying Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 13/19] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 14/19] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 15/19] checkout-index: add " Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 16/19] parallel-checkout: add tests for basic operations Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 17/19] parallel-checkout: add tests related to clone collisions Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 18/19] parallel-checkout: add tests related to .gitattributes Matheus Tavares
2020-11-04 20:33 ` [PATCH v4 19/19] ci: run test round with parallel-checkout enabled Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 0/9] Parallel Checkout (part I) Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 1/9] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 2/9] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 3/9] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 4/9] convert: add classification for conv_attrs struct Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 5/9] entry: extract a header file for entry.c functions Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 6/9] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 7/9] entry: extract update_ce_after_write() from write_entry() Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 8/9] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-12-16 14:50 ` [PATCH v5 9/9] entry: add checkout_entry_ca() taking preloaded conv_attrs Matheus Tavares
2020-12-16 15:27 ` [PATCH v5 0/9] Parallel Checkout (part I) Christian Couder
2020-12-17 1:11 ` Junio C Hamano
2021-03-23 14:19 ` [PATCH v6 0/9] Parallel Checkout (part 1) Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 1/9] convert: make convert_attrs() and convert structs public Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 2/9] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 3/9] convert: add get_stream_filter_ca() variant Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 4/9] convert: add classification for conv_attrs struct Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 5/9] entry: extract a header file for entry.c functions Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 6/9] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 7/9] entry: extract update_ce_after_write() from write_entry() Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 8/9] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2021-03-23 14:19 ` [PATCH v6 9/9] entry: add checkout_entry_ca() taking preloaded conv_attrs Matheus Tavares
2021-03-23 17:34 ` [PATCH v6 0/9] Parallel Checkout (part 1) Junio C Hamano
2020-10-01 16:42 ` [RFC PATCH 00/21] [RFC] Parallel checkout Jeff Hostetler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d9e8bad5d4e6cf3c44c6e48d28c40536afcba115.1597093021.git.matheus.bernardino@usp.br \
--to=matheus.bernardino@usp.br \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jeffhost@microsoft.com \
--cc=pclouds@gmail.com \
--cc=stolee@gmail.com \
--cc=t.gummerer@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).