All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matheus Tavares <matheus.bernardino@usp.br>
To: git@vger.kernel.org
Cc: stolee@gmail.com, jeffhost@microsoft.com,
	"Junio C Hamano" <gitster@pobox.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Thomas Gummerer" <t.gummerer@gmail.com>
Subject: [RFC PATCH 20/21] parallel-checkout: create leading dirs in workers
Date: Mon, 10 Aug 2020 18:33:28 -0300	[thread overview]
Message-ID: <d9e8bad5d4e6cf3c44c6e48d28c40536afcba115.1597093021.git.matheus.bernardino@usp.br> (raw)
In-Reply-To: <cover.1597093021.git.matheus.bernardino@usp.br>

Allow the parallel workers to create the leading directories of the
entries being checked out, instead of pre-creating them in the main
process. This optimization should be more effective on file systems with
higher I/O latency.

Part of the process of creating leading dirs is the removal of any
non-directory file that could be in the way. This is currently done
inside entry.c:create_directories(). However, if we were to move this to
the workers as well, we would risk removing a file just written by
another worker, which collided with the one currently being written.  In
a worse scenario, we could remove the file right after a worker have
closed it but before it called stat(). To avoid these problems, let's
remove the non-directory files in the main process. And to avoid the
cost of extra lstat() calls in this process, we use
has_dirs_only_path(), which will have the necessary information already
cached from check_path().

Finally, to create the leading dirs in the workers, we could re-use
create_directories(). But, unlike the main process, we wouldn't have the
stat() information cached. Thus, let's use raceproof_create_file(),
which will only stat() the path components after a open() failure,
saving us time when creating subsequent files in the same directory.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 entry.c             | 45 ++++++++++++++++++++++++++++++++++++++++++---
 parallel-checkout.c | 42 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/entry.c b/entry.c
index e876adff19..5dfd4d150d 100644
--- a/entry.c
+++ b/entry.c
@@ -57,6 +57,43 @@ static void create_directories(const char *path, int path_len,
 	free(buf);
 }
 
+static void remove_non_dirs(const char *path, int path_len,
+			    const struct checkout *state)
+{
+	char *buf = xmallocz(path_len);
+	int len = 0;
+
+	while (len < path_len) {
+		int ret;
+
+		do {
+			buf[len] = path[len];
+			len++;
+		} while (len < path_len && !is_dir_sep(path[len]));
+		if (len >= path_len)
+			break;
+		buf[len] = 0;
+
+		ret = has_dirs_only_path(buf, len, state->base_dir_len);
+
+		if (ret > 0)
+			continue; /* Is directory. */
+		if (ret < 0)
+			break; /* No entry */
+
+		/* ret == 0: not a directory, let's unlink it. */
+
+		if (!state->force)
+			die("'%s' already exists, and it's not a directory", buf);
+
+		if (unlink(buf))
+			die_errno("cannot unlink '%s'", buf);
+		else
+			break;
+	}
+	free(buf);
+}
+
 static void remove_subtree(struct strbuf *path)
 {
 	DIR *dir = opendir(path->buf);
@@ -555,8 +592,6 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
 	} else if (state->not_new)
 		return 0;
 
-	create_directories(path.buf, path.len, state);
-
 	if (nr_checkouts)
 		(*nr_checkouts)++;
 
@@ -565,9 +600,13 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca,
 		ca = &ca_buf;
 	}
 
-	if (!enqueue_checkout(ce, ca))
+	if (!enqueue_checkout(ce, ca)) {
+		/* "clean" path so that workers can create leading dirs */
+		remove_non_dirs(path.buf, path.len, state);
 		return 0;
+	}
 
+	create_directories(path.buf, path.len, state);
 	return write_entry(ce, path.buf, ca, state, 0);
 }
 
diff --git a/parallel-checkout.c b/parallel-checkout.c
index 4d72540256..5b73d8fa4b 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -298,20 +298,48 @@ static int close_and_clear(int *fd)
 	return ret;
 }
 
+struct ci_open_data {
+	int fd;
+	unsigned int mode;
+};
+
+static int ci_open(const char *path, void *cb)
+{
+	struct ci_open_data *data = cb;
+	data->fd = open(path, O_WRONLY | O_CREAT | O_EXCL, data->mode);
+
+	if (data->fd < 0) {
+		/*
+		 * EISDIR can only indicate path collisions among the entries
+		 * being checked out. We don't need raceproof_create_file() to
+		 * try removing empty dirs. Instead, just let the caller known
+		 * that the path already exists, so that the collision can be
+		 * properly handled later.
+		 */
+		if (errno == EISDIR)
+			errno = EEXIST;
+		return 1;
+	}
+
+	return 0;
+}
+
 void write_checkout_item(struct checkout *state, struct checkout_item *ci)
 {
-	unsigned int mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
+	struct ci_open_data open_data;
 	int fd = -1, fstat_done = 0;
 	struct strbuf path = STRBUF_INIT;
 
+	open_data.mode = (ci->ce->ce_mode & 0100) ? 0777 : 0666;
 	strbuf_add(&path, state->base_dir, state->base_dir_len);
 	strbuf_add(&path, ci->ce->name, ci->ce->ce_namelen);
 
-	fd = open(path.buf, O_WRONLY | O_CREAT | O_EXCL, mode);
-
-	if (fd < 0) {
-		if (errno == EEXIST || errno == EISDIR || errno == ENOENT ||
-		    errno == ENOTDIR) {
+	/*
+	 * The main process already removed any non-directory file that was in
+	 * the way. So if we find one, it's a path collision.
+	 */
+	if (raceproof_create_file(path.buf, ci_open, &open_data)) {
+		if (errno == EEXIST || errno == ENOTDIR || errno == ENOENT) {
 			/*
 			 * Errors which probably represent a path collision.
 			 * Suppress the error message and mark the ci to be
@@ -325,6 +353,8 @@ void write_checkout_item(struct checkout *state, struct checkout_item *ci)
 		goto out;
 	}
 
+	fd = open_data.fd;
+
 	if (write_checkout_item_to_fd(fd, state, ci, path.buf)) {
 		/* Error was already reported. */
 		ci->status = CI_FAILED;
-- 
2.27.0


  parent reply	other threads:[~2020-08-10 21:35 UTC|newest]

Thread overview: 154+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-10 21:33 [RFC PATCH 00/21] [RFC] Parallel checkout Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 01/21] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 02/21] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 03/21] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 04/21] convert: add conv_attrs classification Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 05/21] entry: extract a header file for entry.c functions Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 06/21] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 07/21] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 08/21] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 09/21] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 10/21] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 11/21] parallel-checkout: make it truly parallel Matheus Tavares
2020-08-19 21:34   ` Jeff Hostetler
2020-08-20  1:33     ` Matheus Tavares Bernardino
2020-08-20 14:39       ` Jeff Hostetler
2020-08-10 21:33 ` [RFC PATCH 12/21] parallel-checkout: add configuration options Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 13/21] parallel-checkout: support progress displaying Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 14/21] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 15/21] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 16/21] checkout-index: add " Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 17/21] parallel-checkout: avoid stat() calls in workers Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 18/21] entry: use is_dir_sep() when checking leading dirs Matheus Tavares
2020-08-10 21:33 ` [RFC PATCH 19/21] symlinks: make has_dirs_only_path() track FL_NOENT Matheus Tavares
2020-08-10 21:33 ` Matheus Tavares [this message]
2020-08-10 21:33 ` [RFC PATCH 21/21] parallel-checkout: skip checking the working tree on clone Matheus Tavares
2020-08-12 16:57 ` [RFC PATCH 00/21] [RFC] Parallel checkout Jeff Hostetler
2020-09-22 22:49 ` [PATCH v2 00/19] Parallel Checkout (part I) Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 01/19] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 02/19] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 03/19] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 04/19] convert: add conv_attrs classification Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 05/19] entry: extract a header file for entry.c functions Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 06/19] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 07/19] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 08/19] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-10-01 15:53     ` Jeff Hostetler
2020-10-01 15:59       ` Jeff Hostetler
2020-09-22 22:49   ` [PATCH v2 09/19] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 10/19] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-10-05  6:17     ` [PATCH] parallel-checkout: drop unused checkout state parameter Jeff King
2020-10-05 13:13       ` Matheus Tavares Bernardino
2020-10-05 13:45         ` Jeff King
2020-09-22 22:49   ` [PATCH v2 11/19] parallel-checkout: make it truly parallel Matheus Tavares
2020-09-29 19:52     ` Martin Ågren
2020-09-30 14:02       ` Matheus Tavares Bernardino
2020-09-22 22:49   ` [PATCH v2 12/19] parallel-checkout: support progress displaying Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 13/19] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 14/19] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 15/19] checkout-index: add " Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 16/19] parallel-checkout: add tests for basic operations Matheus Tavares
2020-10-20  1:35     ` Jonathan Nieder
2020-10-20  2:55       ` Taylor Blau
2020-10-20 13:18         ` Matheus Tavares Bernardino
2020-10-20 19:09           ` Junio C Hamano
2020-10-20  3:18       ` Matheus Tavares Bernardino
2020-10-20  4:16         ` Jonathan Nieder
2020-10-20 19:14         ` Junio C Hamano
2020-09-22 22:49   ` [PATCH v2 17/19] parallel-checkout: add tests related to clone collisions Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 18/19] parallel-checkout: add tests related to .gitattributes Matheus Tavares
2020-09-22 22:49   ` [PATCH v2 19/19] ci: run test round with parallel-checkout enabled Matheus Tavares
2020-10-29  2:14   ` [PATCH v3 00/19] Parallel Checkout (part I) Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 01/19] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-10-29 23:40       ` Junio C Hamano
2020-10-30 17:01         ` Matheus Tavares Bernardino
2020-10-30 17:38           ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 02/19] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-10-29 23:48       ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 03/19] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-10-29 23:51       ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 04/19] convert: add conv_attrs classification Matheus Tavares
2020-10-29 23:53       ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 05/19] entry: extract a header file for entry.c functions Matheus Tavares
2020-10-30 21:36       ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 06/19] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 07/19] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 08/19] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-10-30 21:58       ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 09/19] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-10-30 22:02       ` Junio C Hamano
2020-10-29  2:14     ` [PATCH v3 10/19] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-11-02 19:35       ` Junio C Hamano
2020-11-03  3:48         ` Matheus Tavares Bernardino
2020-10-29  2:14     ` [PATCH v3 11/19] parallel-checkout: make it truly parallel Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 12/19] parallel-checkout: support progress displaying Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 13/19] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 14/19] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 15/19] checkout-index: add " Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 16/19] parallel-checkout: add tests for basic operations Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 17/19] parallel-checkout: add tests related to clone collisions Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 18/19] parallel-checkout: add tests related to .gitattributes Matheus Tavares
2020-10-29  2:14     ` [PATCH v3 19/19] ci: run test round with parallel-checkout enabled Matheus Tavares
2020-10-29 19:48     ` [PATCH v3 00/19] Parallel Checkout (part I) Junio C Hamano
2020-10-30 15:58     ` Jeff Hostetler
2020-11-04 20:32     ` [PATCH v4 " Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 01/19] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-12-05 10:40         ` Christian Couder
2020-12-05 21:53           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 02/19] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-12-05 11:10         ` Christian Couder
2020-12-05 22:20           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 03/19] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-12-05 11:45         ` Christian Couder
2020-11-04 20:33       ` [PATCH v4 04/19] convert: add conv_attrs classification Matheus Tavares
2020-12-05 12:07         ` Christian Couder
2020-12-05 22:08           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 05/19] entry: extract a header file for entry.c functions Matheus Tavares
2020-12-06  8:31         ` Christian Couder
2020-11-04 20:33       ` [PATCH v4 06/19] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 07/19] entry: extract cache_entry update from write_entry() Matheus Tavares
2020-12-06  8:53         ` Christian Couder
2020-11-04 20:33       ` [PATCH v4 08/19] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-12-06  9:35         ` Christian Couder
2020-12-07 13:52           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 09/19] entry: add checkout_entry_ca() which takes preloaded conv_attrs Matheus Tavares
2020-12-06 10:02         ` Christian Couder
2020-12-07 16:47           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 10/19] unpack-trees: add basic support for parallel checkout Matheus Tavares
2020-12-06 11:36         ` Christian Couder
2020-12-07 19:06           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 11/19] parallel-checkout: make it truly parallel Matheus Tavares
2020-12-16 22:31         ` Emily Shaffer
2020-12-17 15:00           ` Matheus Tavares Bernardino
2020-11-04 20:33       ` [PATCH v4 12/19] parallel-checkout: support progress displaying Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 13/19] make_transient_cache_entry(): optionally alloc from mem_pool Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 14/19] builtin/checkout.c: complete parallel checkout support Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 15/19] checkout-index: add " Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 16/19] parallel-checkout: add tests for basic operations Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 17/19] parallel-checkout: add tests related to clone collisions Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 18/19] parallel-checkout: add tests related to .gitattributes Matheus Tavares
2020-11-04 20:33       ` [PATCH v4 19/19] ci: run test round with parallel-checkout enabled Matheus Tavares
2020-12-16 14:50       ` [PATCH v5 0/9] Parallel Checkout (part I) Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 1/9] convert: make convert_attrs() and convert structs public Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 2/9] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 3/9] convert: add get_stream_filter_ca() variant Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 4/9] convert: add classification for conv_attrs struct Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 5/9] entry: extract a header file for entry.c functions Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 6/9] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 7/9] entry: extract update_ce_after_write() from write_entry() Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 8/9] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2020-12-16 14:50         ` [PATCH v5 9/9] entry: add checkout_entry_ca() taking preloaded conv_attrs Matheus Tavares
2020-12-16 15:27         ` [PATCH v5 0/9] Parallel Checkout (part I) Christian Couder
2020-12-17  1:11         ` Junio C Hamano
2021-03-23 14:19         ` [PATCH v6 0/9] Parallel Checkout (part 1) Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 1/9] convert: make convert_attrs() and convert structs public Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 2/9] convert: add [async_]convert_to_working_tree_ca() variants Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 3/9] convert: add get_stream_filter_ca() variant Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 4/9] convert: add classification for conv_attrs struct Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 5/9] entry: extract a header file for entry.c functions Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 6/9] entry: make fstat_output() and read_blob_entry() public Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 7/9] entry: extract update_ce_after_write() from write_entry() Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 8/9] entry: move conv_attrs lookup up to checkout_entry() Matheus Tavares
2021-03-23 14:19           ` [PATCH v6 9/9] entry: add checkout_entry_ca() taking preloaded conv_attrs Matheus Tavares
2021-03-23 17:34           ` [PATCH v6 0/9] Parallel Checkout (part 1) Junio C Hamano
2020-10-01 16:42 ` [RFC PATCH 00/21] [RFC] Parallel checkout Jeff Hostetler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d9e8bad5d4e6cf3c44c6e48d28c40536afcba115.1597093021.git.matheus.bernardino@usp.br \
    --to=matheus.bernardino@usp.br \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jeffhost@microsoft.com \
    --cc=pclouds@gmail.com \
    --cc=stolee@gmail.com \
    --cc=t.gummerer@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.