Git Mailing List Archive on lore.kernel.org
 help / color / Atom feed
From: Matheus Tavares <matheus.bernardino@usp.br>
To: git@vger.kernel.org
Cc: christian.couder@gmail.com, gitster@pobox.com,
	jrnieder@gmail.com, olyatelezhnaya@gmail.com, pclouds@gmail.com,
	jonathantanmy@google.com, peff@peff.net,
	"brian m. carlson" <sandals@crustytoothpaste.net>,
	Stefan Beller <stefanbeller@gmail.com>,
	Brandon Williams <bwilliams.eng@gmail.com>
Subject: [PATCH v3 06/12] grep: replace grep_read_mutex by internal obj read lock
Date: Wed, 15 Jan 2020 23:39:54 -0300
Message-ID: <fc1200bb07f749420dad044d39dfe30ae73ad640.1579141989.git.matheus.bernardino@usp.br> (raw)
In-Reply-To: <cover.1579141989.git.matheus.bernardino@usp.br>

git-grep uses 'grep_read_mutex' to protect its calls to object reading
operations. But these have their own internal lock now, which ensures a
better performance (allowing parallel access to more regions). So, let's
remove the former and, instead, activate the latter with
enable_obj_read_lock().

Sections that are currently protected by 'grep_read_mutex' but are not
internally protected by the object reading lock should be surrounded by
obj_read_lock() and obj_read_unlock(). These guarantee mutual exclusion
with object reading operations, keeping the current behavior and
avoiding race conditions. Namely, these places are:

  In grep.c:

  - fill_textconv() at fill_textconv_grep().
  - userdiff_get_textconv() at grep_source_1().

  In builtin/grep.c:

  - parse_object_or_die() and the submodule functions at
    grep_submodule().
  - deref_tag() and gitmodules_config_oid() at grep_objects().

If these functions become thread-safe, in the future, we might remove
the locking and probably get some speedup.

Note that some of the submodule functions will already be thread-safe
(or close to being thread-safe) with the internal object reading lock.
However, as some of them will require additional modifications to be
removed from the critical section, this will be done in its own patch.

Signed-off-by: Matheus Tavares <matheus.bernardino@usp.br>
---
 builtin/grep.c | 46 ++++++++++++++++------------------------------
 grep.c         | 39 +++++++++++++++++++--------------------
 grep.h         | 13 -------------
 3 files changed, 35 insertions(+), 63 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 91fc032a32..4a436d6c99 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -200,12 +200,12 @@ static void start_threads(struct grep_opt *opt)
 	int i;
 
 	pthread_mutex_init(&grep_mutex, NULL);
-	pthread_mutex_init(&grep_read_mutex, NULL);
 	pthread_mutex_init(&grep_attr_mutex, NULL);
 	pthread_cond_init(&cond_add, NULL);
 	pthread_cond_init(&cond_write, NULL);
 	pthread_cond_init(&cond_result, NULL);
 	grep_use_locks = 1;
+	enable_obj_read_lock();
 
 	for (i = 0; i < ARRAY_SIZE(todo); i++) {
 		strbuf_init(&todo[i].out, 0);
@@ -257,12 +257,12 @@ static int wait_all(void)
 	free(threads);
 
 	pthread_mutex_destroy(&grep_mutex);
-	pthread_mutex_destroy(&grep_read_mutex);
 	pthread_mutex_destroy(&grep_attr_mutex);
 	pthread_cond_destroy(&cond_add);
 	pthread_cond_destroy(&cond_write);
 	pthread_cond_destroy(&cond_result);
 	grep_use_locks = 0;
+	disable_obj_read_lock();
 
 	return hit;
 }
@@ -295,16 +295,6 @@ static int grep_cmd_config(const char *var, const char *value, void *cb)
 	return st;
 }
 
-static void *lock_and_read_oid_file(const struct object_id *oid, enum object_type *type, unsigned long *size)
-{
-	void *data;
-
-	grep_read_lock();
-	data = read_object_file(oid, type, size);
-	grep_read_unlock();
-	return data;
-}
-
 static int grep_oid(struct grep_opt *opt, const struct object_id *oid,
 		     const char *filename, int tree_name_len,
 		     const char *path)
@@ -413,20 +403,20 @@ static int grep_submodule(struct grep_opt *opt,
 
 	/*
 	 * NEEDSWORK: submodules functions need to be protected because they
-	 * access the object store via config_from_gitmodules(): the latter
-	 * uses get_oid() which, for now, relies on the global the_repository
-	 * object.
+	 * call config_from_gitmodules(): the latter contains in its call stack
+	 * many thread-unsafe operations that are racy with object reading, such
+	 * as parse_object() and is_promisor_object().
 	 */
-	grep_read_lock();
+	obj_read_lock();
 	sub = submodule_from_path(superproject, &null_oid, path);
 
 	if (!is_submodule_active(superproject, path)) {
-		grep_read_unlock();
+		obj_read_unlock();
 		return 0;
 	}
 
 	if (repo_submodule_init(&subrepo, superproject, sub)) {
-		grep_read_unlock();
+		obj_read_unlock();
 		return 0;
 	}
 
@@ -443,7 +433,7 @@ static int grep_submodule(struct grep_opt *opt,
 	 * object.
 	 */
 	add_to_alternates_memory(subrepo.objects->odb->path);
-	grep_read_unlock();
+	obj_read_unlock();
 
 	memcpy(&subopt, opt, sizeof(subopt));
 	subopt.repo = &subrepo;
@@ -455,13 +445,12 @@ static int grep_submodule(struct grep_opt *opt,
 		unsigned long size;
 		struct strbuf base = STRBUF_INIT;
 
-		grep_read_lock();
+		obj_read_lock();
 		object = parse_object_or_die(oid, oid_to_hex(oid));
+		obj_read_unlock();
 		data = read_object_with_reference(&subrepo,
 						  &object->oid, tree_type,
 						  &size, NULL);
-		grep_read_unlock();
-
 		if (!data)
 			die(_("unable to read tree (%s)"), oid_to_hex(&object->oid));
 
@@ -586,7 +575,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
 			void *data;
 			unsigned long size;
 
-			data = lock_and_read_oid_file(&entry.oid, &type, &size);
+			data = read_object_file(&entry.oid, &type, &size);
 			if (!data)
 				die(_("unable to read tree (%s)"),
 				    oid_to_hex(&entry.oid));
@@ -624,12 +613,9 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
 		struct strbuf base;
 		int hit, len;
 
-		grep_read_lock();
 		data = read_object_with_reference(opt->repo,
 						  &obj->oid, tree_type,
 						  &size, NULL);
-		grep_read_unlock();
-
 		if (!data)
 			die(_("unable to read tree (%s)"), oid_to_hex(&obj->oid));
 
@@ -659,17 +645,17 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
 	for (i = 0; i < nr; i++) {
 		struct object *real_obj;
 
-		grep_read_lock();
+		obj_read_lock();
 		real_obj = deref_tag(opt->repo, list->objects[i].item,
 				     NULL, 0);
-		grep_read_unlock();
+		obj_read_unlock();
 
 		/* load the gitmodules file for this rev */
 		if (recurse_submodules) {
 			submodule_free(opt->repo);
-			grep_read_lock();
+			obj_read_lock();
 			gitmodules_config_oid(&real_obj->oid);
-			grep_read_unlock();
+			obj_read_unlock();
 		}
 		if (grep_object(opt, pathspec, real_obj, list->objects[i].name,
 				list->objects[i].path)) {
diff --git a/grep.c b/grep.c
index c028f70aba..13232a904a 100644
--- a/grep.c
+++ b/grep.c
@@ -1540,11 +1540,6 @@ static inline void grep_attr_unlock(void)
 		pthread_mutex_unlock(&grep_attr_mutex);
 }
 
-/*
- * Same as git_attr_mutex, but protecting the thread-unsafe object db access.
- */
-pthread_mutex_t grep_read_mutex;
-
 static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
 {
 	xdemitconf_t *xecfg = opt->priv;
@@ -1741,13 +1736,20 @@ static int fill_textconv_grep(struct repository *r,
 	}
 
 	/*
-	 * fill_textconv is not remotely thread-safe; it may load objects
-	 * behind the scenes, and it modifies the global diff tempfile
-	 * structure.
+	 * fill_textconv is not remotely thread-safe; it modifies the global
+	 * diff tempfile structure, writes to the_repo's odb and might
+	 * internally call thread-unsafe functions such as the
+	 * prepare_packed_git() lazy-initializator. Because of the last two, we
+	 * must ensure mutual exclusion between this call and the object reading
+	 * API, thus we use obj_read_lock() here.
+	 *
+	 * TODO: allowing text conversion to run in parallel with object
+	 * reading operations might increase performance in the multithreaded
+	 * non-worktreee git-grep with --textconv.
 	 */
-	grep_read_lock();
+	obj_read_lock();
 	size = fill_textconv(r, driver, df, &buf);
-	grep_read_unlock();
+	obj_read_unlock();
 	free_filespec(df);
 
 	/*
@@ -1813,12 +1815,15 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 		grep_source_load_driver(gs, opt->repo->index);
 		/*
 		 * We might set up the shared textconv cache data here, which
-		 * is not thread-safe.
+		 * is not thread-safe. Also, get_oid_with_context() and
+		 * parse_object() might be internally called. As they are not
+		 * currenty thread-safe and might be racy with object reading,
+		 * obj_read_lock() must be called.
 		 */
 		grep_attr_lock();
-		grep_read_lock();
+		obj_read_lock();
 		textconv = userdiff_get_textconv(opt->repo, gs->driver);
-		grep_read_unlock();
+		obj_read_unlock();
 		grep_attr_unlock();
 	}
 
@@ -2118,10 +2123,7 @@ static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
 
-	grep_read_lock();
 	gs->buf = read_object_file(gs->identifier, &type, &gs->size);
-	grep_read_unlock();
-
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,
@@ -2186,11 +2188,8 @@ void grep_source_load_driver(struct grep_source *gs,
 		return;
 
 	grep_attr_lock();
-	if (gs->path) {
-		grep_read_lock();
+	if (gs->path)
 		gs->driver = userdiff_find_by_path(istate, gs->path);
-		grep_read_unlock();
-	}
 	if (!gs->driver)
 		gs->driver = userdiff_find_by_name("default");
 	grep_attr_unlock();
diff --git a/grep.h b/grep.h
index 811fd274c9..9115db8515 100644
--- a/grep.h
+++ b/grep.h
@@ -220,18 +220,5 @@ int grep_threads_ok(const struct grep_opt *opt);
  */
 extern int grep_use_locks;
 extern pthread_mutex_t grep_attr_mutex;
-extern pthread_mutex_t grep_read_mutex;
-
-static inline void grep_read_lock(void)
-{
-	if (grep_use_locks)
-		pthread_mutex_lock(&grep_read_mutex);
-}
-
-static inline void grep_read_unlock(void)
-{
-	if (grep_use_locks)
-		pthread_mutex_unlock(&grep_read_mutex);
-}
 
 #endif
-- 
2.24.1


  parent reply index

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-10 20:27 [GSoC][PATCH 0/4] grep: re-enable threads when cached, w/ parallel inflation Matheus Tavares
2019-09-30  1:50 ` [PATCH v2 00/11] grep: improve threading and fix race conditions Matheus Tavares
2020-01-16  2:39   ` [PATCH v3 00/12] " Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 01/12] grep: fix race conditions on userdiff calls Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 02/12] grep: fix race conditions at grep_submodule() Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 03/12] grep: fix racy calls in grep_objects() Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 04/12] replace-object: make replace operations thread-safe Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 05/12] object-store: allow threaded access to object reading Matheus Tavares
2020-01-16  2:39     ` Matheus Tavares [this message]
2020-01-16  2:39     ` [PATCH v3 07/12] submodule-config: add skip_if_read option to repo_read_gitmodules() Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 08/12] grep: allow submodule functions to run in parallel Matheus Tavares
2020-01-29 11:26       ` SZEDER Gábor
2020-01-29 18:49         ` Junio C Hamano
2020-01-29 18:57         ` Junio C Hamano
2020-01-29 20:42           ` Matheus Tavares Bernardino
2020-01-30 13:28             ` Philippe Blain
2020-01-16  2:39     ` [PATCH v3 09/12] grep: protect packed_git [re-]initialization Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 10/12] grep: re-enable threads in non-worktree case Matheus Tavares
2020-01-16  2:39     ` [PATCH v3 11/12] grep: move driver pre-load out of critical section Matheus Tavares
2020-01-16  2:40     ` [PATCH v3 12/12] grep: use no. of cores as the default no. of threads Matheus Tavares
2020-01-16 13:11       ` Victor Leschuk
2020-01-16 14:47         ` [PATCH] " Matheus Tavares

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fc1200bb07f749420dad044d39dfe30ae73ad640.1579141989.git.matheus.bernardino@usp.br \
    --to=matheus.bernardino@usp.br \
    --cc=bwilliams.eng@gmail.com \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    --cc=jrnieder@gmail.com \
    --cc=olyatelezhnaya@gmail.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=sandals@crustytoothpaste.net \
    --cc=stefanbeller@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Mailing List Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/git/0 git/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 git git/ https://lore.kernel.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.git


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git