Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code
@ 2019-09-06  3:33 Darrick J. Wong
  2019-09-06  3:34 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:33 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

Refactor the code that queues and dequeues directories for processing by
the thread pool, and thereby fix a few deadlock bugs.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=scrub-fix-vfs-walk

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item
  2019-09-06  3:33 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
@ 2019-09-06  3:34 ` Darrick J. Wong
  2019-09-10  1:04   ` Dave Chinner
  2019-09-06  3:34 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
  2019-09-06  3:34 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
  2 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:34 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Replace the open-coded process of queueing a subdirectory for scanning
with a single helper function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |  109 ++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 67 insertions(+), 42 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index b5d54837..add4e815 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -43,6 +43,57 @@ struct scan_fs_tree_dir {
 	bool			rootdir;
 };
 
+static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
+
+/* Queue a directory for scanning. */
+static bool
+queue_subdir(
+	struct scrub_ctx	*ctx,
+	struct scan_fs_tree	*sft,
+	struct workqueue	*wq,
+	const char		*path,
+	bool			is_rootdir)
+{
+	struct scan_fs_tree_dir	*new_sftd;
+	int			error;
+
+	new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
+	if (!new_sftd) {
+		str_errno(ctx, _("creating directory scan context"));
+		return false;
+	}
+
+	new_sftd->path = strdup(path);
+	if (!new_sftd->path) {
+		str_errno(ctx, _("creating directory scan path"));
+		goto out_sftd;
+	}
+
+	new_sftd->sft = sft;
+	new_sftd->rootdir = is_rootdir;
+
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs++;
+	pthread_mutex_unlock(&sft->lock);
+	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
+	if (error) {
+		/*
+		 * XXX: need to decrement nr_dirs here; will do that in the
+		 * next patch.
+		 */
+		str_info(ctx, ctx->mntpoint,
+_("Could not queue subdirectory scan work."));
+		goto out_path;
+	}
+
+	return true;
+out_path:
+	free(new_sftd->path);
+out_sftd:
+	free(new_sftd);
+	return false;
+}
+
 /* Scan a directory sub tree. */
 static void
 scan_fs_dir(
@@ -56,7 +107,6 @@ scan_fs_dir(
 	DIR			*dir;
 	struct dirent		*dirent;
 	char			newpath[PATH_MAX];
-	struct scan_fs_tree_dir	*new_sftd;
 	struct stat		sb;
 	int			dir_fd;
 	int			error;
@@ -117,25 +167,10 @@ scan_fs_dir(
 		/* If directory, call ourselves recursively. */
 		if (S_ISDIR(sb.st_mode) && strcmp(".", dirent->d_name) &&
 		    strcmp("..", dirent->d_name)) {
-			new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
-			if (!new_sftd) {
-				str_errno(ctx, newpath);
-				sft->moveon = false;
-				break;
-			}
-			new_sftd->path = strdup(newpath);
-			new_sftd->sft = sft;
-			new_sftd->rootdir = false;
-			pthread_mutex_lock(&sft->lock);
-			sft->nr_dirs++;
-			pthread_mutex_unlock(&sft->lock);
-			error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
-			if (error) {
-				str_info(ctx, ctx->mntpoint,
-_("Could not queue subdirectory scan work."));
-				sft->moveon = false;
+			sft->moveon = queue_subdir(ctx, sft, wq, newpath,
+					false);
+			if (!sft->moveon)
 				break;
-			}
 		}
 	}
 
@@ -165,11 +200,10 @@ scan_fs_tree(
 {
 	struct workqueue	wq;
 	struct scan_fs_tree	sft;
-	struct scan_fs_tree_dir	*sftd;
 	int			ret;
 
 	sft.moveon = true;
-	sft.nr_dirs = 1;
+	sft.nr_dirs = 0;
 	sft.root_sb = ctx->mnt_sb;
 	sft.dir_fn = dir_fn;
 	sft.dirent_fn = dirent_fn;
@@ -177,41 +211,32 @@ scan_fs_tree(
 	pthread_mutex_init(&sft.lock, NULL);
 	pthread_cond_init(&sft.wakeup, NULL);
 
-	sftd = malloc(sizeof(struct scan_fs_tree_dir));
-	if (!sftd) {
-		str_errno(ctx, ctx->mntpoint);
-		return false;
-	}
-	sftd->path = strdup(ctx->mntpoint);
-	sftd->sft = &sft;
-	sftd->rootdir = true;
-
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
 		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
-		goto out_free;
+		return false;
 	}
-	ret = workqueue_add(&wq, scan_fs_dir, 0, sftd);
-	if (ret) {
-		str_info(ctx, ctx->mntpoint,
-_("Could not queue directory scan work."));
+
+	sft.moveon = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
+	if (!sft.moveon)
 		goto out_wq;
-	}
 
+	/*
+	 * Wait for the wakeup to trigger, which should only happen when the
+	 * last worker thread decrements nr_dirs to zero.  Once the worker
+	 * triggers the wakeup and unlocks the sft lock, it's no longer safe
+	 * for any worker thread to access sft, as we now own the lock and are
+	 * about to tear everything down.
+	 */
 	pthread_mutex_lock(&sft.lock);
 	pthread_cond_wait(&sft.wakeup, &sft.lock);
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
-	workqueue_destroy(&wq);
 
-	return sft.moveon;
 out_wq:
 	workqueue_destroy(&wq);
-out_free:
-	free(sftd->path);
-	free(sftd);
-	return false;
+	return sft.moveon;
 }
 
 #ifndef FITRIM


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems
  2019-09-06  3:33 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
  2019-09-06  3:34 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
@ 2019-09-06  3:34 ` Darrick J. Wong
  2019-09-06  3:34 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
  2 siblings, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:34 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, Dave Chinner

From: Darrick J. Wong <darrick.wong@oracle.com>

When we're scanning the directory tree, we bump nr_dirs every time we
think we're going to queue a new directory to process, and we decrement
it every time we're finished doing something with a directory
(successful or not).  We forgot to undo a counter increment when
workqueue_add fails, so refactor the code into helpers and call them
as necessary for correct operation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 scrub/vfs.c |   42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index add4e815..f8bc98c0 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -45,6 +45,32 @@ struct scan_fs_tree_dir {
 
 static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
 
+/* Increment the number of directories that are queued for processing. */
+static void
+inc_nr_dirs(
+	struct scan_fs_tree	*sft)
+{
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs++;
+	pthread_mutex_unlock(&sft->lock);
+}
+
+/*
+ * Decrement the number of directories that are queued for processing and if
+ * we ran out of dirs to process, wake up anyone who was waiting for processing
+ * to finish.
+ */
+static void
+dec_nr_dirs(
+	struct scan_fs_tree	*sft)
+{
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs--;
+	if (sft->nr_dirs == 0)
+		pthread_cond_signal(&sft->wakeup);
+	pthread_mutex_unlock(&sft->lock);
+}
+
 /* Queue a directory for scanning. */
 static bool
 queue_subdir(
@@ -72,15 +98,10 @@ queue_subdir(
 	new_sftd->sft = sft;
 	new_sftd->rootdir = is_rootdir;
 
-	pthread_mutex_lock(&sft->lock);
-	sft->nr_dirs++;
-	pthread_mutex_unlock(&sft->lock);
+	inc_nr_dirs(sft);
 	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
 	if (error) {
-		/*
-		 * XXX: need to decrement nr_dirs here; will do that in the
-		 * next patch.
-		 */
+		dec_nr_dirs(sft);
 		str_info(ctx, ctx->mntpoint,
 _("Could not queue subdirectory scan work."));
 		goto out_path;
@@ -180,12 +201,7 @@ scan_fs_dir(
 		str_errno(ctx, sftd->path);
 
 out:
-	pthread_mutex_lock(&sft->lock);
-	sft->nr_dirs--;
-	if (sft->nr_dirs == 0)
-		pthread_cond_signal(&sft->wakeup);
-	pthread_mutex_unlock(&sft->lock);
-
+	dec_nr_dirs(sft);
 	free(sftd->path);
 	free(sftd);
 }


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree
  2019-09-06  3:33 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
  2019-09-06  3:34 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
  2019-09-06  3:34 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
@ 2019-09-06  3:34 ` Darrick J. Wong
  2 siblings, 0 replies; 7+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:34 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, Dave Chinner

From: Darrick J. Wong <darrick.wong@oracle.com>

We don't need to wait on the condition variable if directory tree
scanning has already finished by the time we've finished queueing all
the directory work items.  This is easy to trigger when the workqueue is
single-threaded, but in theory it could happen any time.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 scrub/vfs.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index f8bc98c0..1a1482dd 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -246,7 +246,8 @@ scan_fs_tree(
 	 * about to tear everything down.
 	 */
 	pthread_mutex_lock(&sft.lock);
-	pthread_cond_wait(&sft.wakeup, &sft.lock);
+	if (sft.nr_dirs)
+		pthread_cond_wait(&sft.wakeup, &sft.lock);
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item
  2019-09-06  3:34 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
@ 2019-09-10  1:04   ` Dave Chinner
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Chinner @ 2019-09-10  1:04 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Thu, Sep 05, 2019 at 08:34:03PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Replace the open-coded process of queueing a subdirectory for scanning
> with a single helper function.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>

Looks good.


Reviewed-by: Dave Chinner <dchinner@redhat.com>
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems
  2019-08-26 21:21 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
@ 2019-09-04  8:15   ` Dave Chinner
  0 siblings, 0 replies; 7+ messages in thread
From: Dave Chinner @ 2019-09-04  8:15 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:21:25PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> When we're scanning the directory tree, we bump nr_dirs every time we
> think we're going to queue a new directory to process, and we decrement
> it every time we're finished doing something with a directory
> (successful or not).  We forgot to undo a counter increment when
> workqueue_add fails, so refactor the code into helpers and call them
> as necessary for correct operation.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/vfs.c |   38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
> 
> 
> diff --git a/scrub/vfs.c b/scrub/vfs.c
> index ea2866d9..b358ab4a 100644
> --- a/scrub/vfs.c
> +++ b/scrub/vfs.c
> @@ -45,6 +45,32 @@ struct scan_fs_tree_dir {
>  
>  static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
>  
> +/* Increment the number of directories that are queued for processing. */
> +static void
> +inc_nr_dirs(
> +	struct scan_fs_tree	*sft)
> +{
> +	pthread_mutex_lock(&sft->lock);
> +	sft->nr_dirs++;
> +	pthread_mutex_unlock(&sft->lock);
> +}
> +
> +/*
> + * Decrement the number of directories that are queued for processing and if
> + * we ran out of dirs to process, wake up anyone who was waiting for processing
> + * to finish.
> + */
> +static void
> +dec_nr_dirs(
> +	struct scan_fs_tree	*sft)
> +{
> +	pthread_mutex_lock(&sft->lock);
> +	sft->nr_dirs--;
> +	if (sft->nr_dirs == 0)
> +		pthread_cond_signal(&sft->wakeup);
> +	pthread_mutex_unlock(&sft->lock);
> +}
> +
>  /* Queue a directory for scanning. */
>  static bool
>  queue_subdir(
> @@ -73,11 +99,10 @@ queue_subdir(
>  	new_sftd->sft = sft;
>  	new_sftd->rootdir = is_rootdir;
>  
> -	pthread_mutex_lock(&sft->lock);
> -	sft->nr_dirs++;
> -	pthread_mutex_unlock(&sft->lock);
> +	inc_nr_dirs(sft);
>  	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
>  	if (error) {
> +		dec_nr_dirs(sft);
>  		str_info(ctx, ctx->mntpoint,
>  _("Could not queue subdirectory scan work."));
>  		return false;

Ok, that's the bug fix for the previous patch. Potentially should be
a separate patch, but right now there is so much outstanding that I
don't think it's worthwhile to respin the series just to fix that.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems
  2019-08-26 21:21 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
@ 2019-08-26 21:21 ` Darrick J. Wong
  2019-09-04  8:15   ` Dave Chinner
  0 siblings, 1 reply; 7+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

When we're scanning the directory tree, we bump nr_dirs every time we
think we're going to queue a new directory to process, and we decrement
it every time we're finished doing something with a directory
(successful or not).  We forgot to undo a counter increment when
workqueue_add fails, so refactor the code into helpers and call them
as necessary for correct operation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |   38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index ea2866d9..b358ab4a 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -45,6 +45,32 @@ struct scan_fs_tree_dir {
 
 static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
 
+/* Increment the number of directories that are queued for processing. */
+static void
+inc_nr_dirs(
+	struct scan_fs_tree	*sft)
+{
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs++;
+	pthread_mutex_unlock(&sft->lock);
+}
+
+/*
+ * Decrement the number of directories that are queued for processing and if
+ * we ran out of dirs to process, wake up anyone who was waiting for processing
+ * to finish.
+ */
+static void
+dec_nr_dirs(
+	struct scan_fs_tree	*sft)
+{
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs--;
+	if (sft->nr_dirs == 0)
+		pthread_cond_signal(&sft->wakeup);
+	pthread_mutex_unlock(&sft->lock);
+}
+
 /* Queue a directory for scanning. */
 static bool
 queue_subdir(
@@ -73,11 +99,10 @@ queue_subdir(
 	new_sftd->sft = sft;
 	new_sftd->rootdir = is_rootdir;
 
-	pthread_mutex_lock(&sft->lock);
-	sft->nr_dirs++;
-	pthread_mutex_unlock(&sft->lock);
+	inc_nr_dirs(sft);
 	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
 	if (error) {
+		dec_nr_dirs(sft);
 		str_info(ctx, ctx->mntpoint,
 _("Could not queue subdirectory scan work."));
 		return false;
@@ -172,12 +197,7 @@ scan_fs_dir(
 		str_errno(ctx, sftd->path);
 
 out:
-	pthread_mutex_lock(&sft->lock);
-	sft->nr_dirs--;
-	if (sft->nr_dirs == 0)
-		pthread_cond_signal(&sft->wakeup);
-	pthread_mutex_unlock(&sft->lock);
-
+	dec_nr_dirs(sft);
 	free(sftd->path);
 	free(sftd);
 }


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-06  3:33 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
2019-09-06  3:34 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
2019-09-10  1:04   ` Dave Chinner
2019-09-06  3:34 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
2019-09-06  3:34 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2019-08-26 21:21 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
2019-08-26 21:21 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
2019-09-04  8:15   ` Dave Chinner

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org linux-xfs@archiver.kernel.org
	public-inbox-index linux-xfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox