linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code
@ 2019-08-26 21:21 Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

Refactor the code that deals with queueing and unqueueing directories to
process via thread pool, and thereby solve a few deadlock bugs.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=scrub-fix-vfs-walk

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item
  2019-08-26 21:21 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
@ 2019-08-26 21:21 ` Darrick J. Wong
  2019-09-04  8:12   ` Dave Chinner
  2019-08-26 21:21 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
  2 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Replace the open-coded process of queueing a subdirectory for scanning
with a single helper function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |   94 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 52 insertions(+), 42 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index 7b0b5bcd..ea2866d9 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -43,6 +43,49 @@ struct scan_fs_tree_dir {
 	bool			rootdir;
 };
 
+static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
+
+/* Queue a directory for scanning. */
+static bool
+queue_subdir(
+	struct scrub_ctx	*ctx,
+	struct scan_fs_tree	*sft,
+	struct workqueue	*wq,
+	const char		*path,
+	bool			is_rootdir)
+{
+	struct scan_fs_tree_dir	*new_sftd;
+	int			error;
+
+	new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
+	if (!new_sftd) {
+		str_errno(ctx, _("creating directory scan context"));
+		return false;
+	}
+
+	new_sftd->path = strdup(path);
+	if (!new_sftd->path) {
+		str_errno(ctx, _("creating directory scan path"));
+		free(new_sftd);
+		return false;
+	}
+
+	new_sftd->sft = sft;
+	new_sftd->rootdir = is_rootdir;
+
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs++;
+	pthread_mutex_unlock(&sft->lock);
+	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
+	if (error) {
+		str_info(ctx, ctx->mntpoint,
+_("Could not queue subdirectory scan work."));
+		return false;
+	}
+
+	return true;
+}
+
 /* Scan a directory sub tree. */
 static void
 scan_fs_dir(
@@ -56,7 +99,6 @@ scan_fs_dir(
 	DIR			*dir;
 	struct dirent		*dirent;
 	char			newpath[PATH_MAX];
-	struct scan_fs_tree_dir	*new_sftd;
 	struct stat		sb;
 	int			dir_fd;
 	int			error;
@@ -117,25 +159,10 @@ scan_fs_dir(
 		/* If directory, call ourselves recursively. */
 		if (S_ISDIR(sb.st_mode) && strcmp(".", dirent->d_name) &&
 		    strcmp("..", dirent->d_name)) {
-			new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
-			if (!new_sftd) {
-				str_errno(ctx, newpath);
-				sft->moveon = false;
-				break;
-			}
-			new_sftd->path = strdup(newpath);
-			new_sftd->sft = sft;
-			new_sftd->rootdir = false;
-			pthread_mutex_lock(&sft->lock);
-			sft->nr_dirs++;
-			pthread_mutex_unlock(&sft->lock);
-			error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
-			if (error) {
-				str_info(ctx, ctx->mntpoint,
-_("Could not queue subdirectory scan work."));
-				sft->moveon = false;
+			sft->moveon = queue_subdir(ctx, sft, wq, newpath,
+					false);
+			if (!sft->moveon)
 				break;
-			}
 		}
 	}
 
@@ -165,11 +192,10 @@ scan_fs_tree(
 {
 	struct workqueue	wq;
 	struct scan_fs_tree	sft;
-	struct scan_fs_tree_dir	*sftd;
 	int			ret;
 
 	sft.moveon = true;
-	sft.nr_dirs = 1;
+	sft.nr_dirs = 0;
 	sft.root_sb = ctx->mnt_sb;
 	sft.dir_fn = dir_fn;
 	sft.dirent_fn = dirent_fn;
@@ -177,41 +203,25 @@ scan_fs_tree(
 	pthread_mutex_init(&sft.lock, NULL);
 	pthread_cond_init(&sft.wakeup, NULL);
 
-	sftd = malloc(sizeof(struct scan_fs_tree_dir));
-	if (!sftd) {
-		str_errno(ctx, ctx->mntpoint);
-		return false;
-	}
-	sftd->path = strdup(ctx->mntpoint);
-	sftd->sft = &sft;
-	sftd->rootdir = true;
-
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
 		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
-		goto out_free;
+		return false;
 	}
-	ret = workqueue_add(&wq, scan_fs_dir, 0, sftd);
-	if (ret) {
-		str_info(ctx, ctx->mntpoint,
-_("Could not queue directory scan work."));
+
+	sft.moveon = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
+	if (!sft.moveon)
 		goto out_wq;
-	}
 
 	pthread_mutex_lock(&sft.lock);
 	pthread_cond_wait(&sft.wakeup, &sft.lock);
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
-	workqueue_destroy(&wq);
 
-	return sft.moveon;
 out_wq:
 	workqueue_destroy(&wq);
-out_free:
-	free(sftd->path);
-	free(sftd);
-	return false;
+	return sft.moveon;
 }
 
 #ifndef FITRIM


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems
  2019-08-26 21:21 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
@ 2019-08-26 21:21 ` Darrick J. Wong
  2019-09-04  8:15   ` Dave Chinner
  2019-08-26 21:21 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
  2 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

When we're scanning the directory tree, we bump nr_dirs every time we
think we're going to queue a new directory to process, and we decrement
it every time we're finished doing something with a directory
(successful or not).  We forgot to undo a counter increment when
workqueue_add fails, so refactor the code into helpers and call them
as necessary for correct operation.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |   38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index ea2866d9..b358ab4a 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -45,6 +45,32 @@ struct scan_fs_tree_dir {
 
 static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
 
+/* Increment the number of directories that are queued for processing. */
+static void
+inc_nr_dirs(
+	struct scan_fs_tree	*sft)
+{
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs++;
+	pthread_mutex_unlock(&sft->lock);
+}
+
+/*
+ * Decrement the number of directories that are queued for processing and if
+ * we ran out of dirs to process, wake up anyone who was waiting for processing
+ * to finish.
+ */
+static void
+dec_nr_dirs(
+	struct scan_fs_tree	*sft)
+{
+	pthread_mutex_lock(&sft->lock);
+	sft->nr_dirs--;
+	if (sft->nr_dirs == 0)
+		pthread_cond_signal(&sft->wakeup);
+	pthread_mutex_unlock(&sft->lock);
+}
+
 /* Queue a directory for scanning. */
 static bool
 queue_subdir(
@@ -73,11 +99,10 @@ queue_subdir(
 	new_sftd->sft = sft;
 	new_sftd->rootdir = is_rootdir;
 
-	pthread_mutex_lock(&sft->lock);
-	sft->nr_dirs++;
-	pthread_mutex_unlock(&sft->lock);
+	inc_nr_dirs(sft);
 	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
 	if (error) {
+		dec_nr_dirs(sft);
 		str_info(ctx, ctx->mntpoint,
 _("Could not queue subdirectory scan work."));
 		return false;
@@ -172,12 +197,7 @@ scan_fs_dir(
 		str_errno(ctx, sftd->path);
 
 out:
-	pthread_mutex_lock(&sft->lock);
-	sft->nr_dirs--;
-	if (sft->nr_dirs == 0)
-		pthread_cond_signal(&sft->wakeup);
-	pthread_mutex_unlock(&sft->lock);
-
+	dec_nr_dirs(sft);
 	free(sftd->path);
 	free(sftd);
 }


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree
  2019-08-26 21:21 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
  2019-08-26 21:21 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
@ 2019-08-26 21:21 ` Darrick J. Wong
  2019-09-04  8:20   ` Dave Chinner
  2 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:21 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

We don't need to wait on the condition variable if directory tree
scanning has already finished by the time we've finished queueing all
the directory work items.  This is easy to trigger when the workqueue is
single-threaded, but in theory it could happen any time.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index b358ab4a..0e971d27 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -235,7 +235,8 @@ scan_fs_tree(
 		goto out_wq;
 
 	pthread_mutex_lock(&sft.lock);
-	pthread_cond_wait(&sft.wakeup, &sft.lock);
+	if (sft.nr_dirs)
+		pthread_cond_wait(&sft.wakeup, &sft.lock);
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item
  2019-08-26 21:21 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
@ 2019-09-04  8:12   ` Dave Chinner
  2019-09-04 16:37     ` Darrick J. Wong
  0 siblings, 1 reply; 10+ messages in thread
From: Dave Chinner @ 2019-09-04  8:12 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:21:18PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Replace the open-coded process of queueing a subdirectory for scanning
> with a single helper function.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/vfs.c |   94 +++++++++++++++++++++++++++++++++--------------------------
>  1 file changed, 52 insertions(+), 42 deletions(-)
> 
> 
> diff --git a/scrub/vfs.c b/scrub/vfs.c
> index 7b0b5bcd..ea2866d9 100644
> --- a/scrub/vfs.c
> +++ b/scrub/vfs.c
> @@ -43,6 +43,49 @@ struct scan_fs_tree_dir {
>  	bool			rootdir;
>  };
>  
> +static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
> +
> +/* Queue a directory for scanning. */
> +static bool
> +queue_subdir(
> +	struct scrub_ctx	*ctx,
> +	struct scan_fs_tree	*sft,
> +	struct workqueue	*wq,
> +	const char		*path,
> +	bool			is_rootdir)
> +{
> +	struct scan_fs_tree_dir	*new_sftd;
> +	int			error;
> +
> +	new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
> +	if (!new_sftd) {
> +		str_errno(ctx, _("creating directory scan context"));
> +		return false;
> +	}
> +
> +	new_sftd->path = strdup(path);
> +	if (!new_sftd->path) {
> +		str_errno(ctx, _("creating directory scan path"));
> +		free(new_sftd);
> +		return false;
> +	}
> +
> +	new_sftd->sft = sft;
> +	new_sftd->rootdir = is_rootdir;
> +
> +	pthread_mutex_lock(&sft->lock);
> +	sft->nr_dirs++;
> +	pthread_mutex_unlock(&sft->lock);
> +	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
> +	if (error) {
> +		str_info(ctx, ctx->mntpoint,
> +_("Could not queue subdirectory scan work."));
> +		return false;

Need to drop sft->nr_dirs here, probably free the memory, too.

> @@ -177,41 +203,25 @@ scan_fs_tree(
>  	pthread_mutex_init(&sft.lock, NULL);
>  	pthread_cond_init(&sft.wakeup, NULL);
>  
> -	sftd = malloc(sizeof(struct scan_fs_tree_dir));
> -	if (!sftd) {
> -		str_errno(ctx, ctx->mntpoint);
> -		return false;
> -	}
> -	sftd->path = strdup(ctx->mntpoint);
> -	sftd->sft = &sft;
> -	sftd->rootdir = true;
> -
>  	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
>  			scrub_nproc_workqueue(ctx));
>  	if (ret) {
>  		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
> -		goto out_free;
> +		return false;
>  	}
> -	ret = workqueue_add(&wq, scan_fs_dir, 0, sftd);
> -	if (ret) {
> -		str_info(ctx, ctx->mntpoint,
> -_("Could not queue directory scan work."));
> +
> +	sft.moveon = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
> +	if (!sft.moveon)
>  		goto out_wq;
> -	}

sft is a stack variable that is stuffed into the structure passed to
work run on the workqueue. Is that safe to do here?

>  	pthread_mutex_lock(&sft.lock);
>  	pthread_cond_wait(&sft.wakeup, &sft.lock);

maybe it is because of this, but it's not immediately obvious what
condition actually triggers and that all the work is done...

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems
  2019-08-26 21:21 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
@ 2019-09-04  8:15   ` Dave Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2019-09-04  8:15 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:21:25PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> When we're scanning the directory tree, we bump nr_dirs every time we
> think we're going to queue a new directory to process, and we decrement
> it every time we're finished doing something with a directory
> (successful or not).  We forgot to undo a counter increment when
> workqueue_add fails, so refactor the code into helpers and call them
> as necessary for correct operation.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/vfs.c |   38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
> 
> 
> diff --git a/scrub/vfs.c b/scrub/vfs.c
> index ea2866d9..b358ab4a 100644
> --- a/scrub/vfs.c
> +++ b/scrub/vfs.c
> @@ -45,6 +45,32 @@ struct scan_fs_tree_dir {
>  
>  static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
>  
> +/* Increment the number of directories that are queued for processing. */
> +static void
> +inc_nr_dirs(
> +	struct scan_fs_tree	*sft)
> +{
> +	pthread_mutex_lock(&sft->lock);
> +	sft->nr_dirs++;
> +	pthread_mutex_unlock(&sft->lock);
> +}
> +
> +/*
> + * Decrement the number of directories that are queued for processing and if
> + * we ran out of dirs to process, wake up anyone who was waiting for processing
> + * to finish.
> + */
> +static void
> +dec_nr_dirs(
> +	struct scan_fs_tree	*sft)
> +{
> +	pthread_mutex_lock(&sft->lock);
> +	sft->nr_dirs--;
> +	if (sft->nr_dirs == 0)
> +		pthread_cond_signal(&sft->wakeup);
> +	pthread_mutex_unlock(&sft->lock);
> +}
> +
>  /* Queue a directory for scanning. */
>  static bool
>  queue_subdir(
> @@ -73,11 +99,10 @@ queue_subdir(
>  	new_sftd->sft = sft;
>  	new_sftd->rootdir = is_rootdir;
>  
> -	pthread_mutex_lock(&sft->lock);
> -	sft->nr_dirs++;
> -	pthread_mutex_unlock(&sft->lock);
> +	inc_nr_dirs(sft);
>  	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
>  	if (error) {
> +		dec_nr_dirs(sft);
>  		str_info(ctx, ctx->mntpoint,
>  _("Could not queue subdirectory scan work."));
>  		return false;

Ok, that's the bug fix for the previous patch. Potentially should be
a separate patch, but right now there is so much outstanding that I
don't think it's worthwhile to respin the series just to fix that.

Reviewed-by: Dave Chinner <dchinner@redhat.com>

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree
  2019-08-26 21:21 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
@ 2019-09-04  8:20   ` Dave Chinner
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2019-09-04  8:20 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: sandeen, linux-xfs

On Mon, Aug 26, 2019 at 02:21:31PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> We don't need to wait on the condition variable if directory tree
> scanning has already finished by the time we've finished queueing all
> the directory work items.  This is easy to trigger when the workqueue is
> single-threaded, but in theory it could happen any time.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  scrub/vfs.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/scrub/vfs.c b/scrub/vfs.c
> index b358ab4a..0e971d27 100644
> --- a/scrub/vfs.c
> +++ b/scrub/vfs.c
> @@ -235,7 +235,8 @@ scan_fs_tree(
>  		goto out_wq;
>  
>  	pthread_mutex_lock(&sft.lock);
> -	pthread_cond_wait(&sft.wakeup, &sft.lock);
> +	if (sft.nr_dirs)
> +		pthread_cond_wait(&sft.wakeup, &sft.lock);


Ok, fixes a typical pthread counting conditional bug. :/

Reviewed-by: Dave Chinner <dchinner@redhat.com>


-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item
  2019-09-04  8:12   ` Dave Chinner
@ 2019-09-04 16:37     ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2019-09-04 16:37 UTC (permalink / raw)
  To: Dave Chinner; +Cc: sandeen, linux-xfs

On Wed, Sep 04, 2019 at 06:12:52PM +1000, Dave Chinner wrote:
> On Mon, Aug 26, 2019 at 02:21:18PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > 
> > Replace the open-coded process of queueing a subdirectory for scanning
> > with a single helper function.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > ---
> >  scrub/vfs.c |   94 +++++++++++++++++++++++++++++++++--------------------------
> >  1 file changed, 52 insertions(+), 42 deletions(-)
> > 
> > 
> > diff --git a/scrub/vfs.c b/scrub/vfs.c
> > index 7b0b5bcd..ea2866d9 100644
> > --- a/scrub/vfs.c
> > +++ b/scrub/vfs.c
> > @@ -43,6 +43,49 @@ struct scan_fs_tree_dir {
> >  	bool			rootdir;
> >  };
> >  
> > +static void scan_fs_dir(struct workqueue *wq, xfs_agnumber_t agno, void *arg);
> > +
> > +/* Queue a directory for scanning. */
> > +static bool
> > +queue_subdir(
> > +	struct scrub_ctx	*ctx,
> > +	struct scan_fs_tree	*sft,
> > +	struct workqueue	*wq,
> > +	const char		*path,
> > +	bool			is_rootdir)
> > +{
> > +	struct scan_fs_tree_dir	*new_sftd;
> > +	int			error;
> > +
> > +	new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
> > +	if (!new_sftd) {
> > +		str_errno(ctx, _("creating directory scan context"));
> > +		return false;
> > +	}
> > +
> > +	new_sftd->path = strdup(path);
> > +	if (!new_sftd->path) {
> > +		str_errno(ctx, _("creating directory scan path"));
> > +		free(new_sftd);
> > +		return false;
> > +	}
> > +
> > +	new_sftd->sft = sft;
> > +	new_sftd->rootdir = is_rootdir;
> > +
> > +	pthread_mutex_lock(&sft->lock);
> > +	sft->nr_dirs++;
> > +	pthread_mutex_unlock(&sft->lock);
> > +	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
> > +	if (error) {
> > +		str_info(ctx, ctx->mntpoint,
> > +_("Could not queue subdirectory scan work."));
> > +		return false;
> 
> Need to drop sft->nr_dirs here, probably free the memory, too.

nr_dirs is (as you've observed) fixed in the next patch.

Yes, we need to free the memory.  Good catch.

> > @@ -177,41 +203,25 @@ scan_fs_tree(
> >  	pthread_mutex_init(&sft.lock, NULL);
> >  	pthread_cond_init(&sft.wakeup, NULL);
> >  
> > -	sftd = malloc(sizeof(struct scan_fs_tree_dir));
> > -	if (!sftd) {
> > -		str_errno(ctx, ctx->mntpoint);
> > -		return false;
> > -	}
> > -	sftd->path = strdup(ctx->mntpoint);
> > -	sftd->sft = &sft;
> > -	sftd->rootdir = true;
> > -
> >  	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
> >  			scrub_nproc_workqueue(ctx));
> >  	if (ret) {
> >  		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
> > -		goto out_free;
> > +		return false;
> >  	}
> > -	ret = workqueue_add(&wq, scan_fs_dir, 0, sftd);
> > -	if (ret) {
> > -		str_info(ctx, ctx->mntpoint,
> > -_("Could not queue directory scan work."));
> > +
> > +	sft.moveon = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
> > +	if (!sft.moveon)
> >  		goto out_wq;
> > -	}
> 
> > sft is a stack variable that is stuffed into the structure passed to
> work run on the workqueue. Is that safe to do here?
> 
> >  	pthread_mutex_lock(&sft.lock);
> >  	pthread_cond_wait(&sft.wakeup, &sft.lock);
> 
> maybe it is because of this, but it's not immediately obvious what
> condition actually triggers and that all the work is done...

A worker thread signals the condition variable when nr_dirs hits zero.
There should only be one worker left when this happens (assuming the
accounting is correct) and the worker doesn't do anything with sft after
it unlocks it, so this should be safe.

Will add comment to that effect.

--D

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree
  2019-09-25 21:31 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
@ 2019-09-25 21:31 ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2019-09-25 21:31 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, Dave Chinner

From: Darrick J. Wong <darrick.wong@oracle.com>

We don't need to wait on the condition variable if directory tree
scanning has already finished by the time we've finished queueing all
the directory work items.  This is easy to trigger when the workqueue is
single-threaded, but in theory it could happen any time.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 scrub/vfs.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index f8bc98c0..1a1482dd 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -246,7 +246,8 @@ scan_fs_tree(
 	 * about to tear everything down.
 	 */
 	pthread_mutex_lock(&sft.lock);
-	pthread_cond_wait(&sft.wakeup, &sft.lock);
+	if (sft.nr_dirs)
+		pthread_cond_wait(&sft.wakeup, &sft.lock);
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree
  2019-09-06  3:33 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
@ 2019-09-06  3:34 ` Darrick J. Wong
  0 siblings, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:34 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs, Dave Chinner

From: Darrick J. Wong <darrick.wong@oracle.com>

We don't need to wait on the condition variable if directory tree
scanning has already finished by the time we've finished queueing all
the directory work items.  This is easy to trigger when the workqueue is
single-threaded, but in theory it could happen any time.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 scrub/vfs.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index f8bc98c0..1a1482dd 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -246,7 +246,8 @@ scan_fs_tree(
 	 * about to tear everything down.
 	 */
 	pthread_mutex_lock(&sft.lock);
-	pthread_cond_wait(&sft.wakeup, &sft.lock);
+	if (sft.nr_dirs)
+		pthread_cond_wait(&sft.wakeup, &sft.lock);
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-09-25 21:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-26 21:21 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
2019-08-26 21:21 ` [PATCH 1/3] xfs_scrub: refactor queueing of subdir scan work item Darrick J. Wong
2019-09-04  8:12   ` Dave Chinner
2019-09-04 16:37     ` Darrick J. Wong
2019-08-26 21:21 ` [PATCH 2/3] xfs_scrub: fix nr_dirs accounting problems Darrick J. Wong
2019-09-04  8:15   ` Dave Chinner
2019-08-26 21:21 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
2019-09-04  8:20   ` Dave Chinner
2019-09-06  3:33 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
2019-09-06  3:34 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong
2019-09-25 21:31 [PATCH 0/3] xfs_scrub: fix bugs in vfs tree walk code Darrick J. Wong
2019-09-25 21:31 ` [PATCH 3/3] xfs_scrub: remove unnecessary wakeup wait in scan_fs_tree Darrick J. Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).