Linux-XFS Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 00/13] libfrog/xfs_scrub: fix error handling
@ 2019-09-06  3:36 Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 01/13] libfrog: fix workqueue error communication problems Darrick J. Wong
                   ` (12 more replies)
  0 siblings, 13 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

Hi all,

The new code introduced by xfs_scrub do not deal with error returns in a
consistent fashion.  Some places we return -1 and set errno, some places
just do whatever libc does, others return positive error codes, and the
worst offenders do a combination of that.  Worse yet, sometimes we also
fail to check for error returns at all.

This series replaces all that with a single error handling strategy --
return positive error codes and always check the return codes from other
library functions.

If you're going to start using this mess, you probably ought to just
pull from my git trees, which are linked below.

This is an extraordinary way to destroy everything.  Enjoy!
Comments and questions are, as always, welcome.

--D

xfsprogs git tree:
https://git.kernel.org/cgit/linux/kernel/git/djwong/xfsprogs-dev.git/log/?h=libfrog-error-handling

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 01/13] libfrog: fix workqueue error communication problems
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 02/13] libfrog: fix missing error checking in workqueue code Darrick J. Wong
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Convert all the workqueue functions to return positive error codes so
that we can move away from the libc-style indirect errno handling and
towards passing error codes directly back to callers.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/workqueue.c |    4 ++--
 scrub/fscounters.c  |    5 ++---
 scrub/inodes.c      |    5 ++---
 scrub/phase2.c      |    8 +++-----
 scrub/phase4.c      |    6 +++---
 scrub/read_verify.c |    3 +--
 scrub/spacemap.c    |   11 ++++-------
 scrub/vfs.c         |    3 +--
 8 files changed, 18 insertions(+), 27 deletions(-)


diff --git a/libfrog/workqueue.c b/libfrog/workqueue.c
index 73114773..a806da3e 100644
--- a/libfrog/workqueue.c
+++ b/libfrog/workqueue.c
@@ -106,8 +106,8 @@ workqueue_add(
 	}
 
 	wi = malloc(sizeof(struct workqueue_item));
-	if (wi == NULL)
-		return ENOMEM;
+	if (!wi)
+		return errno;
 
 	wi->function = func;
 	wi->index = index;
diff --git a/scrub/fscounters.c b/scrub/fscounters.c
index ad467e0c..669c5ab0 100644
--- a/scrub/fscounters.c
+++ b/scrub/fscounters.c
@@ -115,15 +115,14 @@ xfs_count_all_inodes(
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
 		moveon = false;
-		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
+		str_liberror(ctx, ret, _("creating icount workqueue"));
 		goto out_free;
 	}
 	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
 		ret = workqueue_add(&wq, xfs_count_ag_inodes, agno, ci);
 		if (ret) {
 			moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue AG %u icount work."), agno);
+			str_liberror(ctx, ret, _("queueing icount work"));
 			break;
 		}
 	}
diff --git a/scrub/inodes.c b/scrub/inodes.c
index c7aadae7..06350ec6 100644
--- a/scrub/inodes.c
+++ b/scrub/inodes.c
@@ -243,7 +243,7 @@ xfs_scan_all_inodes(
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
-		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
+		str_liberror(ctx, ret, _("creating bulkstat workqueue"));
 		return false;
 	}
 
@@ -251,8 +251,7 @@ xfs_scan_all_inodes(
 		ret = workqueue_add(&wq, xfs_scan_ag_inodes, agno, &si);
 		if (ret) {
 			si.moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue AG %u bulkstat work."), agno);
+			str_liberror(ctx, ret, _("queueing bulkstat work"));
 			break;
 		}
 	}
diff --git a/scrub/phase2.c b/scrub/phase2.c
index f064c83d..1d2244a4 100644
--- a/scrub/phase2.c
+++ b/scrub/phase2.c
@@ -124,7 +124,7 @@ xfs_scan_metadata(
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
-		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
+		str_liberror(ctx, ret, _("creating scrub workqueue"));
 		return false;
 	}
 
@@ -145,8 +145,7 @@ xfs_scan_metadata(
 		ret = workqueue_add(&wq, xfs_scan_ag_metadata, agno, &moveon);
 		if (ret) {
 			moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue AG %u scrub work."), agno);
+			str_liberror(ctx, ret, _("queueing per-AG scrub work"));
 			goto out;
 		}
 	}
@@ -157,8 +156,7 @@ _("Could not queue AG %u scrub work."), agno);
 	ret = workqueue_add(&wq, xfs_scan_fs_metadata, 0, &moveon);
 	if (ret) {
 		moveon = false;
-		str_info(ctx, ctx->mntpoint,
-_("Could not queue filesystem scrub work."));
+		str_liberror(ctx, ret, _("queueing per-FS scrub work"));
 		goto out;
 	}
 
diff --git a/scrub/phase4.c b/scrub/phase4.c
index 25fedc83..903da6d2 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -74,7 +74,7 @@ xfs_process_action_items(
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
-		str_error(ctx, ctx->mntpoint, _("Could not create workqueue."));
+		str_liberror(ctx, ret, _("creating repair workqueue"));
 		return false;
 	}
 	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++) {
@@ -82,8 +82,8 @@ xfs_process_action_items(
 			ret = workqueue_add(&wq, xfs_repair_ag, agno, &moveon);
 			if (ret) {
 				moveon = false;
-				str_error(ctx, ctx->mntpoint,
-_("Could not queue repair work."));
+				str_liberror(ctx, ret,
+						_("queueing repair work"));
 				break;
 			}
 		}
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index 2152d167..ff4d3572 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -197,8 +197,7 @@ read_verify_queue(
 
 	ret = workqueue_add(&rvp->wq, read_verify, 0, tmp);
 	if (ret) {
-		str_info(rvp->ctx, rvp->ctx->mntpoint,
-_("Could not queue read-verify work."));
+		str_liberror(rvp->ctx, ret, _("queueing read-verify work"));
 		free(tmp);
 		return false;
 	}
diff --git a/scrub/spacemap.c b/scrub/spacemap.c
index a7876478..4258e318 100644
--- a/scrub/spacemap.c
+++ b/scrub/spacemap.c
@@ -200,7 +200,7 @@ xfs_scan_all_spacemaps(
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
-		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
+		str_liberror(ctx, ret, _("creating fsmap workqueue"));
 		return false;
 	}
 	if (ctx->fsinfo.fs_rt) {
@@ -208,8 +208,7 @@ xfs_scan_all_spacemaps(
 				ctx->mnt.fsgeom.agcount + 1, &sbx);
 		if (ret) {
 			sbx.moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue rtdev fsmap work."));
+			str_liberror(ctx, ret, _("queueing rtdev fsmap work"));
 			goto out;
 		}
 	}
@@ -218,8 +217,7 @@ _("Could not queue rtdev fsmap work."));
 				ctx->mnt.fsgeom.agcount + 2, &sbx);
 		if (ret) {
 			sbx.moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue logdev fsmap work."));
+			str_liberror(ctx, ret, _("queueing logdev fsmap work"));
 			goto out;
 		}
 	}
@@ -227,8 +225,7 @@ _("Could not queue logdev fsmap work."));
 		ret = workqueue_add(&wq, xfs_scan_ag_blocks, agno, &sbx);
 		if (ret) {
 			sbx.moveon = false;
-			str_info(ctx, ctx->mntpoint,
-_("Could not queue AG %u fsmap work."), agno);
+			str_liberror(ctx, ret, _("queueing per-AG fsmap work"));
 			break;
 		}
 	}
diff --git a/scrub/vfs.c b/scrub/vfs.c
index 1a1482dd..0cff2e3f 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -102,8 +102,7 @@ queue_subdir(
 	error = workqueue_add(wq, scan_fs_dir, 0, new_sftd);
 	if (error) {
 		dec_nr_dirs(sft);
-		str_info(ctx, ctx->mntpoint,
-_("Could not queue subdirectory scan work."));
+		str_liberror(ctx, error, _("queueing directory scan work"));
 		goto out_path;
 	}
 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 02/13] libfrog: fix missing error checking in workqueue code
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 01/13] libfrog: fix workqueue error communication problems Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 03/13] libfrog: split workqueue destroy functions Darrick J. Wong
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Fix all the places in the workqueue code where we fail to check return
values and blindly keep going when we shouldn't.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/workqueue.c |   32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)


diff --git a/libfrog/workqueue.c b/libfrog/workqueue.c
index a806da3e..48038363 100644
--- a/libfrog/workqueue.c
+++ b/libfrog/workqueue.c
@@ -67,12 +67,20 @@ workqueue_create(
 	int			err = 0;
 
 	memset(wq, 0, sizeof(*wq));
-	pthread_cond_init(&wq->wakeup, NULL);
-	pthread_mutex_init(&wq->lock, NULL);
+	err = pthread_cond_init(&wq->wakeup, NULL);
+	if (err)
+		return err;
+	err = pthread_mutex_init(&wq->lock, NULL);
+	if (err)
+		goto out_cond;
 
 	wq->wq_ctx = wq_ctx;
 	wq->thread_count = nr_workers;
 	wq->threads = malloc(nr_workers * sizeof(pthread_t));
+	if (!wq->threads) {
+		err = errno;
+		goto out_mutex;
+	}
 	wq->terminate = false;
 
 	for (i = 0; i < nr_workers; i++) {
@@ -82,9 +90,19 @@ workqueue_create(
 			break;
 	}
 
+	/*
+	 * If we encounter errors here, we have to signal and then wait for all
+	 * the threads that may have been started running before we can destroy
+	 * the workqueue.
+	 */
 	if (err)
 		workqueue_destroy(wq);
 	return err;
+out_mutex:
+	pthread_mutex_destroy(&wq->lock);
+out_cond:
+	pthread_cond_destroy(&wq->wakeup);
+	return err;
 }
 
 /*
@@ -99,6 +117,7 @@ workqueue_add(
 	void			*arg)
 {
 	struct workqueue_item	*wi;
+	int			ret;
 
 	if (wq->thread_count == 0) {
 		func(wq, index, arg);
@@ -118,9 +137,11 @@ workqueue_add(
 	/* Now queue the new work structure to the work queue. */
 	pthread_mutex_lock(&wq->lock);
 	if (wq->next_item == NULL) {
-		wq->next_item = wi;
 		assert(wq->item_count == 0);
-		pthread_cond_signal(&wq->wakeup);
+		ret = pthread_cond_signal(&wq->wakeup);
+		if (ret)
+			goto out_item;
+		wq->next_item = wi;
 	} else {
 		wq->last_item->next = wi;
 	}
@@ -129,6 +150,9 @@ workqueue_add(
 	pthread_mutex_unlock(&wq->lock);
 
 	return 0;
+out_item:
+	free(wi);
+	return ret;
 }
 
 /*


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 03/13] libfrog: split workqueue destroy functions
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 01/13] libfrog: fix workqueue error communication problems Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 02/13] libfrog: fix missing error checking in workqueue code Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 04/13] xfs_scrub: redistribute read verify pool flush and destroy responsibilities Darrick J. Wong
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Split the workqueue destroy function into two parts -- one to signal all
the threads to exit and wait for them, and a second one that actually
destroys all the memory associated with the workqueue.  This mean we can
report latent workqueue errors independent of the freeing function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/workqueue.c |   37 ++++++++++++++++++++++++++++++-------
 libfrog/workqueue.h |    2 ++
 repair/threads.c    |    6 ++++++
 scrub/fscounters.c  |   11 ++++++++++-
 scrub/inodes.c      |    5 +++++
 scrub/phase2.c      |    5 +++++
 scrub/phase4.c      |    6 ++++++
 scrub/read_verify.c |    1 +
 scrub/spacemap.c    |    5 +++++
 scrub/vfs.c         |    5 +++++
 10 files changed, 75 insertions(+), 8 deletions(-)


diff --git a/libfrog/workqueue.c b/libfrog/workqueue.c
index 48038363..07f11a7b 100644
--- a/libfrog/workqueue.c
+++ b/libfrog/workqueue.c
@@ -82,6 +82,7 @@ workqueue_create(
 		goto out_mutex;
 	}
 	wq->terminate = false;
+	wq->terminated = false;
 
 	for (i = 0; i < nr_workers; i++) {
 		err = pthread_create(&wq->threads[i], NULL, workqueue_thread,
@@ -119,6 +120,8 @@ workqueue_add(
 	struct workqueue_item	*wi;
 	int			ret;
 
+	assert(!wq->terminated);
+
 	if (wq->thread_count == 0) {
 		func(wq, index, arg);
 		return 0;
@@ -157,22 +160,42 @@ workqueue_add(
 
 /*
  * Wait for all pending work items to be processed and tear down the
- * workqueue.
+ * workqueue thread pool.
  */
-void
-workqueue_destroy(
+int
+workqueue_terminate(
 	struct workqueue	*wq)
 {
 	unsigned int		i;
+	int			ret;
+
+	pthread_mutex_lock(&wq->lock);
+	wq->terminate = true;
+	pthread_mutex_unlock(&wq->lock);
+
+	ret = pthread_cond_broadcast(&wq->wakeup);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < wq->thread_count; i++) {
+		ret = pthread_join(wq->threads[i], NULL);
+		if (ret)
+			return ret;
+	}
 
 	pthread_mutex_lock(&wq->lock);
-	wq->terminate = 1;
+	wq->terminated = true;
 	pthread_mutex_unlock(&wq->lock);
 
-	pthread_cond_broadcast(&wq->wakeup);
+	return 0;
+}
 
-	for (i = 0; i < wq->thread_count; i++)
-		pthread_join(wq->threads[i], NULL);
+/* Tear down the workqueue. */
+void
+workqueue_destroy(
+	struct workqueue	*wq)
+{
+	assert(wq->terminated);
 
 	free(wq->threads);
 	pthread_mutex_destroy(&wq->lock);
diff --git a/libfrog/workqueue.h b/libfrog/workqueue.h
index a1f3a57c..a56d1cf1 100644
--- a/libfrog/workqueue.h
+++ b/libfrog/workqueue.h
@@ -30,12 +30,14 @@ struct workqueue {
 	unsigned int		item_count;
 	unsigned int		thread_count;
 	bool			terminate;
+	bool			terminated;
 };
 
 int workqueue_create(struct workqueue *wq, void *wq_ctx,
 		unsigned int nr_workers);
 int workqueue_add(struct workqueue *wq, workqueue_func_t fn,
 		uint32_t index, void *arg);
+int workqueue_terminate(struct workqueue *wq);
 void workqueue_destroy(struct workqueue *wq);
 
 #endif	/* __LIBFROG_WORKQUEUE_H__ */
diff --git a/repair/threads.c b/repair/threads.c
index d2190920..9b7241e3 100644
--- a/repair/threads.c
+++ b/repair/threads.c
@@ -56,5 +56,11 @@ void
 destroy_work_queue(
 	struct workqueue	*wq)
 {
+	int			err;
+
+	err = workqueue_terminate(wq);
+	if (err)
+		do_error(_("cannot terminate worker item, error = [%d] %s\n"),
+				err, strerror(err));
 	workqueue_destroy(wq);
 }
diff --git a/scrub/fscounters.c b/scrub/fscounters.c
index 669c5ab0..98aa3826 100644
--- a/scrub/fscounters.c
+++ b/scrub/fscounters.c
@@ -102,7 +102,7 @@ xfs_count_all_inodes(
 	struct xfs_count_inodes	*ci;
 	xfs_agnumber_t		agno;
 	struct workqueue	wq;
-	bool			moveon;
+	bool			moveon = true;
 	int			ret;
 
 	ci = calloc(1, sizeof(struct xfs_count_inodes) +
@@ -126,8 +126,17 @@ xfs_count_all_inodes(
 			break;
 		}
 	}
+
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing icount work"));
+	}
 	workqueue_destroy(&wq);
 
+	if (!moveon)
+		goto out_free;
+
 	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++)
 		*count += ci->counters[agno];
 	moveon = ci->moveon;
diff --git a/scrub/inodes.c b/scrub/inodes.c
index 06350ec6..37a35a3f 100644
--- a/scrub/inodes.c
+++ b/scrub/inodes.c
@@ -256,6 +256,11 @@ xfs_scan_all_inodes(
 		}
 	}
 
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		si.moveon = false;
+		str_liberror(ctx, ret, _("finishing bulkstat work"));
+	}
 	workqueue_destroy(&wq);
 
 	return si.moveon;
diff --git a/scrub/phase2.c b/scrub/phase2.c
index 1d2244a4..d92b7e29 100644
--- a/scrub/phase2.c
+++ b/scrub/phase2.c
@@ -161,6 +161,11 @@ xfs_scan_metadata(
 	}
 
 out:
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing scrub work"));
+	}
 	workqueue_destroy(&wq);
 	return moveon;
 }
diff --git a/scrub/phase4.c b/scrub/phase4.c
index 903da6d2..eb30c189 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -90,6 +90,12 @@ xfs_process_action_items(
 		if (!moveon)
 			break;
 	}
+
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing repair work"));
+	}
 	workqueue_destroy(&wq);
 
 	pthread_mutex_lock(&ctx->lock);
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index ff4d3572..bb8f09a8 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -120,6 +120,7 @@ void
 read_verify_pool_flush(
 	struct read_verify_pool		*rvp)
 {
+	workqueue_terminate(&rvp->wq);
 	workqueue_destroy(&rvp->wq);
 }
 
diff --git a/scrub/spacemap.c b/scrub/spacemap.c
index 4258e318..91e8badb 100644
--- a/scrub/spacemap.c
+++ b/scrub/spacemap.c
@@ -230,6 +230,11 @@ xfs_scan_all_spacemaps(
 		}
 	}
 out:
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		sbx.moveon = false;
+		str_liberror(ctx, ret, _("finishing fsmap work"));
+	}
 	workqueue_destroy(&wq);
 
 	return sbx.moveon;
diff --git a/scrub/vfs.c b/scrub/vfs.c
index 0cff2e3f..49d689af 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -250,6 +250,11 @@ scan_fs_tree(
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		sft.moveon = false;
+		str_liberror(ctx, ret, _("finishing directory scan work"));
+	}
 out_wq:
 	workqueue_destroy(&wq);
 	return sft.moveon;


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 04/13] xfs_scrub: redistribute read verify pool flush and destroy responsibilities
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (2 preceding siblings ...)
  2019-09-06  3:36 ` [PATCH 03/13] libfrog: split workqueue destroy functions Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 05/13] libfrog: fix per-thread variable error communication problems Darrick J. Wong
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Since workqueues now have separate primitives for "wait for all queued
work" and "destroy workqueue", it makes more sense for the read verify
pool code to call the workqueue destructor from its own destructor
function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/phase6.c      |    9 +++++++--
 scrub/read_verify.c |    2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)


diff --git a/scrub/phase6.c b/scrub/phase6.c
index b41f90e0..aff04e76 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -511,12 +511,17 @@ _("Could not create data device media verifier."));
 	return moveon;
 
 out_rtpool:
-	if (vs.rvp_realtime)
+	if (vs.rvp_realtime) {
+		read_verify_pool_flush(vs.rvp_realtime);
 		read_verify_pool_destroy(vs.rvp_realtime);
+	}
 out_logpool:
-	if (vs.rvp_log)
+	if (vs.rvp_log) {
+		read_verify_pool_flush(vs.rvp_log);
 		read_verify_pool_destroy(vs.rvp_log);
+	}
 out_datapool:
+	read_verify_pool_flush(vs.rvp_data);
 	read_verify_pool_destroy(vs.rvp_data);
 out_rbad:
 	bitmap_free(&vs.r_bad);
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index bb8f09a8..e59d3e67 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -121,7 +121,6 @@ read_verify_pool_flush(
 	struct read_verify_pool		*rvp)
 {
 	workqueue_terminate(&rvp->wq);
-	workqueue_destroy(&rvp->wq);
 }
 
 /* Finish up any read verification work and tear it down. */
@@ -129,6 +128,7 @@ void
 read_verify_pool_destroy(
 	struct read_verify_pool		*rvp)
 {
+	workqueue_destroy(&rvp->wq);
 	ptvar_free(rvp->rvstate);
 	ptcounter_free(rvp->verified_bytes);
 	free(rvp->readbuf);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 05/13] libfrog: fix per-thread variable error communication problems
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (3 preceding siblings ...)
  2019-09-06  3:36 ` [PATCH 04/13] xfs_scrub: redistribute read verify pool flush and destroy responsibilities Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 06/13] libfrog: add missing per-thread variable error handling Darrick J. Wong
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Convert all the per-thread variable functions away from the libc-style
indirect errno return to return error values directly to callers.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/ptvar.c     |   26 +++++++++++++++-----------
 libfrog/ptvar.h     |    8 ++++----
 scrub/counter.c     |   13 ++++++++-----
 scrub/phase7.c      |   24 ++++++++++++++++--------
 scrub/read_verify.c |   16 +++++++++++-----
 5 files changed, 54 insertions(+), 33 deletions(-)


diff --git a/libfrog/ptvar.c b/libfrog/ptvar.c
index c9296835..6cb58208 100644
--- a/libfrog/ptvar.c
+++ b/libfrog/ptvar.c
@@ -33,11 +33,12 @@ struct ptvar {
 };
 #define PTVAR_SIZE(nr, sz) (sizeof(struct ptvar) + ((nr) * (size)))
 
-/* Initialize per-thread counter. */
-struct ptvar *
-ptvar_init(
+/* Allocate a new per-thread counter. */
+int
+ptvar_alloc(
 	size_t		nr,
-	size_t		size)
+	size_t		size,
+	struct ptvar	**pptv)
 {
 	struct ptvar	*ptv;
 	int		ret;
@@ -49,7 +50,7 @@ ptvar_init(
 
 	ptv = malloc(PTVAR_SIZE(nr, size));
 	if (!ptv)
-		return NULL;
+		return errno;
 	ptv->data_size = size;
 	ptv->nr_counters = nr;
 	ptv->nr_used = 0;
@@ -60,13 +61,14 @@ ptvar_init(
 	ret = pthread_key_create(&ptv->key, NULL);
 	if (ret)
 		goto out_mutex;
-	return ptv;
 
+	*pptv = ptv;
+	return 0;
 out_mutex:
 	pthread_mutex_destroy(&ptv->lock);
 out:
 	free(ptv);
-	return NULL;
+	return ret;
 }
 
 /* Free per-thread counter. */
@@ -82,7 +84,8 @@ ptvar_free(
 /* Get a reference to this thread's variable. */
 void *
 ptvar_get(
-	struct ptvar	*ptv)
+	struct ptvar	*ptv,
+	int		*retp)
 {
 	void		*p;
 
@@ -94,23 +97,24 @@ ptvar_get(
 		pthread_setspecific(ptv->key, p);
 		pthread_mutex_unlock(&ptv->lock);
 	}
+	*retp = 0;
 	return p;
 }
 
 /* Iterate all of the per-thread variables. */
-bool
+int
 ptvar_foreach(
 	struct ptvar	*ptv,
 	ptvar_iter_fn	fn,
 	void		*foreach_arg)
 {
 	size_t		i;
-	bool		ret = true;
+	int		ret;
 
 	pthread_mutex_lock(&ptv->lock);
 	for (i = 0; i < ptv->nr_used; i++) {
 		ret = fn(ptv, &ptv->data[i * ptv->data_size], foreach_arg);
-		if (!ret)
+		if (ret)
 			break;
 	}
 	pthread_mutex_unlock(&ptv->lock);
diff --git a/libfrog/ptvar.h b/libfrog/ptvar.h
index a8803c64..42865c0b 100644
--- a/libfrog/ptvar.h
+++ b/libfrog/ptvar.h
@@ -8,11 +8,11 @@
 
 struct ptvar;
 
-typedef bool (*ptvar_iter_fn)(struct ptvar *ptv, void *data, void *foreach_arg);
+typedef int (*ptvar_iter_fn)(struct ptvar *ptv, void *data, void *foreach_arg);
 
-struct ptvar *ptvar_init(size_t nr, size_t size);
+int ptvar_alloc(size_t nr, size_t size, struct ptvar **pptv);
 void ptvar_free(struct ptvar *ptv);
-void *ptvar_get(struct ptvar *ptv);
-bool ptvar_foreach(struct ptvar *ptv, ptvar_iter_fn fn, void *foreach_arg);
+void *ptvar_get(struct ptvar *ptv, int *ret);
+int ptvar_foreach(struct ptvar *ptv, ptvar_iter_fn fn, void *foreach_arg);
 
 #endif /* __LIBFROG_PTVAR_H__ */
diff --git a/scrub/counter.c b/scrub/counter.c
index 43444927..49ffbfea 100644
--- a/scrub/counter.c
+++ b/scrub/counter.c
@@ -32,12 +32,13 @@ ptcounter_init(
 	size_t			nr)
 {
 	struct ptcounter	*p;
+	int			ret;
 
 	p = malloc(sizeof(struct ptcounter));
 	if (!p)
 		return NULL;
-	p->var = ptvar_init(nr, sizeof(uint64_t));
-	if (!p->var) {
+	ret = ptvar_alloc(nr, sizeof(uint64_t), &p->var);
+	if (ret) {
 		free(p);
 		return NULL;
 	}
@@ -60,12 +61,14 @@ ptcounter_add(
 	int64_t			nr)
 {
 	uint64_t		*p;
+	int			ret;
 
-	p = ptvar_get(ptc->var);
+	p = ptvar_get(ptc->var, &ret);
+	assert(ret == 0);
 	*p += nr;
 }
 
-static bool
+static int
 ptcounter_val_helper(
 	struct ptvar		*ptv,
 	void			*data,
@@ -75,7 +78,7 @@ ptcounter_val_helper(
 	uint64_t		*count = data;
 
 	*sum += *count;
-	return true;
+	return 0;
 }
 
 /* Return the approximate value of this counter. */
diff --git a/scrub/phase7.c b/scrub/phase7.c
index 308b8bb3..bc959f5b 100644
--- a/scrub/phase7.c
+++ b/scrub/phase7.c
@@ -36,8 +36,13 @@ xfs_record_block_summary(
 {
 	struct summary_counts	*counts;
 	unsigned long long	len;
+	int			ret;
 
-	counts = ptvar_get((struct ptvar *)arg);
+	counts = ptvar_get((struct ptvar *)arg, &ret);
+	if (ret) {
+		str_liberror(ctx, ret, _("retrieving summary counts"));
+		return false;
+	}
 	if (fsmap->fmr_device == ctx->fsinfo.fs_logdev)
 		return true;
 	if ((fsmap->fmr_flags & FMR_OF_SPECIAL_OWNER) &&
@@ -68,7 +73,7 @@ xfs_record_block_summary(
 }
 
 /* Add all the summaries in the per-thread counter */
-static bool
+static int
 xfs_add_summaries(
 	struct ptvar		*ptv,
 	void			*data,
@@ -80,7 +85,7 @@ xfs_add_summaries(
 	total->dbytes += item->dbytes;
 	total->rbytes += item->rbytes;
 	total->agbytes += item->agbytes;
-	return true;
+	return 0;
 }
 
 /*
@@ -131,9 +136,10 @@ xfs_scan_summary(
 		return false;
 	}
 
-	ptvar = ptvar_init(scrub_nproc(ctx), sizeof(struct summary_counts));
-	if (!ptvar) {
-		str_errno(ctx, ctx->mntpoint);
+	error = ptvar_alloc(scrub_nproc(ctx), sizeof(struct summary_counts),
+			&ptvar);
+	if (error) {
+		str_liberror(ctx, error, _("setting up block counter"));
 		return false;
 	}
 
@@ -141,9 +147,11 @@ xfs_scan_summary(
 	moveon = xfs_scan_all_spacemaps(ctx, xfs_record_block_summary, ptvar);
 	if (!moveon)
 		goto out_free;
-	moveon = ptvar_foreach(ptvar, xfs_add_summaries, &totalcount);
-	if (!moveon)
+	error = ptvar_foreach(ptvar, xfs_add_summaries, &totalcount);
+	if (error) {
+		str_liberror(ctx, error, _("counting blocks"));
 		goto out_free;
+	}
 	ptvar_free(ptvar);
 
 	/* Scan the whole fs. */
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index e59d3e67..95987a9b 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -91,9 +91,9 @@ read_verify_pool_init(
 	rvp->ctx = ctx;
 	rvp->disk = disk;
 	rvp->ioerr_fn = ioerr_fn;
-	rvp->rvstate = ptvar_init(submitter_threads,
-			sizeof(struct read_verify));
-	if (rvp->rvstate == NULL)
+	error = ptvar_alloc(submitter_threads, sizeof(struct read_verify),
+			&rvp->rvstate);
+	if (error)
 		goto out_counter;
 	/* Run in the main thread if we only want one thread. */
 	if (nproc == 1)
@@ -220,9 +220,12 @@ read_verify_schedule_io(
 	struct read_verify		*rv;
 	uint64_t			req_end;
 	uint64_t			rv_end;
+	int				ret;
 
 	assert(rvp->readbuf);
-	rv = ptvar_get(rvp->rvstate);
+	rv = ptvar_get(rvp->rvstate, &ret);
+	if (ret)
+		return false;
 	req_end = start + length;
 	rv_end = rv->io_start + rv->io_length;
 
@@ -259,9 +262,12 @@ read_verify_force_io(
 {
 	struct read_verify		*rv;
 	bool				moveon;
+	int				ret;
 
 	assert(rvp->readbuf);
-	rv = ptvar_get(rvp->rvstate);
+	rv = ptvar_get(rvp->rvstate, &ret);
+	if (ret)
+		return false;
 	if (rv->io_length == 0)
 		return true;
 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 06/13] libfrog: add missing per-thread variable error handling
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (4 preceding siblings ...)
  2019-09-06  3:36 ` [PATCH 05/13] libfrog: fix per-thread variable error communication problems Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:36 ` [PATCH 07/13] libfrog: fix bitmap error communication problems Darrick J. Wong
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Add missing return value checks for everything that the per-thread
variable code calls.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/ptvar.c |   17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)


diff --git a/libfrog/ptvar.c b/libfrog/ptvar.c
index 6cb58208..55324b71 100644
--- a/libfrog/ptvar.c
+++ b/libfrog/ptvar.c
@@ -44,8 +44,12 @@ ptvar_alloc(
 	int		ret;
 
 #ifdef _SC_LEVEL1_DCACHE_LINESIZE
+	long		l1_dcache;
+
 	/* Try to prevent cache pingpong by aligning to cacheline size. */
-	size = max(size, sysconf(_SC_LEVEL1_DCACHE_LINESIZE));
+	l1_dcache = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+	if (l1_dcache > 0)
+		size = roundup(size, l1_dcache);
 #endif
 
 	ptv = malloc(PTVAR_SIZE(nr, size));
@@ -88,17 +92,26 @@ ptvar_get(
 	int		*retp)
 {
 	void		*p;
+	int		ret;
 
 	p = pthread_getspecific(ptv->key);
 	if (!p) {
 		pthread_mutex_lock(&ptv->lock);
 		assert(ptv->nr_used < ptv->nr_counters);
 		p = &ptv->data[(ptv->nr_used++) * ptv->data_size];
-		pthread_setspecific(ptv->key, p);
+		ret = pthread_setspecific(ptv->key, p);
+		if (ret)
+			goto out_unlock;
 		pthread_mutex_unlock(&ptv->lock);
 	}
 	*retp = 0;
 	return p;
+
+out_unlock:
+	ptv->nr_used--;
+	pthread_mutex_unlock(&ptv->lock);
+	*retp = ret;
+	return NULL;
 }
 
 /* Iterate all of the per-thread variables. */


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 07/13] libfrog: fix bitmap error communication problems
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (5 preceding siblings ...)
  2019-09-06  3:36 ` [PATCH 06/13] libfrog: add missing per-thread variable error handling Darrick J. Wong
@ 2019-09-06  3:36 ` Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 08/13] libfrog: fix missing error checking in bitmap code Darrick J. Wong
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:36 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Convert all the libfrog code and callers away from the libc-style
indirect errno returns to directly returning error codes to callers.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/bitmap.c |   13 +++++++------
 libfrog/bitmap.h |    2 +-
 repair/rmap.c    |    4 ++--
 scrub/phase6.c   |   20 +++++++++++---------
 4 files changed, 21 insertions(+), 18 deletions(-)


diff --git a/libfrog/bitmap.c b/libfrog/bitmap.c
index 4dafc4c9..be95965f 100644
--- a/libfrog/bitmap.c
+++ b/libfrog/bitmap.c
@@ -23,7 +23,8 @@
  */
 
 #define avl_for_each_range_safe(pos, n, l, first, last) \
-	for (pos = (first), n = pos->avl_nextino, l = (last)->avl_nextino; pos != (l); \
+	for (pos = (first), n = pos->avl_nextino, l = (last)->avl_nextino; \
+			pos != (l); \
 			pos = n, n = pos ? pos->avl_nextino : NULL)
 
 #define avl_for_each_safe(tree, pos, n) \
@@ -67,18 +68,18 @@ static struct avl64ops bitmap_ops = {
 
 /* Initialize a bitmap. */
 int
-bitmap_init(
+bitmap_alloc(
 	struct bitmap		**bmapp)
 {
 	struct bitmap		*bmap;
 
 	bmap = calloc(1, sizeof(struct bitmap));
 	if (!bmap)
-		return -ENOMEM;
+		return errno;
 	bmap->bt_tree = malloc(sizeof(struct avl64tree_desc));
 	if (!bmap->bt_tree) {
 		free(bmap);
-		return -ENOMEM;
+		return errno;
 	}
 
 	pthread_mutex_init(&bmap->bt_lock, NULL);
@@ -139,12 +140,12 @@ __bitmap_insert(
 
 	ext = bitmap_node_init(start, length);
 	if (!ext)
-		return -ENOMEM;
+		return errno;
 
 	node = avl64_insert(bmap->bt_tree, &ext->btn_node);
 	if (node == NULL) {
 		free(ext);
-		return -EEXIST;
+		return EEXIST;
 	}
 
 	return 0;
diff --git a/libfrog/bitmap.h b/libfrog/bitmap.h
index 40119b9c..759386a8 100644
--- a/libfrog/bitmap.h
+++ b/libfrog/bitmap.h
@@ -11,7 +11,7 @@ struct bitmap {
 	struct avl64tree_desc	*bt_tree;
 };
 
-int bitmap_init(struct bitmap **bmap);
+int bitmap_alloc(struct bitmap **bmap);
 void bitmap_free(struct bitmap **bmap);
 int bitmap_set(struct bitmap *bmap, uint64_t start, uint64_t length);
 int bitmap_iterate(struct bitmap *bmap, int (*fn)(uint64_t, uint64_t, void *),
diff --git a/repair/rmap.c b/repair/rmap.c
index b907383e..c6ed25a9 100644
--- a/repair/rmap.c
+++ b/repair/rmap.c
@@ -490,13 +490,13 @@ rmap_store_ag_btree_rec(
 	error = init_slab_cursor(ag_rmap->ar_raw_rmaps, rmap_compare, &rm_cur);
 	if (error)
 		goto err;
-	error = -bitmap_init(&own_ag_bitmap);
+	error = bitmap_alloc(&own_ag_bitmap);
 	if (error)
 		goto err_slab;
 	while ((rm_rec = pop_slab_cursor(rm_cur)) != NULL) {
 		if (rm_rec->rm_owner != XFS_RMAP_OWN_AG)
 			continue;
-		error = -bitmap_set(own_ag_bitmap, rm_rec->rm_startblock,
+		error = bitmap_set(own_ag_bitmap, rm_rec->rm_startblock,
 					rm_rec->rm_blockcount);
 		if (error) {
 			/*
diff --git a/scrub/phase6.c b/scrub/phase6.c
index aff04e76..d9285fee 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -341,6 +341,7 @@ xfs_check_rmap_ioerr(
 	struct media_verify_state	*vs = arg;
 	struct bitmap			*tree;
 	dev_t				dev;
+	int				ret;
 
 	dev = xfs_disk_to_dev(ctx, disk);
 
@@ -355,9 +356,9 @@ xfs_check_rmap_ioerr(
 	else
 		tree = NULL;
 	if (tree) {
-		errno = -bitmap_set(tree, start, length);
-		if (errno)
-			str_errno(ctx, ctx->mntpoint);
+		ret = bitmap_set(tree, start, length);
+		if (ret)
+			str_liberror(ctx, ret, _("setting bad block bitmap"));
 	}
 
 	snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "),
@@ -454,16 +455,17 @@ xfs_scan_blocks(
 {
 	struct media_verify_state	vs = { NULL };
 	bool				moveon = false;
+	int				ret;
 
-	errno = -bitmap_init(&vs.d_bad);
-	if (errno) {
-		str_errno(ctx, ctx->mntpoint);
+	ret = bitmap_alloc(&vs.d_bad);
+	if (ret) {
+		str_liberror(ctx, ret, _("creating datadev badblock bitmap"));
 		goto out;
 	}
 
-	errno = -bitmap_init(&vs.r_bad);
-	if (errno) {
-		str_errno(ctx, ctx->mntpoint);
+	ret = bitmap_alloc(&vs.r_bad);
+	if (ret) {
+		str_liberror(ctx, ret, _("creating realtime badblock bitmap"));
 		goto out_dbad;
 	}
 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 08/13] libfrog: fix missing error checking in bitmap code
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (6 preceding siblings ...)
  2019-09-06  3:36 ` [PATCH 07/13] libfrog: fix bitmap error communication problems Darrick J. Wong
@ 2019-09-06  3:37 ` Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 09/13] xfs_scrub: fix per-thread counter error communication problems Darrick J. Wong
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:37 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Check library calls for error codes being returned.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/bitmap.c |   15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)


diff --git a/libfrog/bitmap.c b/libfrog/bitmap.c
index be95965f..a75d085a 100644
--- a/libfrog/bitmap.c
+++ b/libfrog/bitmap.c
@@ -72,21 +72,30 @@ bitmap_alloc(
 	struct bitmap		**bmapp)
 {
 	struct bitmap		*bmap;
+	int			ret;
 
 	bmap = calloc(1, sizeof(struct bitmap));
 	if (!bmap)
 		return errno;
 	bmap->bt_tree = malloc(sizeof(struct avl64tree_desc));
 	if (!bmap->bt_tree) {
-		free(bmap);
-		return errno;
+		ret = errno;
+		goto out;
 	}
 
-	pthread_mutex_init(&bmap->bt_lock, NULL);
+	ret = pthread_mutex_init(&bmap->bt_lock, NULL);
+	if (ret)
+		goto out_tree;
+
 	avl64_init_tree(bmap->bt_tree, &bitmap_ops);
 	*bmapp = bmap;
 
 	return 0;
+out_tree:
+	free(bmap->bt_tree);
+out:
+	free(bmap);
+	return ret;
 }
 
 /* Free a bitmap. */


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 09/13] xfs_scrub: fix per-thread counter error communication problems
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (7 preceding siblings ...)
  2019-09-06  3:37 ` [PATCH 08/13] libfrog: fix missing error checking in bitmap code Darrick J. Wong
@ 2019-09-06  3:37 ` Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 10/13] xfs_scrub: report all progressbar creation failures Darrick J. Wong
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:37 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Fix all the places in the per-thread counter functions either we fail to
check for runtime errors or fail to communicate them properly to
callers.  Then fix all the callers to report the error messages instead
of hiding them.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/counter.c     |   33 ++++++++++++++++++---------------
 scrub/counter.h     |    6 +++---
 scrub/phase3.c      |   23 +++++++++++++++++------
 scrub/progress.c    |   12 +++++++++---
 scrub/read_verify.c |    9 ++++++---
 5 files changed, 53 insertions(+), 30 deletions(-)


diff --git a/scrub/counter.c b/scrub/counter.c
index 49ffbfea..1bb726dc 100644
--- a/scrub/counter.c
+++ b/scrub/counter.c
@@ -26,23 +26,25 @@ struct ptcounter {
 	struct ptvar	*var;
 };
 
-/* Initialize per-thread counter. */
-struct ptcounter *
-ptcounter_init(
-	size_t			nr)
+/* Allocate per-thread counter. */
+int
+ptcounter_alloc(
+	size_t			nr,
+	struct ptcounter	**pp)
 {
 	struct ptcounter	*p;
 	int			ret;
 
 	p = malloc(sizeof(struct ptcounter));
 	if (!p)
-		return NULL;
+		return errno;
 	ret = ptvar_alloc(nr, sizeof(uint64_t), &p->var);
 	if (ret) {
 		free(p);
-		return NULL;
+		return ret;
 	}
-	return p;
+	*pp = p;
+	return 0;
 }
 
 /* Free per-thread counter. */
@@ -55,7 +57,7 @@ ptcounter_free(
 }
 
 /* Add a quantity to the counter. */
-void
+int
 ptcounter_add(
 	struct ptcounter	*ptc,
 	int64_t			nr)
@@ -64,8 +66,10 @@ ptcounter_add(
 	int			ret;
 
 	p = ptvar_get(ptc->var, &ret);
-	assert(ret == 0);
+	if (ret)
+		return ret;
 	*p += nr;
+	return 0;
 }
 
 static int
@@ -82,12 +86,11 @@ ptcounter_val_helper(
 }
 
 /* Return the approximate value of this counter. */
-uint64_t
+int
 ptcounter_value(
-	struct ptcounter	*ptc)
+	struct ptcounter	*ptc,
+	uint64_t		*sum)
 {
-	uint64_t		sum = 0;
-
-	ptvar_foreach(ptc->var, ptcounter_val_helper, &sum);
-	return sum;
+	*sum = 0;
+	return ptvar_foreach(ptc->var, ptcounter_val_helper, sum);
 }
diff --git a/scrub/counter.h b/scrub/counter.h
index 6c501b85..01b65056 100644
--- a/scrub/counter.h
+++ b/scrub/counter.h
@@ -7,9 +7,9 @@
 #define XFS_SCRUB_COUNTER_H_
 
 struct ptcounter;
-struct ptcounter *ptcounter_init(size_t nr);
+int ptcounter_alloc(size_t nr, struct ptcounter **pp);
 void ptcounter_free(struct ptcounter *ptc);
-void ptcounter_add(struct ptcounter *ptc, int64_t nr);
-uint64_t ptcounter_value(struct ptcounter *ptc);
+int ptcounter_add(struct ptcounter *ptc, int64_t nr);
+int ptcounter_value(struct ptcounter *ptc, uint64_t *sum);
 
 #endif /* XFS_SCRUB_COUNTER_H_ */
diff --git a/scrub/phase3.c b/scrub/phase3.c
index a32d1ced..1e908c2c 100644
--- a/scrub/phase3.c
+++ b/scrub/phase3.c
@@ -139,7 +139,12 @@ xfs_scrub_inode(
 		goto out;
 
 out:
-	ptcounter_add(icount, 1);
+	error = ptcounter_add(icount, 1);
+	if (error) {
+		str_liberror(ctx, error,
+				_("incrementing scanned inode counter"));
+		return false;
+	}
 	progress_add(1);
 	xfs_action_list_defer(ctx, agno, &alist);
 	if (fd >= 0) {
@@ -158,12 +163,14 @@ xfs_scan_inodes(
 	struct scrub_ctx	*ctx)
 {
 	struct scrub_inode_ctx	ictx;
+	uint64_t		val;
+	int			err;
 	bool			ret;
 
 	ictx.moveon = true;
-	ictx.icount = ptcounter_init(scrub_nproc(ctx));
-	if (!ictx.icount) {
-		str_info(ctx, ctx->mntpoint, _("Could not create counter."));
+	err = ptcounter_alloc(scrub_nproc(ctx), &ictx.icount);
+	if (err) {
+		str_liberror(ctx, err, _("creating scanned inode counter"));
 		return false;
 	}
 
@@ -173,8 +180,12 @@ xfs_scan_inodes(
 	if (!ictx.moveon)
 		goto free;
 	xfs_scrub_report_preen_triggers(ctx);
-	ctx->inodes_checked = ptcounter_value(ictx.icount);
-
+	err = ptcounter_value(ictx.icount, &val);
+	if (err) {
+		str_liberror(ctx, err, _("summing scanned inode counter"));
+		return false;
+	}
+	ctx->inodes_checked = val;
 free:
 	ptcounter_free(ictx.icount);
 	return ictx.moveon;
diff --git a/scrub/progress.c b/scrub/progress.c
index c9a9d286..08c7233e 100644
--- a/scrub/progress.c
+++ b/scrub/progress.c
@@ -119,6 +119,8 @@ progress_report_thread(void *arg)
 
 	pthread_mutex_lock(&pt.lock);
 	while (1) {
+		uint64_t	progress_val;
+
 		/* Every half second. */
 		ret = clock_gettime(CLOCK_REALTIME, &abstime);
 		if (ret)
@@ -131,7 +133,9 @@ progress_report_thread(void *arg)
 		pthread_cond_timedwait(&pt.wakeup, &pt.lock, &abstime);
 		if (pt.terminate)
 			break;
-		progress_report(ptcounter_value(pt.ptc));
+		ret = ptcounter_value(pt.ptc, &progress_val);
+		if (!ret)
+			progress_report(progress_val);
 	}
 	pthread_mutex_unlock(&pt.lock);
 	return NULL;
@@ -187,9 +191,11 @@ progress_init_phase(
 	pt.twiddle = 0;
 	pt.terminate = false;
 
-	pt.ptc = ptcounter_init(nr_threads);
-	if (!pt.ptc)
+	ret = ptcounter_alloc(nr_threads, &pt.ptc);
+	if (ret) {
+		str_liberror(ctx, ret, _("allocating progress counter"));
 		goto out_max;
+	}
 
 	ret = pthread_create(&pt.thread, NULL, progress_report_thread, NULL);
 	if (ret)
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index 95987a9b..b890c92f 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -84,8 +84,8 @@ read_verify_pool_init(
 			RVP_IO_MAX_SIZE);
 	if (error || !rvp->readbuf)
 		goto out_free;
-	rvp->verified_bytes = ptcounter_init(nproc);
-	if (!rvp->verified_bytes)
+	error = ptcounter_alloc(nproc, &rvp->verified_bytes);
+	if (error)
 		goto out_buf;
 	rvp->miniosz = miniosz;
 	rvp->ctx = ctx;
@@ -282,5 +282,8 @@ uint64_t
 read_verify_bytes(
 	struct read_verify_pool		*rvp)
 {
-	return ptcounter_value(rvp->verified_bytes);
+	uint64_t			ret;
+
+	ptcounter_value(rvp->verified_bytes, &ret);
+	return ret;
 }


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 10/13] xfs_scrub: report all progressbar creation failures
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (8 preceding siblings ...)
  2019-09-06  3:37 ` [PATCH 09/13] xfs_scrub: fix per-thread counter error communication problems Darrick J. Wong
@ 2019-09-06  3:37 ` Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 11/13] xfs_scrub: check progress bar timedwait failures Darrick J. Wong
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:37 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Always report failures when creating progress bars.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/progress.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)


diff --git a/scrub/progress.c b/scrub/progress.c
index 08c7233e..5fda4ccb 100644
--- a/scrub/progress.c
+++ b/scrub/progress.c
@@ -198,8 +198,10 @@ progress_init_phase(
 	}
 
 	ret = pthread_create(&pt.thread, NULL, progress_report_thread, NULL);
-	if (ret)
+	if (ret) {
+		str_liberror(ctx, ret, _("creating progress reporting thread"));
 		goto out_ptcounter;
+	}
 
 	return true;
 


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 11/13] xfs_scrub: check progress bar timedwait failures
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (9 preceding siblings ...)
  2019-09-06  3:37 ` [PATCH 10/13] xfs_scrub: report all progressbar creation failures Darrick J. Wong
@ 2019-09-06  3:37 ` Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 12/13] xfs_scrub: move all the queue_subdir error reporting to callers Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 13/13] xfs_scrub: fix error handling problems in vfs.c Darrick J. Wong
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:37 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Check for failures in the timedwait for progressbar reporting.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/progress.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)


diff --git a/scrub/progress.c b/scrub/progress.c
index 5fda4ccb..e93b607f 100644
--- a/scrub/progress.c
+++ b/scrub/progress.c
@@ -130,7 +130,9 @@ progress_report_thread(void *arg)
 			abstime.tv_sec++;
 			abstime.tv_nsec -= NSEC_PER_SEC;
 		}
-		pthread_cond_timedwait(&pt.wakeup, &pt.lock, &abstime);
+		ret = pthread_cond_timedwait(&pt.wakeup, &pt.lock, &abstime);
+		if (ret && ret != ETIMEDOUT)
+			break;
 		if (pt.terminate)
 			break;
 		ret = ptcounter_value(pt.ptc, &progress_val);


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 12/13] xfs_scrub: move all the queue_subdir error reporting to callers
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (10 preceding siblings ...)
  2019-09-06  3:37 ` [PATCH 11/13] xfs_scrub: check progress bar timedwait failures Darrick J. Wong
@ 2019-09-06  3:37 ` Darrick J. Wong
  2019-09-06  3:37 ` [PATCH 13/13] xfs_scrub: fix error handling problems in vfs.c Darrick J. Wong
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:37 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Change queue_subdir to return a positive error code to callers and move
the error reporting to the callers.  This continues the process of
changing internal functions to return error codes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |   29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index 49d689af..e0bc3ea4 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -72,7 +72,7 @@ dec_nr_dirs(
 }
 
 /* Queue a directory for scanning. */
-static bool
+static int
 queue_subdir(
 	struct scrub_ctx	*ctx,
 	struct scan_fs_tree	*sft,
@@ -84,14 +84,12 @@ queue_subdir(
 	int			error;
 
 	new_sftd = malloc(sizeof(struct scan_fs_tree_dir));
-	if (!new_sftd) {
-		str_errno(ctx, _("creating directory scan context"));
-		return false;
-	}
+	if (!new_sftd)
+		return errno;
 
 	new_sftd->path = strdup(path);
 	if (!new_sftd->path) {
-		str_errno(ctx, _("creating directory scan path"));
+		error = errno;
 		goto out_sftd;
 	}
 
@@ -106,12 +104,12 @@ queue_subdir(
 		goto out_path;
 	}
 
-	return true;
+	return 0;
 out_path:
 	free(new_sftd->path);
 out_sftd:
 	free(new_sftd);
-	return false;
+	return error;
 }
 
 /* Scan a directory sub tree. */
@@ -187,10 +185,13 @@ scan_fs_dir(
 		/* If directory, call ourselves recursively. */
 		if (S_ISDIR(sb.st_mode) && strcmp(".", dirent->d_name) &&
 		    strcmp("..", dirent->d_name)) {
-			sft->moveon = queue_subdir(ctx, sft, wq, newpath,
-					false);
-			if (!sft->moveon)
+			error = queue_subdir(ctx, sft, wq, newpath, false);
+			if (error) {
+				str_liberror(ctx, error,
+_("queueing subdirectory scan"));
+				sft->moveon = false;
 				break;
+			}
 		}
 	}
 
@@ -233,9 +234,11 @@ scan_fs_tree(
 		return false;
 	}
 
-	sft.moveon = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
-	if (!sft.moveon)
+	ret = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
+	if (ret) {
+		str_liberror(ctx, ret, _("queueing directory scan"));
 		goto out_wq;
+	}
 
 	/*
 	 * Wait for the wakeup to trigger, which should only happen when the


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 13/13] xfs_scrub: fix error handling problems in vfs.c
  2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
                   ` (11 preceding siblings ...)
  2019-09-06  3:37 ` [PATCH 12/13] xfs_scrub: move all the queue_subdir error reporting to callers Darrick J. Wong
@ 2019-09-06  3:37 ` Darrick J. Wong
  12 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-06  3:37 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Fix all the places where we drop or screw up error handling in
scan_fs_tree.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/vfs.c |   27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)


diff --git a/scrub/vfs.c b/scrub/vfs.c
index e0bc3ea4..d7c40239 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -216,6 +216,7 @@ scan_fs_tree(
 {
 	struct workqueue	wq;
 	struct scan_fs_tree	sft;
+	bool			moveon = false;
 	int			ret;
 
 	sft.moveon = true;
@@ -224,14 +225,22 @@ scan_fs_tree(
 	sft.dir_fn = dir_fn;
 	sft.dirent_fn = dirent_fn;
 	sft.arg = arg;
-	pthread_mutex_init(&sft.lock, NULL);
-	pthread_cond_init(&sft.wakeup, NULL);
+	ret = pthread_mutex_init(&sft.lock, NULL);
+	if (ret) {
+		str_liberror(ctx, ret, _("creating directory scan lock"));
+		return false;
+	}
+	ret = pthread_cond_init(&sft.wakeup, NULL);
+	if (ret) {
+		str_liberror(ctx, ret, _("creating directory scan signal"));
+		goto out_mutex;
+	}
 
 	ret = workqueue_create(&wq, (struct xfs_mount *)ctx,
 			scrub_nproc_workqueue(ctx));
 	if (ret) {
-		str_info(ctx, ctx->mntpoint, _("Could not create workqueue."));
-		return false;
+		str_liberror(ctx, ret, _("creating directory scan workqueue"));
+		goto out_cond;
 	}
 
 	ret = queue_subdir(ctx, &sft, &wq, ctx->mntpoint, true);
@@ -255,12 +264,18 @@ scan_fs_tree(
 
 	ret = workqueue_terminate(&wq);
 	if (ret) {
-		sft.moveon = false;
 		str_liberror(ctx, ret, _("finishing directory scan work"));
+		goto out_wq;
 	}
+
+	moveon = sft.moveon;
 out_wq:
 	workqueue_destroy(&wq);
-	return sft.moveon;
+out_cond:
+	pthread_cond_destroy(&sft.wakeup);
+out_mutex:
+	pthread_mutex_destroy(&sft.lock);
+	return moveon;
 }
 
 #ifndef FITRIM


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 03/13] libfrog: split workqueue destroy functions
  2019-09-25 21:33 ` [PATCH 03/13] libfrog: split workqueue destroy functions Darrick J. Wong
@ 2019-09-30 22:33   ` Eric Sandeen
  0 siblings, 0 replies; 17+ messages in thread
From: Eric Sandeen @ 2019-09-30 22:33 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-xfs



On 9/25/19 4:33 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> 
> Split the workqueue destroy function into two parts -- one to signal all
> the threads to exit and wait for them, and a second one that actually
> destroys all the memory associated with the workqueue.  This mean we can
> report latent workqueue errors independent of the freeing function.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  libfrog/workqueue.c |   37 ++++++++++++++++++++++++++++++-------
>  libfrog/workqueue.h |    2 ++
>  repair/threads.c    |    6 ++++++
>  scrub/fscounters.c  |   11 ++++++++++-
>  scrub/inodes.c      |    5 +++++
>  scrub/phase2.c      |    5 +++++
>  scrub/phase4.c      |    6 ++++++
>  scrub/read_verify.c |    1 +
>  scrub/spacemap.c    |    5 +++++
>  scrub/vfs.c         |    5 +++++
>  10 files changed, 75 insertions(+), 8 deletions(-)

Reviewed-by: Eric Sandeen <sandeen@redhat.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 03/13] libfrog: split workqueue destroy functions
  2019-09-25 21:33 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
@ 2019-09-25 21:33 ` Darrick J. Wong
  2019-09-30 22:33   ` Eric Sandeen
  0 siblings, 1 reply; 17+ messages in thread
From: Darrick J. Wong @ 2019-09-25 21:33 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Split the workqueue destroy function into two parts -- one to signal all
the threads to exit and wait for them, and a second one that actually
destroys all the memory associated with the workqueue.  This mean we can
report latent workqueue errors independent of the freeing function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 libfrog/workqueue.c |   37 ++++++++++++++++++++++++++++++-------
 libfrog/workqueue.h |    2 ++
 repair/threads.c    |    6 ++++++
 scrub/fscounters.c  |   11 ++++++++++-
 scrub/inodes.c      |    5 +++++
 scrub/phase2.c      |    5 +++++
 scrub/phase4.c      |    6 ++++++
 scrub/read_verify.c |    1 +
 scrub/spacemap.c    |    5 +++++
 scrub/vfs.c         |    5 +++++
 10 files changed, 75 insertions(+), 8 deletions(-)


diff --git a/libfrog/workqueue.c b/libfrog/workqueue.c
index 48038363..07f11a7b 100644
--- a/libfrog/workqueue.c
+++ b/libfrog/workqueue.c
@@ -82,6 +82,7 @@ workqueue_create(
 		goto out_mutex;
 	}
 	wq->terminate = false;
+	wq->terminated = false;
 
 	for (i = 0; i < nr_workers; i++) {
 		err = pthread_create(&wq->threads[i], NULL, workqueue_thread,
@@ -119,6 +120,8 @@ workqueue_add(
 	struct workqueue_item	*wi;
 	int			ret;
 
+	assert(!wq->terminated);
+
 	if (wq->thread_count == 0) {
 		func(wq, index, arg);
 		return 0;
@@ -157,22 +160,42 @@ workqueue_add(
 
 /*
  * Wait for all pending work items to be processed and tear down the
- * workqueue.
+ * workqueue thread pool.
  */
-void
-workqueue_destroy(
+int
+workqueue_terminate(
 	struct workqueue	*wq)
 {
 	unsigned int		i;
+	int			ret;
+
+	pthread_mutex_lock(&wq->lock);
+	wq->terminate = true;
+	pthread_mutex_unlock(&wq->lock);
+
+	ret = pthread_cond_broadcast(&wq->wakeup);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < wq->thread_count; i++) {
+		ret = pthread_join(wq->threads[i], NULL);
+		if (ret)
+			return ret;
+	}
 
 	pthread_mutex_lock(&wq->lock);
-	wq->terminate = 1;
+	wq->terminated = true;
 	pthread_mutex_unlock(&wq->lock);
 
-	pthread_cond_broadcast(&wq->wakeup);
+	return 0;
+}
 
-	for (i = 0; i < wq->thread_count; i++)
-		pthread_join(wq->threads[i], NULL);
+/* Tear down the workqueue. */
+void
+workqueue_destroy(
+	struct workqueue	*wq)
+{
+	assert(wq->terminated);
 
 	free(wq->threads);
 	pthread_mutex_destroy(&wq->lock);
diff --git a/libfrog/workqueue.h b/libfrog/workqueue.h
index a1f3a57c..a56d1cf1 100644
--- a/libfrog/workqueue.h
+++ b/libfrog/workqueue.h
@@ -30,12 +30,14 @@ struct workqueue {
 	unsigned int		item_count;
 	unsigned int		thread_count;
 	bool			terminate;
+	bool			terminated;
 };
 
 int workqueue_create(struct workqueue *wq, void *wq_ctx,
 		unsigned int nr_workers);
 int workqueue_add(struct workqueue *wq, workqueue_func_t fn,
 		uint32_t index, void *arg);
+int workqueue_terminate(struct workqueue *wq);
 void workqueue_destroy(struct workqueue *wq);
 
 #endif	/* __LIBFROG_WORKQUEUE_H__ */
diff --git a/repair/threads.c b/repair/threads.c
index d2190920..9b7241e3 100644
--- a/repair/threads.c
+++ b/repair/threads.c
@@ -56,5 +56,11 @@ void
 destroy_work_queue(
 	struct workqueue	*wq)
 {
+	int			err;
+
+	err = workqueue_terminate(wq);
+	if (err)
+		do_error(_("cannot terminate worker item, error = [%d] %s\n"),
+				err, strerror(err));
 	workqueue_destroy(wq);
 }
diff --git a/scrub/fscounters.c b/scrub/fscounters.c
index 669c5ab0..98aa3826 100644
--- a/scrub/fscounters.c
+++ b/scrub/fscounters.c
@@ -102,7 +102,7 @@ xfs_count_all_inodes(
 	struct xfs_count_inodes	*ci;
 	xfs_agnumber_t		agno;
 	struct workqueue	wq;
-	bool			moveon;
+	bool			moveon = true;
 	int			ret;
 
 	ci = calloc(1, sizeof(struct xfs_count_inodes) +
@@ -126,8 +126,17 @@ xfs_count_all_inodes(
 			break;
 		}
 	}
+
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing icount work"));
+	}
 	workqueue_destroy(&wq);
 
+	if (!moveon)
+		goto out_free;
+
 	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++)
 		*count += ci->counters[agno];
 	moveon = ci->moveon;
diff --git a/scrub/inodes.c b/scrub/inodes.c
index 644a6372..c459a3b4 100644
--- a/scrub/inodes.c
+++ b/scrub/inodes.c
@@ -256,6 +256,11 @@ xfs_scan_all_inodes(
 		}
 	}
 
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		si.moveon = false;
+		str_liberror(ctx, ret, _("finishing bulkstat work"));
+	}
 	workqueue_destroy(&wq);
 
 	return si.moveon;
diff --git a/scrub/phase2.c b/scrub/phase2.c
index 1d2244a4..d92b7e29 100644
--- a/scrub/phase2.c
+++ b/scrub/phase2.c
@@ -161,6 +161,11 @@ xfs_scan_metadata(
 	}
 
 out:
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing scrub work"));
+	}
 	workqueue_destroy(&wq);
 	return moveon;
 }
diff --git a/scrub/phase4.c b/scrub/phase4.c
index 903da6d2..eb30c189 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -90,6 +90,12 @@ xfs_process_action_items(
 		if (!moveon)
 			break;
 	}
+
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing repair work"));
+	}
 	workqueue_destroy(&wq);
 
 	pthread_mutex_lock(&ctx->lock);
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index ff4d3572..bb8f09a8 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -120,6 +120,7 @@ void
 read_verify_pool_flush(
 	struct read_verify_pool		*rvp)
 {
+	workqueue_terminate(&rvp->wq);
 	workqueue_destroy(&rvp->wq);
 }
 
diff --git a/scrub/spacemap.c b/scrub/spacemap.c
index 4258e318..91e8badb 100644
--- a/scrub/spacemap.c
+++ b/scrub/spacemap.c
@@ -230,6 +230,11 @@ xfs_scan_all_spacemaps(
 		}
 	}
 out:
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		sbx.moveon = false;
+		str_liberror(ctx, ret, _("finishing fsmap work"));
+	}
 	workqueue_destroy(&wq);
 
 	return sbx.moveon;
diff --git a/scrub/vfs.c b/scrub/vfs.c
index 0cff2e3f..49d689af 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -250,6 +250,11 @@ scan_fs_tree(
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		sft.moveon = false;
+		str_liberror(ctx, ret, _("finishing directory scan work"));
+	}
 out_wq:
 	workqueue_destroy(&wq);
 	return sft.moveon;


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 03/13] libfrog: split workqueue destroy functions
  2019-08-26 21:28 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
@ 2019-08-26 21:28 ` Darrick J. Wong
  0 siblings, 0 replies; 17+ messages in thread
From: Darrick J. Wong @ 2019-08-26 21:28 UTC (permalink / raw)
  To: sandeen, darrick.wong; +Cc: linux-xfs

From: Darrick J. Wong <darrick.wong@oracle.com>

Split the workqueue destroy function into two parts -- one to signal all
the threads to exit and wait for them, and a second one that actually
destroys all the memory associated with the workqueue.  This mean we can
report latent workqueue errors independent of the freeing function.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/workqueue.h |    2 ++
 libfrog/workqueue.c |   45 +++++++++++++++++++++++++++++++++++++--------
 repair/threads.c    |    6 ++++++
 scrub/fscounters.c  |   11 ++++++++++-
 scrub/inodes.c      |    5 +++++
 scrub/phase2.c      |    5 +++++
 scrub/phase4.c      |    6 ++++++
 scrub/read_verify.c |    1 +
 scrub/spacemap.c    |    5 +++++
 scrub/vfs.c         |    5 +++++
 10 files changed, 82 insertions(+), 9 deletions(-)


diff --git a/include/workqueue.h b/include/workqueue.h
index c45dc4fb..024ce144 100644
--- a/include/workqueue.h
+++ b/include/workqueue.h
@@ -30,12 +30,14 @@ struct workqueue {
 	unsigned int		item_count;
 	unsigned int		thread_count;
 	bool			terminate;
+	bool			terminated;
 };
 
 int workqueue_create(struct workqueue *wq, void *wq_ctx,
 		unsigned int nr_workers);
 int workqueue_add(struct workqueue *wq, workqueue_func_t fn,
 		uint32_t index, void *arg);
+int workqueue_terminate(struct workqueue *wq);
 void workqueue_destroy(struct workqueue *wq);
 
 #endif	/* _WORKQUEUE_H_ */
diff --git a/libfrog/workqueue.c b/libfrog/workqueue.c
index 24b22bf6..503fe227 100644
--- a/libfrog/workqueue.c
+++ b/libfrog/workqueue.c
@@ -82,6 +82,7 @@ workqueue_create(
 		goto out_mutex;
 	}
 	wq->terminate = false;
+	wq->terminated = false;
 
 	for (i = 0; i < nr_workers; i++) {
 		err = pthread_create(&wq->threads[i], NULL, workqueue_thread,
@@ -119,6 +120,8 @@ workqueue_add(
 	struct workqueue_item	*wi;
 	int			ret;
 
+	assert(!wq->terminated);
+
 	if (wq->thread_count == 0) {
 		func(wq, index, arg);
 		return 0;
@@ -160,22 +163,48 @@ workqueue_add(
 
 /*
  * Wait for all pending work items to be processed and tear down the
- * workqueue.
+ * workqueue thread pool.
  */
-void
-workqueue_destroy(
+int
+workqueue_terminate(
 	struct workqueue	*wq)
 {
 	unsigned int		i;
+	int			ret;
+
+	ret = pthread_mutex_lock(&wq->lock);
+	if (ret)
+		return ret;
+
+	wq->terminate = true;
+	pthread_mutex_unlock(&wq->lock);
+
+	ret = pthread_cond_broadcast(&wq->wakeup);
+	if (ret)
+		return ret;
 
-	pthread_mutex_lock(&wq->lock);
-	wq->terminate = 1;
+	for (i = 0; i < wq->thread_count; i++) {
+		ret = pthread_join(wq->threads[i], NULL);
+		if (ret)
+			return ret;
+	}
+
+	ret = pthread_mutex_lock(&wq->lock);
+	if (ret)
+		return ret;
+
+	wq->terminated = true;
 	pthread_mutex_unlock(&wq->lock);
 
-	pthread_cond_broadcast(&wq->wakeup);
+	return 0;
+}
 
-	for (i = 0; i < wq->thread_count; i++)
-		pthread_join(wq->threads[i], NULL);
+/* Tear down the workqueue. */
+void
+workqueue_destroy(
+	struct workqueue	*wq)
+{
+	assert(wq->terminated);
 
 	free(wq->threads);
 	pthread_mutex_destroy(&wq->lock);
diff --git a/repair/threads.c b/repair/threads.c
index d2190920..9b7241e3 100644
--- a/repair/threads.c
+++ b/repair/threads.c
@@ -56,5 +56,11 @@ void
 destroy_work_queue(
 	struct workqueue	*wq)
 {
+	int			err;
+
+	err = workqueue_terminate(wq);
+	if (err)
+		do_error(_("cannot terminate worker item, error = [%d] %s\n"),
+				err, strerror(err));
 	workqueue_destroy(wq);
 }
diff --git a/scrub/fscounters.c b/scrub/fscounters.c
index ee07da9e..712a2b37 100644
--- a/scrub/fscounters.c
+++ b/scrub/fscounters.c
@@ -102,7 +102,7 @@ xfs_count_all_inodes(
 	struct xfs_count_inodes	*ci;
 	xfs_agnumber_t		agno;
 	struct workqueue	wq;
-	bool			moveon;
+	bool			moveon = true;
 	int			ret;
 
 	ci = calloc(1, sizeof(struct xfs_count_inodes) +
@@ -126,8 +126,17 @@ xfs_count_all_inodes(
 			break;
 		}
 	}
+
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing icount work"));
+	}
 	workqueue_destroy(&wq);
 
+	if (!moveon)
+		goto out_free;
+
 	for (agno = 0; agno < ctx->mnt.fsgeom.agcount; agno++)
 		*count += ci->counters[agno];
 	moveon = ci->moveon;
diff --git a/scrub/inodes.c b/scrub/inodes.c
index dcce2df0..ef12a692 100644
--- a/scrub/inodes.c
+++ b/scrub/inodes.c
@@ -253,6 +253,11 @@ xfs_scan_all_inodes(
 		}
 	}
 
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		si.moveon = false;
+		str_liberror(ctx, ret, _("finishing bulkstat work"));
+	}
 	workqueue_destroy(&wq);
 
 	return si.moveon;
diff --git a/scrub/phase2.c b/scrub/phase2.c
index 8c8aad97..6b4755a1 100644
--- a/scrub/phase2.c
+++ b/scrub/phase2.c
@@ -161,6 +161,11 @@ xfs_scan_metadata(
 	}
 
 out:
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing scrub work"));
+	}
 	workqueue_destroy(&wq);
 	return moveon;
 }
diff --git a/scrub/phase4.c b/scrub/phase4.c
index 10199ca1..79c8a6b8 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -90,6 +90,12 @@ xfs_process_action_items(
 		if (!moveon)
 			break;
 	}
+
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		moveon = false;
+		str_liberror(ctx, ret, _("finishing repair work"));
+	}
 	workqueue_destroy(&wq);
 
 	pthread_mutex_lock(&ctx->lock);
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index c7e34cf5..7d95ab00 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -120,6 +120,7 @@ void
 read_verify_pool_flush(
 	struct read_verify_pool		*rvp)
 {
+	workqueue_terminate(&rvp->wq);
 	workqueue_destroy(&rvp->wq);
 }
 
diff --git a/scrub/spacemap.c b/scrub/spacemap.c
index 03d05eed..7fe03163 100644
--- a/scrub/spacemap.c
+++ b/scrub/spacemap.c
@@ -230,6 +230,11 @@ xfs_scan_all_spacemaps(
 		}
 	}
 out:
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		sbx.moveon = false;
+		str_liberror(ctx, ret, _("finishing fsmap work"));
+	}
 	workqueue_destroy(&wq);
 
 	return sbx.moveon;
diff --git a/scrub/vfs.c b/scrub/vfs.c
index e5ed5d83..8aa58d6e 100644
--- a/scrub/vfs.c
+++ b/scrub/vfs.c
@@ -239,6 +239,11 @@ scan_fs_tree(
 	assert(sft.nr_dirs == 0);
 	pthread_mutex_unlock(&sft.lock);
 
+	ret = workqueue_terminate(&wq);
+	if (ret) {
+		sft.moveon = false;
+		str_liberror(ctx, ret, _("finishing directory scan work"));
+	}
 out_wq:
 	workqueue_destroy(&wq);
 	return sft.moveon;


^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, back to index

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-06  3:36 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
2019-09-06  3:36 ` [PATCH 01/13] libfrog: fix workqueue error communication problems Darrick J. Wong
2019-09-06  3:36 ` [PATCH 02/13] libfrog: fix missing error checking in workqueue code Darrick J. Wong
2019-09-06  3:36 ` [PATCH 03/13] libfrog: split workqueue destroy functions Darrick J. Wong
2019-09-06  3:36 ` [PATCH 04/13] xfs_scrub: redistribute read verify pool flush and destroy responsibilities Darrick J. Wong
2019-09-06  3:36 ` [PATCH 05/13] libfrog: fix per-thread variable error communication problems Darrick J. Wong
2019-09-06  3:36 ` [PATCH 06/13] libfrog: add missing per-thread variable error handling Darrick J. Wong
2019-09-06  3:36 ` [PATCH 07/13] libfrog: fix bitmap error communication problems Darrick J. Wong
2019-09-06  3:37 ` [PATCH 08/13] libfrog: fix missing error checking in bitmap code Darrick J. Wong
2019-09-06  3:37 ` [PATCH 09/13] xfs_scrub: fix per-thread counter error communication problems Darrick J. Wong
2019-09-06  3:37 ` [PATCH 10/13] xfs_scrub: report all progressbar creation failures Darrick J. Wong
2019-09-06  3:37 ` [PATCH 11/13] xfs_scrub: check progress bar timedwait failures Darrick J. Wong
2019-09-06  3:37 ` [PATCH 12/13] xfs_scrub: move all the queue_subdir error reporting to callers Darrick J. Wong
2019-09-06  3:37 ` [PATCH 13/13] xfs_scrub: fix error handling problems in vfs.c Darrick J. Wong
  -- strict thread matches above, loose matches on Subject: below --
2019-09-25 21:33 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
2019-09-25 21:33 ` [PATCH 03/13] libfrog: split workqueue destroy functions Darrick J. Wong
2019-09-30 22:33   ` Eric Sandeen
2019-08-26 21:28 [PATCH 00/13] libfrog/xfs_scrub: fix error handling Darrick J. Wong
2019-08-26 21:28 ` [PATCH 03/13] libfrog: split workqueue destroy functions Darrick J. Wong

Linux-XFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-xfs/0 linux-xfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-xfs linux-xfs/ https://lore.kernel.org/linux-xfs \
		linux-xfs@vger.kernel.org linux-xfs@archiver.kernel.org
	public-inbox-index linux-xfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-xfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox