From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: darrick.wong@oracle.com
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 09/23] xfs_scrub: one read/verify pool per disk
Date: Fri, 01 Mar 2019 15:27:50 -0800
Message-ID: <155148287091.16677.17102964491448677196.stgit@magnolia>
In-Reply-To: <155148280859.16677.6057998944865066232.stgit@magnolia>

From: Darrick J. Wong <darrick.wong@oracle.com>

Simplify the read/verify pool code further by creating one pool per
disk.  This lets us tailor each pool's concurrency to the capabilities
of that specific disk, so that in a mixed hdd/ssd environment we don't
flood the hdd with requests.
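
For illustration, here is a minimal sketch of how a caller uses the
reworked API (the pool/phys/len/arg names are hypothetical, but the
function names and signatures are the ones declared in
scrub/read_verify.h below).  Each pool is bound to one device at
creation time, so individual requests no longer name a disk:

	struct read_verify_pool	*pool;

	/* One pool per device; its workqueue is sized via disk_heads(). */
	pool = read_verify_pool_init(ctx, ctx->datadev, ctx->geo.blocksize,
			xfs_check_rmap_ioerr, scrub_nproc(ctx));
	if (!pool)
		return false;

	/* The target disk is now implied by the pool itself. */
	read_verify_schedule_io(pool, phys, len, arg);
	read_verify_force_io(pool);

	/* Wait for outstanding IO, harvest the byte count, tear down. */
	read_verify_pool_flush(pool);
	ctx->bytes_checked += read_verify_bytes(pool);
	read_verify_pool_destroy(pool);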

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 scrub/phase6.c      |  110 ++++++++++++++++++++++++++++++++++++---------------
 scrub/read_verify.c |   32 ++++++--------
 scrub/read_verify.h |   10 +++--
 3 files changed, 98 insertions(+), 54 deletions(-)


diff --git a/scrub/phase6.c b/scrub/phase6.c
index fe121769..ccb795ab 100644
--- a/scrub/phase6.c
+++ b/scrub/phase6.c
@@ -33,18 +33,29 @@
  * and report the paths of the now corrupt files.
  */
 
+/* Verify disk blocks with GETFSMAP */
+
+struct xfs_verify_extent {
+	struct read_verify_pool	*rvp_data;
+	struct read_verify_pool	*rvp_log;
+	struct read_verify_pool	*rvp_realtime;
+	struct bitmap		*d_bad;		/* bytes */
+	struct bitmap		*r_bad;		/* bytes */
+};
+
 /* Find the fd for a given device identifier. */
-static struct disk *
-xfs_dev_to_disk(
-	struct scrub_ctx	*ctx,
-	dev_t			dev)
+static struct read_verify_pool *
+xfs_dev_to_pool(
+	struct scrub_ctx		*ctx,
+	struct xfs_verify_extent	*ve,
+	dev_t				dev)
 {
 	if (dev == ctx->fsinfo.fs_datadev)
-		return ctx->datadev;
+		return ve->rvp_data;
 	else if (dev == ctx->fsinfo.fs_logdev)
-		return ctx->logdev;
+		return ve->rvp_log;
 	else if (dev == ctx->fsinfo.fs_rtdev)
-		return ctx->rtdev;
+		return ve->rvp_realtime;
 	abort();
 }
 
@@ -285,14 +296,6 @@ xfs_report_verify_errors(
 	return xfs_scan_all_inodes(ctx, xfs_report_verify_inode, &vei);
 }
 
-/* Verify disk blocks with GETFSMAP */
-
-struct xfs_verify_extent {
-	struct read_verify_pool	*readverify;
-	struct bitmap		*d_bad;		/* bytes */
-	struct bitmap		*r_bad;		/* bytes */
-};
-
 /* Report an IO error resulting from read-verify based off getfsmap. */
 static bool
 xfs_check_rmap_error_report(
@@ -393,7 +396,9 @@ xfs_check_rmap(
 	void				*arg)
 {
 	struct xfs_verify_extent	*ve = arg;
-	struct disk			*disk;
+	struct read_verify_pool		*rvp;
+
+	rvp = xfs_dev_to_pool(ctx, ve, map->fmr_device);
 
 	dbg_printf("rmap dev %d:%d phys %"PRIu64" owner %"PRId64
 			" offset %"PRIu64" len %"PRIu64" flags 0x%x\n",
@@ -420,19 +425,32 @@ xfs_check_rmap(
 	/* XXX: Filter out directory data blocks. */
 
 	/* Schedule the read verify command for (eventual) running. */
-	disk = xfs_dev_to_disk(ctx, map->fmr_device);
-
-	read_verify_schedule_io(ve->readverify, disk, map->fmr_physical,
-			map->fmr_length, ve);
+	read_verify_schedule_io(rvp, map->fmr_physical, map->fmr_length, ve);
 
 out:
 	/* Is this the last extent?  Fire off the read. */
 	if (map->fmr_flags & FMR_OF_LAST)
-		read_verify_force_io(ve->readverify);
+		read_verify_force_io(rvp);
 
 	return true;
 }
 
+/* Wait for read/verify actions to finish, then return # bytes checked. */
+static uint64_t
+clean_pool(
+	struct read_verify_pool	*rvp)
+{
+	uint64_t		ret;
+
+	if (!rvp)
+		return 0;
+
+	read_verify_pool_flush(rvp);
+	ret = read_verify_bytes(rvp);
+	read_verify_pool_destroy(rvp);
+	return ret;
+}
+
 /*
  * Read verify all the file data blocks in a filesystem.  Since XFS doesn't
  * do data checksums, we trust that the underlying storage will pass back
@@ -445,7 +463,7 @@ bool
 xfs_scan_blocks(
 	struct scrub_ctx		*ctx)
 {
-	struct xfs_verify_extent	ve;
+	struct xfs_verify_extent	ve = { NULL };
 	bool				moveon;
 
 	moveon = bitmap_init(&ve.d_bad);
@@ -460,21 +478,43 @@ xfs_scan_blocks(
 		goto out_dbad;
 	}
 
-	ve.readverify = read_verify_pool_init(ctx, ctx->geo.blocksize,
-			xfs_check_rmap_ioerr, disk_heads(ctx->datadev),
+	ve.rvp_data = read_verify_pool_init(ctx, ctx->datadev,
+			ctx->geo.blocksize, xfs_check_rmap_ioerr,
 			scrub_nproc(ctx));
-	if (!ve.readverify) {
+	if (!ve.rvp_data) {
 		moveon = false;
 		str_info(ctx, ctx->mntpoint,
-_("Could not create media verifier."));
+_("Could not create data device media verifier."));
 		goto out_rbad;
 	}
+	if (ctx->logdev) {
+		ve.rvp_log = read_verify_pool_init(ctx, ctx->logdev,
+				ctx->geo.blocksize, xfs_check_rmap_ioerr,
+				scrub_nproc(ctx));
+		if (!ve.rvp_log) {
+			moveon = false;
+			str_info(ctx, ctx->mntpoint,
+	_("Could not create log device media verifier."));
+			goto out_datapool;
+		}
+	}
+	if (ctx->rtdev) {
+		ve.rvp_realtime = read_verify_pool_init(ctx, ctx->rtdev,
+				ctx->geo.blocksize, xfs_check_rmap_ioerr,
+				scrub_nproc(ctx));
+		if (!ve.rvp_realtime) {
+			moveon = false;
+			str_info(ctx, ctx->mntpoint,
+	_("Could not create realtime device media verifier."));
+			goto out_logpool;
+		}
+	}
 	moveon = xfs_scan_all_spacemaps(ctx, xfs_check_rmap, &ve);
 	if (!moveon)
-		goto out_pool;
-	read_verify_pool_flush(ve.readverify);
-	ctx->bytes_checked += read_verify_bytes(ve.readverify);
-	read_verify_pool_destroy(ve.readverify);
+		goto out_rtpool;
+	ctx->bytes_checked += clean_pool(ve.rvp_data);
+	ctx->bytes_checked += clean_pool(ve.rvp_log);
+	ctx->bytes_checked += clean_pool(ve.rvp_realtime);
 
 	/* Scan the whole dir tree to see what matches the bad extents. */
 	if (!bitmap_empty(ve.d_bad) || !bitmap_empty(ve.r_bad))
@@ -484,8 +524,14 @@ _("Could not create media verifier."));
 	bitmap_free(&ve.d_bad);
 	return moveon;
 
-out_pool:
-	read_verify_pool_destroy(ve.readverify);
+out_rtpool:
+	if (ve.rvp_realtime)
+		read_verify_pool_destroy(ve.rvp_realtime);
+out_logpool:
+	if (ve.rvp_log)
+		read_verify_pool_destroy(ve.rvp_log);
+out_datapool:
+	read_verify_pool_destroy(ve.rvp_data);
 out_rbad:
 	bitmap_free(&ve.r_bad);
 out_dbad:
diff --git a/scrub/read_verify.c b/scrub/read_verify.c
index b5774736..4a9b91f2 100644
--- a/scrub/read_verify.c
+++ b/scrub/read_verify.c
@@ -50,6 +50,7 @@ struct read_verify_pool {
 	void			*readbuf;	/* read buffer */
 	struct ptcounter	*verified_bytes;
 	struct ptvar		*rvstate;	/* combines read requests */
+	struct disk		*disk;		/* which disk? */
 	read_verify_ioerr_fn_t	ioerr_fn;	/* io error callback */
 	size_t			miniosz;	/* minimum io size, bytes */
 };
@@ -57,19 +58,18 @@ struct read_verify_pool {
 /*
  * Create a thread pool to run read verifiers.
  *
+ * @disk is the disk we want to verify.
  * @miniosz is the minimum size of an IO to expect (in bytes).
  * @ioerr_fn will be called when IO errors occur.
- * @nproc is the maximum number of verify requests that may be sent to a disk
- * at any given time.
  * @submitter_threads is the number of threads that may be sending verify
  * requests at any given time.
  */
 struct read_verify_pool *
 read_verify_pool_init(
 	struct scrub_ctx		*ctx,
+	struct disk			*disk,
 	size_t				miniosz,
 	read_verify_ioerr_fn_t		ioerr_fn,
-	unsigned int			nproc,
 	unsigned int			submitter_threads)
 {
 	struct read_verify_pool		*rvp;
@@ -89,6 +89,7 @@ read_verify_pool_init(
 		goto out_buf;
 	rvp->miniosz = miniosz;
 	rvp->ctx = ctx;
+	rvp->disk = disk;
 	rvp->ioerr_fn = ioerr_fn;
 	rvp->rvstate = ptvar_init(submitter_threads,
 			sizeof(struct read_verify));
@@ -97,7 +98,5 @@ read_verify_pool_init(
-	/* Run in the main thread if we only want one thread. */
-	if (nproc == 1)
-		nproc = 0;
-	ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp, nproc);
+	ret = workqueue_create(&rvp->wq, (struct xfs_mount *)rvp,
+			disk_heads(disk));
 	if (ret)
 		goto out_rvstate;
 	return rvp;
@@ -150,17 +152,16 @@ read_verify(
 	rvp = (struct read_verify_pool *)wq->wq_ctx;
 	while (rv->io_length > 0) {
 		len = min(rv->io_length, RVP_IO_MAX_SIZE);
-		dbg_printf("diskverify %d %"PRIu64" %zu\n", rv->io_disk->d_fd,
-				rv->io_start, len);
-		sz = disk_read_verify(rv->io_disk, rvp->readbuf,
+		dbg_printf("diskverify %d %"PRIu64" %zu\n", rvp->disk->d_fd,
 				rv->io_start, len);
+		sz = disk_read_verify(rvp->disk, rvp->readbuf, rv->io_start,
+				len);
 		if (sz < 0) {
 			dbg_printf("IOERR %d %"PRIu64" %zu\n",
-					rv->io_disk->d_fd,
-					rv->io_start, len);
+					rvp->disk->d_fd, rv->io_start, len);
 			/* IO error, so try the next logical block. */
 			len = rvp->miniosz;
-			rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start, len,
+			rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start, len,
 					errno, rv->io_end_arg);
 		}
 
@@ -184,11 +185,11 @@ read_verify_queue(
 	bool				ret;
 
 	dbg_printf("verify fd %d start %"PRIu64" len %"PRIu64"\n",
-			rv->io_disk->d_fd, rv->io_start, rv->io_length);
+			rvp->disk->d_fd, rv->io_start, rv->io_length);
 
 	tmp = malloc(sizeof(struct read_verify));
 	if (!tmp) {
-		rvp->ioerr_fn(rvp->ctx, rv->io_disk, rv->io_start,
+		rvp->ioerr_fn(rvp->ctx, rvp->disk, rv->io_start,
 				rv->io_length, errno, rv->io_end_arg);
 		return true;
 	}
@@ -212,7 +213,6 @@ _("Could not queue read-verify work."));
 bool
 read_verify_schedule_io(
 	struct read_verify_pool		*rvp,
-	struct disk			*disk,
 	uint64_t			start,
 	uint64_t			length,
 	void				*end_arg)
@@ -231,7 +231,7 @@ read_verify_schedule_io(
 	 * reporting is the same, and the two extents are close,
 	 * we can combine them.
 	 */
-	if (rv->io_length > 0 && disk == rv->io_disk &&
+	if (rv->io_length > 0 &&
 	    end_arg == rv->io_end_arg &&
 	    ((start >= rv->io_start && start <= rv_end + RVP_IO_BATCH_LOCALITY) ||
 	     (rv->io_start >= start &&
@@ -244,7 +244,6 @@ read_verify_schedule_io(
 			return read_verify_queue(rvp, rv);
 
 		/* Stash the new IO. */
-		rv->io_disk = disk;
 		rv->io_start = start;
 		rv->io_length = length;
 		rv->io_end_arg = end_arg;
diff --git a/scrub/read_verify.h b/scrub/read_verify.h
index 1e7fd83f..5fabe5e0 100644
--- a/scrub/read_verify.h
+++ b/scrub/read_verify.h
@@ -8,6 +8,7 @@
 
 struct scrub_ctx;
 struct read_verify_pool;
+struct disk;
 
 /* Function called when an IO error happens. */
 typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
@@ -15,13 +16,14 @@ typedef void (*read_verify_ioerr_fn_t)(struct scrub_ctx *ctx,
 		int error, void *arg);
 
 struct read_verify_pool *read_verify_pool_init(struct scrub_ctx *ctx,
-		size_t miniosz, read_verify_ioerr_fn_t ioerr_fn,
-		unsigned int nproc, unsigned int submitter_threads);
+		struct disk *disk, size_t miniosz,
+		read_verify_ioerr_fn_t ioerr_fn,
+		unsigned int submitter_threads);
 void read_verify_pool_flush(struct read_verify_pool *rvp);
 void read_verify_pool_destroy(struct read_verify_pool *rvp);
 
-bool read_verify_schedule_io(struct read_verify_pool *rvp, struct disk *disk,
-		uint64_t start, uint64_t length, void *end_arg);
+bool read_verify_schedule_io(struct read_verify_pool *rvp, uint64_t start,
+		uint64_t length, void *end_arg);
 bool read_verify_force_io(struct read_verify_pool *rvp);
 uint64_t read_verify_bytes(struct read_verify_pool *rvp);
 

Thread overview: 29+ messages
2019-03-01 23:26 [PATCH 00/23] xfsprogs-5.0: fix various problems Darrick J. Wong
2019-03-01 23:26 ` [PATCH 01/23] configure: use sys/xattr.h for fsetxattr detection Darrick J. Wong
2019-03-08  8:08   ` Christoph Hellwig
2019-03-01 23:27 ` [PATCH 02/23] xfs_io: actually check copy file range helper return values Darrick J. Wong
2019-03-08  8:09   ` Christoph Hellwig
2019-03-01 23:27 ` [PATCH 03/23] xfs_io: statx -r should print attributes_mask Darrick J. Wong
2019-03-08  8:09   ` Christoph Hellwig
2019-03-01 23:27 ` [PATCH 04/23] xfs_scrub_all: walk the lsblk device/fs hierarchy correctly Darrick J. Wong
2019-03-01 23:27 ` [PATCH 05/23] xfs_scrub_all.timer: activate after most of the system is up Darrick J. Wong
2019-03-01 23:27 ` [PATCH 06/23] xfs_scrub: rename the global nr_threads Darrick J. Wong
2019-03-01 23:27 ` [PATCH 07/23] xfs_scrub: use datadev parallelization estimates for thread count Darrick J. Wong
2019-03-01 23:27 ` [PATCH 08/23] xfs_scrub: don't expose internal pool state Darrick J. Wong
2019-03-01 23:27 ` Darrick J. Wong [this message]
2019-03-01 23:27 ` [PATCH 10/23] xfs_scrub: don't close mnt_fd when mnt_fd open fails Darrick J. Wong
2019-03-01 23:28 ` [PATCH 11/23] xfs_scrub: check label for misleading characters Darrick J. Wong
2019-03-01 23:28 ` [PATCH 12/23] mkfs: validate extent size hint parameters Darrick J. Wong
2019-03-01 23:28 ` [PATCH 13/23] xfs_repair: reinitialize the root directory nlink correctly Darrick J. Wong
2019-04-09 20:43   ` Eric Sandeen
2019-03-01 23:28 ` [PATCH 14/23] xfs_repair: bump the irec on-disk nlink when adding lost+found Darrick J. Wong
2019-03-01 23:28 ` [PATCH 15/23] xfs_repair: fix uninitialized variable warnings Darrick J. Wong
2019-03-01 23:28 ` [PATCH 16/23] xfs_db: fix finobt record decoding when sparse inodes enabled Darrick J. Wong
2019-03-01 23:28 ` [PATCH 17/23] xfs_db: use TYP_FINOBT for finobt metadump Darrick J. Wong
2019-03-01 23:28 ` [PATCH 18/23] xfs_info: use findmnt to handle mounted block devices Darrick J. Wong
2019-03-01 23:28 ` [PATCH 19/23] libfrog: hoist bitmap out of scrub Darrick J. Wong
2019-03-01 23:28 ` [PATCH 20/23] xfs_repair: correctly account for free space btree shrinks when fixing freelist Darrick J. Wong
2019-03-01 23:29 ` [PATCH 21/23] libxfs: free buffer log item in libxfs_trans_brelse Darrick J. Wong
2019-03-01 23:29 ` [PATCH 22/23] libxfs: free inode item when committing transaction Darrick J. Wong
2019-03-01 23:29 ` [PATCH 23/23] libxfs: free buffer and inode log items when cancelling a transaction Darrick J. Wong
2019-03-04 20:58 ` [PATCH 24/23] xfs_io: don't walk off the end of argv in fzero_f Darrick J. Wong
