From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 1/7] xfs: increase the default parallelism levels of pwork clients
Date: Mon, 11 Jan 2021 15:23:21 -0800
Message-ID: <161040740189.1582286.17385075679159461086.stgit@magnolia>
In-Reply-To: <161040739544.1582286.11068012972712089066.stgit@magnolia>

From: Darrick J. Wong <djwong@kernel.org>

Increase the default parallelism level for pwork clients so that we can
take advantage of computers with many CPUs and storage that can service
a lot of IO in parallel.  The posteof/cowblocks cleanup series will use
the functionality added in this patch to constrain the number of
background per-ag gc threads to our best estimate of the amount of
parallelism that the filesystem can sustain.
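
As a rough, hypothetical sketch (not part of this patch), a per-ag gc
client would size its workqueue the same way the iwalk caller converted
below does; xfs_blockgc_ag_work and the "xfs_blockgc" tag are
placeholder names standing in for what the later patches in this series
actually add:

	nr_threads = xfs_pwork_guess_threads(mp);
	error = xfs_pwork_init(mp, &pctl, xfs_blockgc_ag_work, "xfs_blockgc",
			nr_threads);
	if (error)
		return error;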

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf.c   |   34 ++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_buf.h   |    1 +
 fs/xfs/xfs_iwalk.c |    2 +-
 fs/xfs/xfs_mount.c |   39 +++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_mount.h |    1 +
 fs/xfs/xfs_pwork.c |   17 +++++------------
 fs/xfs/xfs_pwork.h |    2 +-
 7 files changed, 82 insertions(+), 14 deletions(-)
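
A quick worked example of the new estimate, using made-up hardware
numbers: on rotating storage behind a RAID controller that advertises a
64k minimum IO size (chunk) and a 512k optimal IO size (full stripe),
xfs_buftarg_guess_threads() infers 512k / 64k = 8 disks' worth of
parallelism.  xfs_guess_metadata_threads() then clamps that estimate to
the AG count and the number of online CPUs, and
xfs_pwork_guess_threads() further caps it at WQ_MAX_ACTIVE.  A
non-rotational device starts from the online CPU count instead, subject
to the same clamps.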


diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f8400bbd6473..10d05c4522c9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2384,3 +2384,37 @@ xfs_verify_magic16(
 		return false;
 	return dmagic == bp->b_ops->magic16[idx];
 }
+
+/* Estimate the amount of parallelism available for a given device. */
+unsigned int
+xfs_buftarg_guess_threads(
+	struct xfs_buftarg	*btp)
+{
+	int			iomin;
+	int			ioopt;
+
+	/*
+	 * The device tells us that it is non-rotational, and we take that to
+	 * mean there are no moving parts and that the device can handle all
+	 * the CPUs throwing IO requests at it.
+	 */
+	if (blk_queue_nonrot(btp->bt_bdev->bd_disk->queue))
+		return num_online_cpus();
+
+	/*
+	 * The device has preferred and minimum IO sizes that suggest a RAID
+	 * setup, so infer the number of disks and assume that the parallelism
+	 * is equal to the disk count.
+	 */
+	iomin = bdev_io_min(btp->bt_bdev);
+	ioopt = bdev_io_opt(btp->bt_bdev);
+	if (iomin > 0 && ioopt > iomin)
+		return ioopt / iomin;
+
+	/*
+	 * The device did not indicate that it has any capabilities beyond that
+	 * of a rotating disk with a single drive head, so we estimate no
+	 * parallelism at all.
+	 */
+	return 1;
+}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5d91a31298a4..fb0e0d89962c 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -349,6 +349,7 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
 extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
+unsigned int xfs_buftarg_guess_threads(struct xfs_buftarg *btp);
 
 #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)
 #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index eae3aff9bc97..2ab07d58c901 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -624,7 +624,7 @@ xfs_iwalk_threaded(
 	ASSERT(agno < mp->m_sb.sb_agcount);
 	ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
 
-	nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+	nr_threads = xfs_pwork_guess_threads(mp);
 	error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
 			nr_threads);
 	if (error)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7110507a2b6b..1e974106e58c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1358,3 +1358,42 @@ xfs_mod_delalloc(
 	percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
 			XFS_DELALLOC_BATCH);
 }
+
+/*
+ * Estimate the amount of parallelism that is available for metadata operations
+ * on this filesystem.
+ */
+unsigned int
+xfs_guess_metadata_threads(
+	struct xfs_mount	*mp)
+{
+	unsigned int		threads;
+
+	/*
+	 * Estimate the amount of parallelism for metadata operations from the
+	 * least capable of the two devices that handle metadata.  Cap that
+	 * estimate to the number of AGs to avoid unnecessary lock contention.
+	 */
+	threads = xfs_buftarg_guess_threads(mp->m_ddev_targp);
+	if (mp->m_logdev_targp != mp->m_ddev_targp)
+		threads = min(xfs_buftarg_guess_threads(mp->m_logdev_targp),
+			      threads);
+	threads = min(mp->m_sb.sb_agcount, threads);
+
+	/* If the storage told us it has fancy capabilities, we're done. */
+	if (threads > 1)
+		goto clamp;
+
+	/*
+	 * Metadata storage did not even hint that it has any parallel
+	 * capability.  If the filesystem was formatted with a stripe unit and
+	 * width, we'll treat that as evidence of a RAID setup and estimate
+	 * the number of disks.
+	 */
+	if (mp->m_sb.sb_unit > 0 && mp->m_sb.sb_width > mp->m_sb.sb_unit)
+		threads = mp->m_sb.sb_width / mp->m_sb.sb_unit;
+
+clamp:
+	/* Don't return an estimate larger than the CPU count. */
+	return min(num_online_cpus(), threads);
+}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index dfa429b77ee2..70f6c68c795f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -426,5 +426,6 @@ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
 		int error_class, int error);
 void xfs_force_summary_recalc(struct xfs_mount *mp);
 void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
+unsigned int xfs_guess_metadata_threads(struct xfs_mount *mp);
 
 #endif	/* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
index b03333f1c84a..5f1a5e575a48 100644
--- a/fs/xfs/xfs_pwork.c
+++ b/fs/xfs/xfs_pwork.c
@@ -118,19 +118,12 @@ xfs_pwork_poll(
 		touch_softlockup_watchdog();
 }
 
-/*
- * Return the amount of parallelism that the data device can handle, or 0 for
- * no limit.
- */
+/* Estimate how many threads we need for a parallel work queue. */
 unsigned int
-xfs_pwork_guess_datadev_parallelism(
+xfs_pwork_guess_threads(
 	struct xfs_mount	*mp)
 {
-	struct xfs_buftarg	*btp = mp->m_ddev_targp;
-
-	/*
-	 * For now we'll go with the most conservative setting possible,
-	 * which is two threads for an SSD and 1 thread everywhere else.
-	 */
-	return blk_queue_nonrot(btp->bt_bdev->bd_disk->queue) ? 2 : 1;
+	/* pwork queues are not unbounded, so stay within WQ_MAX_ACTIVE. */
+	return min_t(unsigned int, xfs_guess_metadata_threads(mp),
+			WQ_MAX_ACTIVE);
 }
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
index 8133124cf3bb..f402920f7061 100644
--- a/fs/xfs/xfs_pwork.h
+++ b/fs/xfs/xfs_pwork.h
@@ -56,6 +56,6 @@ int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
 void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
 int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
 void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
-unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+unsigned int xfs_pwork_guess_threads(struct xfs_mount *mp);
 
 #endif /* __XFS_PWORK_H__ */

