From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org
Cc: linux-xfs@vger.kernel.org
Subject: [PATCH 1/7] xfs: increase the default parallelism levels of pwork clients
Date: Mon, 11 Jan 2021 15:23:21 -0800 [thread overview]
Message-ID: <161040740189.1582286.17385075679159461086.stgit@magnolia> (raw)
In-Reply-To: <161040739544.1582286.11068012972712089066.stgit@magnolia>
From: Darrick J. Wong <djwong@kernel.org>
Increase the default parallelism level for pwork clients so that we can
take advantage of computers with a lot of CPUs and a lot of storage hardware.
The posteof/cowblocks cleanup series will use the functionality
presented in this patch to constrain the number of background per-ag gc
threads to our best estimate of the amount of parallelism that the
filesystem can sustain.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_buf.c | 34 ++++++++++++++++++++++++++++++++++
fs/xfs/xfs_buf.h | 1 +
fs/xfs/xfs_iwalk.c | 2 +-
fs/xfs/xfs_mount.c | 39 +++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_mount.h | 1 +
fs/xfs/xfs_pwork.c | 17 +++++------------
fs/xfs/xfs_pwork.h | 2 +-
7 files changed, 82 insertions(+), 14 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f8400bbd6473..10d05c4522c9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2384,3 +2384,37 @@ xfs_verify_magic16(
return false;
return dmagic == bp->b_ops->magic16[idx];
}
+
+/*
+ * Guess how many threads can usefully issue IO to a device at once.
+ */
+unsigned int
+xfs_buftarg_guess_threads(
+	struct xfs_buftarg	*btp)
+{
+	unsigned int		nr_threads = 1;
+	int			io_min;
+	int			io_opt;
+
+	if (blk_queue_nonrot(btp->bt_bdev->bd_disk->queue)) {
+		/*
+		 * Non-rotational media has no moving parts, so assume that
+		 * every online CPU can throw IO requests at it.
+		 */
+		nr_threads = num_online_cpus();
+	} else {
+		/*
+		 * A preferred IO size larger than the minimum IO size hints
+		 * at a RAID setup; infer the disk count from the ratio and
+		 * use that as the parallelism.  Absent such a hint, treat
+		 * the device as a rotating disk with a single drive head
+		 * and stay with one thread.
+		 */
+		io_min = bdev_io_min(btp->bt_bdev);
+		io_opt = bdev_io_opt(btp->bt_bdev);
+		if (io_opt > io_min && io_min > 0)
+			nr_threads = io_opt / io_min;
+	}
+
+	return nr_threads;
+}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5d91a31298a4..fb0e0d89962c 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -349,6 +349,7 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
+unsigned int xfs_buftarg_guess_threads(struct xfs_buftarg *btp);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index eae3aff9bc97..2ab07d58c901 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -624,7 +624,7 @@ xfs_iwalk_threaded(
ASSERT(agno < mp->m_sb.sb_agcount);
ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
- nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+ nr_threads = xfs_pwork_guess_threads(mp);
error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
nr_threads);
if (error)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 7110507a2b6b..1e974106e58c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1358,3 +1358,42 @@ xfs_mod_delalloc(
percpu_counter_add_batch(&mp->m_delalloc_blks, delta,
XFS_DELALLOC_BATCH);
}
+
+/*
+ * Estimate the amount of parallelism that is available for metadata operations
+ * on this filesystem.  The result never exceeds the AG count or the CPU count.
+ */
+unsigned int
+xfs_guess_metadata_threads(
+	struct xfs_mount	*mp)
+{
+	unsigned int		threads;
+
+	/*
+	 * Estimate the amount of parallelism for metadata operations from the
+	 * least capable of the two devices that handle metadata.  The log
+	 * device is only consulted when it is distinct from the data device.
+	 */
+	threads = xfs_buftarg_guess_threads(mp->m_ddev_targp);
+	if (mp->m_logdev_targp != mp->m_ddev_targp)
+		threads = min(xfs_buftarg_guess_threads(mp->m_logdev_targp),
+			      threads);
+
+	/* If the storage told us it has fancy capabilities, we're done. */
+	if (threads > 1)
+		goto clamp;
+
+	/*
+	 * Metadata storage did not even hint that it has any parallel
+	 * capability.  If the filesystem was formatted with a stripe unit and
+	 * width, we'll treat that as evidence of a RAID setup and estimate
+	 * the number of disks.
+	 */
+	if (mp->m_sb.sb_unit > 0 && mp->m_sb.sb_width > mp->m_sb.sb_unit)
+		threads = mp->m_sb.sb_width / mp->m_sb.sb_unit;
+
+clamp:
+	/* Cap at the AG count (lock contention) and at the CPU count. */
+	threads = min(mp->m_sb.sb_agcount, threads);
+	return min(num_online_cpus(), threads);
+}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index dfa429b77ee2..70f6c68c795f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -426,5 +426,6 @@ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
int error_class, int error);
void xfs_force_summary_recalc(struct xfs_mount *mp);
void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta);
+unsigned int xfs_guess_metadata_threads(struct xfs_mount *mp);
#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
index b03333f1c84a..5f1a5e575a48 100644
--- a/fs/xfs/xfs_pwork.c
+++ b/fs/xfs/xfs_pwork.c
@@ -118,19 +118,12 @@ xfs_pwork_poll(
touch_softlockup_watchdog();
}
-/*
- * Return the amount of parallelism that the data device can handle, or 0 for
- * no limit.
- */
+/* Estimate the thread count for a pwork queue, capped at WQ_MAX_ACTIVE. */
unsigned int
-xfs_pwork_guess_datadev_parallelism(
+xfs_pwork_guess_threads(
struct xfs_mount *mp)
{
- struct xfs_buftarg *btp = mp->m_ddev_targp;
-
- /*
- * For now we'll go with the most conservative setting possible,
- * which is two threads for an SSD and 1 thread everywhere else.
- */
- return blk_queue_nonrot(btp->bt_bdev->bd_disk->queue) ? 2 : 1;
+ /* pwork queues are not unbounded, so we must honor WQ_MAX_ACTIVE. */
+ return min_t(unsigned int, xfs_guess_metadata_threads(mp),
+ WQ_MAX_ACTIVE);
}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
index 8133124cf3bb..f402920f7061 100644
--- a/fs/xfs/xfs_pwork.h
+++ b/fs/xfs/xfs_pwork.h
@@ -56,6 +56,6 @@ int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
-unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+unsigned int xfs_pwork_guess_threads(struct xfs_mount *mp);
#endif /* __XFS_PWORK_H__ */
next prev parent reply other threads:[~2021-01-12 0:30 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-11 23:23 [PATCHSET v2 0/7] xfs: consolidate posteof and cowblocks cleanup Darrick J. Wong
2021-01-11 23:23 ` Darrick J. Wong [this message]
2021-01-13 14:49 ` [PATCH 1/7] xfs: increase the default parallelism levels of pwork clients Christoph Hellwig
2021-01-14 21:32 ` Darrick J. Wong
2021-01-14 22:38 ` Darrick J. Wong
2021-01-18 17:36 ` Christoph Hellwig
2021-01-18 19:57 ` Darrick J. Wong
2021-01-19 16:37 ` Christoph Hellwig
2021-01-19 19:17 ` Darrick J. Wong
2021-01-11 23:23 ` [PATCH 2/7] xfs: refactor the predicate part of xfs_free_eofblocks Darrick J. Wong
2021-01-13 14:57 ` Christoph Hellwig
2021-01-14 22:49 ` Darrick J. Wong
2021-01-18 17:38 ` Christoph Hellwig
2021-01-11 23:23 ` [PATCH 3/7] xfs: consolidate incore inode radix tree posteof/cowblocks tags Darrick J. Wong
2021-01-13 14:59 ` Christoph Hellwig
2021-01-11 23:23 ` [PATCH 4/7] xfs: consolidate the eofblocks and cowblocks workers Darrick J. Wong
2021-01-13 15:04 ` Christoph Hellwig
2021-01-13 23:53 ` Darrick J. Wong
2021-01-11 23:23 ` [PATCH 5/7] xfs: only walk the incore inode tree once per blockgc scan Darrick J. Wong
2021-01-13 15:06 ` Christoph Hellwig
2021-01-13 20:41 ` Darrick J. Wong
2021-01-11 23:23 ` [PATCH 6/7] xfs: rename block gc start and stop functions Darrick J. Wong
2021-01-13 15:07 ` Christoph Hellwig
2021-01-11 23:23 ` [PATCH 7/7] xfs: parallelize block preallocation garbage collection Darrick J. Wong
2021-01-13 15:09 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=161040740189.1582286.17385075679159461086.stgit@magnolia \
--to=djwong@kernel.org \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).