From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu
Date: Tue, 12 May 2020 19:28:08 +1000 [thread overview]
Message-ID: <20200512092811.1846252-3-david@fromorbit.com> (raw)
In-Reply-To: <20200512092811.1846252-1-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
It's a global atomic counter, and we are hitting it at a rate of
half a million transactions a second, so it's bouncing the counter
cacheline all over the place on large machines. Convert it to a
per-cpu counter.
And .... oh wow, that was unexpected!
Concurrent create, 50 million inodes, identical 16p/16GB virtual
machines on different physical hosts. Machine A has twice the CPU
cores per socket of machine B:
unpatched patched
machine A: 3m45s 2m27s
machine B: 4m13s 4m14s
Create rates:
unpatched patched
machine A: 246k+/-15k 384k+/-10k
machine B: 225k+/-13k 223k+/-11k
Concurrent rm of same 50 million inodes:
unpatched patched
machine A: 8m30s 3m09s
machine B: 5m02s 4m51s
The transaction rate on the fast machine went from about 250k/sec to
over 600k/sec, which indicates just how much of a bottleneck this
atomic counter was.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_mount.h | 2 +-
fs/xfs/xfs_super.c | 12 +++++++++---
fs/xfs/xfs_trans.c | 6 +++---
3 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 712b3e2583316..af3d8b71e9591 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -84,6 +84,7 @@ typedef struct xfs_mount {
* extents or anything related to the rt device.
*/
struct percpu_counter m_delalloc_blks;
+ struct percpu_counter m_active_trans; /* in progress xact counter */
struct xfs_buf *m_sb_bp; /* buffer for superblock */
char *m_rtname; /* realtime device name */
@@ -164,7 +165,6 @@ typedef struct xfs_mount {
uint64_t m_resblks; /* total reserved blocks */
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
- atomic_t m_active_trans; /* number trans frozen */
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct delayed_work m_eofblocks_work; /* background eof blocks
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e80bd2c4c279e..bc4853525ce18 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -883,7 +883,7 @@ xfs_quiesce_attr(
int error = 0;
/* wait for all modifications to complete */
- while (atomic_read(&mp->m_active_trans) > 0)
+ while (percpu_counter_sum(&mp->m_active_trans) > 0)
delay(100);
/* force the log to unpin objects from the now complete transactions */
@@ -902,7 +902,7 @@ xfs_quiesce_attr(
* Just warn here till VFS can correctly support
* read-only remount without racing.
*/
- WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+ WARN_ON(percpu_counter_sum(&mp->m_active_trans) != 0);
xfs_log_quiesce(mp);
}
@@ -1027,8 +1027,14 @@ xfs_init_percpu_counters(
if (error)
goto free_fdblocks;
+ error = percpu_counter_init(&mp->m_active_trans, 0, GFP_KERNEL);
+ if (error)
+ goto free_delalloc_blocks;
+
return 0;
+free_delalloc_blocks:
+ percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
@@ -1057,6 +1063,7 @@ xfs_destroy_percpu_counters(
ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
percpu_counter_sum(&mp->m_delalloc_blks) == 0);
percpu_counter_destroy(&mp->m_delalloc_blks);
+ percpu_counter_destroy(&mp->m_active_trans);
}
static void
@@ -1792,7 +1799,6 @@ static int xfs_init_fs_context(
INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
spin_lock_init(&mp->m_perag_lock);
mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28b983ff8b113..636df5017782e 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -68,7 +68,7 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_);
- atomic_dec(&tp->t_mountp->m_active_trans);
+ percpu_counter_dec(&tp->t_mountp->m_active_trans);
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp);
@@ -126,7 +126,7 @@ xfs_trans_dup(
xfs_trans_dup_dqinfo(tp, ntp);
- atomic_inc(&tp->t_mountp->m_active_trans);
+ percpu_counter_inc(&tp->t_mountp->m_active_trans);
return ntp;
}
@@ -275,7 +275,7 @@ xfs_trans_alloc(
*/
WARN_ON(resp->tr_logres > 0 &&
mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
- atomic_inc(&mp->m_active_trans);
+ percpu_counter_inc(&mp->m_active_trans);
tp->t_magic = XFS_TRANS_HEADER_MAGIC;
tp->t_flags = flags;
--
2.26.1.301.g55bc3eb7cb9
next prev parent reply other threads:[~2020-05-12 9:28 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-12 9:28 [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
2020-05-12 9:28 ` [PATCH 1/5] xfs: separate read-only variables in struct xfs_mount Dave Chinner
2020-05-12 12:30 ` Brian Foster
2020-05-12 16:09 ` Darrick J. Wong
2020-05-12 21:43 ` Dave Chinner
2020-05-12 21:53 ` Dave Chinner
2020-05-12 9:28 ` Dave Chinner [this message]
2020-05-12 12:31 ` [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu Brian Foster
2020-05-12 9:28 ` [PATCH 3/5] [RFC] xfs: use percpu counters for CIL context counters Dave Chinner
2020-05-12 14:05 ` Brian Foster
2020-05-12 23:36 ` Dave Chinner
2020-05-13 12:09 ` Brian Foster
2020-05-13 21:52 ` Dave Chinner
2020-05-14 1:50 ` Dave Chinner
2020-05-14 2:49 ` Dave Chinner
2020-05-14 13:43 ` Brian Foster
2020-05-12 9:28 ` [PATCH 4/5] [RFC] xfs: per-cpu CIL lists Dave Chinner
2020-05-13 17:02 ` Brian Foster
2020-05-13 23:33 ` Dave Chinner
2020-05-14 13:44 ` Brian Foster
2020-05-14 22:46 ` Dave Chinner
2020-05-15 17:26 ` Brian Foster
2020-05-18 0:30 ` Dave Chinner
2020-05-12 9:28 ` [PATCH 5/5] [RFC] xfs: make CIL busy extent lists per-cpu Dave Chinner
2020-05-12 10:25 ` [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200512092811.1846252-3-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.