All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu
Date: Tue, 12 May 2020 19:28:08 +1000	[thread overview]
Message-ID: <20200512092811.1846252-3-david@fromorbit.com> (raw)
In-Reply-To: <20200512092811.1846252-1-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

m_active_trans is a global atomic counter, and we are hitting it at a
rate of half a million transactions a second, so it's bouncing the
counter cacheline all over the place on large machines. Convert it to
a per-cpu counter.

And .... oh wow, that was unexpected!

Concurrent create, 50 million inodes, identical 16p/16GB virtual
machines on different physical hosts. Machine A has twice the CPU
cores per socket of machine B:

		unpatched	patched
machine A:	3m45s		2m27s
machine B:	4m13s		4m14s

Create rates:
		unpatched	patched
machine A:	246k+/-15k	384k+/-10k
machine B:	225k+/-13k	223k+/-11k

Concurrent rm of same 50 million inodes:

		unpatched	patched
machine A:	8m30s		3m09s
machine B:	5m02s		4m51s

The transaction rate on the fast machine went from about 250k/sec to
over 600k/sec, which indicates just how much of a bottleneck this
atomic counter was.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_mount.h |  2 +-
 fs/xfs/xfs_super.c | 12 +++++++++---
 fs/xfs/xfs_trans.c |  6 +++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 712b3e2583316..af3d8b71e9591 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -84,6 +84,7 @@ typedef struct xfs_mount {
 	 * extents or anything related to the rt device.
 	 */
 	struct percpu_counter	m_delalloc_blks;
+	struct percpu_counter	m_active_trans;	/* in progress xact counter */
 
 	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
 	char			*m_rtname;	/* realtime device name */
@@ -164,7 +165,6 @@ typedef struct xfs_mount {
 	uint64_t		m_resblks;	/* total reserved blocks */
 	uint64_t		m_resblks_avail;/* available reserved blocks */
 	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
-	atomic_t		m_active_trans;	/* number trans frozen */
 	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
 	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
 	struct delayed_work	m_eofblocks_work; /* background eof blocks
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e80bd2c4c279e..bc4853525ce18 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -883,7 +883,7 @@ xfs_quiesce_attr(
 	int	error = 0;
 
 	/* wait for all modifications to complete */
-	while (atomic_read(&mp->m_active_trans) > 0)
+	while (percpu_counter_sum(&mp->m_active_trans) > 0)
 		delay(100);
 
 	/* force the log to unpin objects from the now complete transactions */
@@ -902,7 +902,7 @@ xfs_quiesce_attr(
 	 * Just warn here till VFS can correctly support
 	 * read-only remount without racing.
 	 */
-	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+	WARN_ON(percpu_counter_sum(&mp->m_active_trans) != 0);
 
 	xfs_log_quiesce(mp);
 }
@@ -1027,8 +1027,14 @@ xfs_init_percpu_counters(
 	if (error)
 		goto free_fdblocks;
 
+	error = percpu_counter_init(&mp->m_active_trans, 0, GFP_KERNEL);
+	if (error)
+		goto free_delalloc_blocks;
+
 	return 0;
 
+free_delalloc_blocks:
+	percpu_counter_destroy(&mp->m_delalloc_blks);
 free_fdblocks:
 	percpu_counter_destroy(&mp->m_fdblocks);
 free_ifree:
@@ -1057,6 +1063,7 @@ xfs_destroy_percpu_counters(
 	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
 	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
 	percpu_counter_destroy(&mp->m_delalloc_blks);
+	percpu_counter_destroy(&mp->m_active_trans);
 }
 
 static void
@@ -1792,7 +1799,6 @@ static int xfs_init_fs_context(
 	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
 	spin_lock_init(&mp->m_perag_lock);
 	mutex_init(&mp->m_growlock);
-	atomic_set(&mp->m_active_trans, 0);
 	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
 	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 28b983ff8b113..636df5017782e 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -68,7 +68,7 @@ xfs_trans_free(
 	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
 	trace_xfs_trans_free(tp, _RET_IP_);
-	atomic_dec(&tp->t_mountp->m_active_trans);
+	percpu_counter_dec(&tp->t_mountp->m_active_trans);
 	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_end_intwrite(tp->t_mountp->m_super);
 	xfs_trans_free_dqinfo(tp);
@@ -126,7 +126,7 @@ xfs_trans_dup(
 
 	xfs_trans_dup_dqinfo(tp, ntp);
 
-	atomic_inc(&tp->t_mountp->m_active_trans);
+	percpu_counter_inc(&tp->t_mountp->m_active_trans);
 	return ntp;
 }
 
@@ -275,7 +275,7 @@ xfs_trans_alloc(
 	 */
 	WARN_ON(resp->tr_logres > 0 &&
 		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
-	atomic_inc(&mp->m_active_trans);
+	percpu_counter_inc(&mp->m_active_trans);
 
 	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
 	tp->t_flags = flags;
-- 
2.26.1.301.g55bc3eb7cb9


  parent reply	other threads:[~2020-05-12  9:28 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-12  9:28 [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
2020-05-12  9:28 ` [PATCH 1/5] xfs: separate read-only variables in struct xfs_mount Dave Chinner
2020-05-12 12:30   ` Brian Foster
2020-05-12 16:09     ` Darrick J. Wong
2020-05-12 21:43       ` Dave Chinner
2020-05-12 21:53     ` Dave Chinner
2020-05-12  9:28 ` Dave Chinner [this message]
2020-05-12 12:31   ` [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu Brian Foster
2020-05-12  9:28 ` [PATCH 3/5] [RFC] xfs: use percpu counters for CIL context counters Dave Chinner
2020-05-12 14:05   ` Brian Foster
2020-05-12 23:36     ` Dave Chinner
2020-05-13 12:09       ` Brian Foster
2020-05-13 21:52         ` Dave Chinner
2020-05-14  1:50           ` Dave Chinner
2020-05-14  2:49             ` Dave Chinner
2020-05-14 13:43           ` Brian Foster
2020-05-12  9:28 ` [PATCH 4/5] [RFC] xfs: per-cpu CIL lists Dave Chinner
2020-05-13 17:02   ` Brian Foster
2020-05-13 23:33     ` Dave Chinner
2020-05-14 13:44       ` Brian Foster
2020-05-14 22:46         ` Dave Chinner
2020-05-15 17:26           ` Brian Foster
2020-05-18  0:30             ` Dave Chinner
2020-05-12  9:28 ` [PATCH 5/5] [RFC] xfs: make CIL busy extent lists per-cpu Dave Chinner
2020-05-12 10:25 ` [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200512092811.1846252-3-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.