From: Brian Foster <bfoster@redhat.com>
To: Dave Chinner <david@fromorbit.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu
Date: Tue, 12 May 2020 08:31:36 -0400
Message-ID: <20200512123136.GB37029@bfoster>
In-Reply-To: <20200512092811.1846252-3-david@fromorbit.com>

On Tue, May 12, 2020 at 07:28:08PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> It's a global atomic counter, and we are hitting it at a rate of
> half a million transactions a second, so it's bouncing the counter
> cacheline all over the place on large machines. Convert it to a
> per-cpu counter.
> 
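FWIW, for anyone reading along: a percpu_counter keeps a small per-cpu
delta on each cpu and only folds it into the shared count once a batch
threshold is crossed, so the inc/dec hot path normally touches a local
cacheline only. A rough usage sketch of the API (generic, not code from
this patch):

	struct percpu_counter active;
	int error;
	s64 total;

	/* init allocates the per-cpu storage, so it can fail */
	error = percpu_counter_init(&active, 0, GFP_KERNEL);
	if (error)
		return error;

	percpu_counter_inc(&active);	/* per-cpu delta, no global bounce */
	percpu_counter_dec(&active);

	/* exact value: locks the counter and walks the per-cpu deltas */
	total = percpu_counter_sum(&active);

	percpu_counter_destroy(&active);
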
> And .... oh wow, that was unexpected!
> 
> Concurrent create, 50 million inodes, identical 16p/16GB virtual
> machines on different physical hosts. Machine A has twice the CPU
> cores per socket of machine B:
> 
> 		unpatched	patched
> machine A:	3m45s		2m27s
> machine B:	4m13s		4m14s
> 
> Create rates:
> 		unpatched	patched
> machine A:	246k+/-15k	384k+/-10k
> machine B:	225k+/-13k	223k+/-11k
> 
> Concurrent rm of same 50 million inodes:
> 
> 		unpatched	patched
> machine A:	8m30s		3m09s
> machine B:	5m02s		4m51s
> 
> The transaction rate on the fast machine went from about 250k/sec to
> over 600k/sec, which indicates just how much of a bottleneck this
> atomic counter was.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---

Looks fairly straightforward. We're increasing the size of struct
xfs_mount, but it's already larger than a 4k page and there's only one
per mount, so the extra footprint doesn't really matter. The embedded
counter struct is small anyway; the per-cpu storage itself is allocated
separately by percpu_counter_init(). Roughly, from
<linux/percpu_counter.h> on an SMP build:
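
	struct percpu_counter {
		raw_spinlock_t	lock;
		s64		count;		/* accumulated folded count */
		struct list_head list;		/* CONFIG_HOTPLUG_CPU only */
		s32 __percpu	*counters;	/* per-cpu deltas */
	};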

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_mount.h |  2 +-
>  fs/xfs/xfs_super.c | 12 +++++++++---
>  fs/xfs/xfs_trans.c |  6 +++---
>  3 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 712b3e2583316..af3d8b71e9591 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -84,6 +84,7 @@ typedef struct xfs_mount {
>  	 * extents or anything related to the rt device.
>  	 */
>  	struct percpu_counter	m_delalloc_blks;
> +	struct percpu_counter	m_active_trans;	/* in progress xact counter */
>  
>  	struct xfs_buf		*m_sb_bp;	/* buffer for superblock */
>  	char			*m_rtname;	/* realtime device name */
> @@ -164,7 +165,6 @@ typedef struct xfs_mount {
>  	uint64_t		m_resblks;	/* total reserved blocks */
>  	uint64_t		m_resblks_avail;/* available reserved blocks */
>  	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
> -	atomic_t		m_active_trans;	/* number trans frozen */
>  	struct xfs_mru_cache	*m_filestream;  /* per-mount filestream data */
>  	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
>  	struct delayed_work	m_eofblocks_work; /* background eof blocks
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index e80bd2c4c279e..bc4853525ce18 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -883,7 +883,7 @@ xfs_quiesce_attr(
>  	int	error = 0;
>  
>  	/* wait for all modifications to complete */
> -	while (atomic_read(&mp->m_active_trans) > 0)
> +	while (percpu_counter_sum(&mp->m_active_trans) > 0)
>  		delay(100);
>  
>  	/* force the log to unpin objects from the now complete transactions */
> @@ -902,7 +902,7 @@ xfs_quiesce_attr(
>  	 * Just warn here till VFS can correctly support
>  	 * read-only remount without racing.
>  	 */
> -	WARN_ON(atomic_read(&mp->m_active_trans) != 0);
> +	WARN_ON(percpu_counter_sum(&mp->m_active_trans) != 0);
>  
>  	xfs_log_quiesce(mp);
>  }
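
One side note on the hunk above, not a blocker: percpu_counter_sum()
takes the counter spinlock and walks every online cpu, so polling it in
a loop is the deliberately exact-but-expensive path. That's fine for a
rare quiesce. If it ever showed up in a profile, percpu_counter_compare()
could keep the common iterations cheap, since it only falls back to the
exact sum when the approximate count is within the batch error margin
of the comparison value. Untested thought, not a request:

	/* hypothetical: approximate read first, exact sum only near zero */
	while (percpu_counter_compare(&mp->m_active_trans, 0) > 0)
		delay(100);
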
> @@ -1027,8 +1027,14 @@ xfs_init_percpu_counters(
>  	if (error)
>  		goto free_fdblocks;
>  
> +	error = percpu_counter_init(&mp->m_active_trans, 0, GFP_KERNEL);
> +	if (error)
> +		goto free_delalloc_blocks;
> +
>  	return 0;
>  
> +free_delalloc_blocks:
> +	percpu_counter_destroy(&mp->m_delalloc_blks);
>  free_fdblocks:
>  	percpu_counter_destroy(&mp->m_fdblocks);
>  free_ifree:
> @@ -1057,6 +1063,7 @@ xfs_destroy_percpu_counters(
>  	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
>  	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
>  	percpu_counter_destroy(&mp->m_delalloc_blks);
> +	percpu_counter_destroy(&mp->m_active_trans);
>  }
>  
>  static void
> @@ -1792,7 +1799,6 @@ static int xfs_init_fs_context(
>  	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
>  	spin_lock_init(&mp->m_perag_lock);
>  	mutex_init(&mp->m_growlock);
> -	atomic_set(&mp->m_active_trans, 0);
>  	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
>  	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
>  	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
> diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
> index 28b983ff8b113..636df5017782e 100644
> --- a/fs/xfs/xfs_trans.c
> +++ b/fs/xfs/xfs_trans.c
> @@ -68,7 +68,7 @@ xfs_trans_free(
>  	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
>  
>  	trace_xfs_trans_free(tp, _RET_IP_);
> -	atomic_dec(&tp->t_mountp->m_active_trans);
> +	percpu_counter_dec(&tp->t_mountp->m_active_trans);
>  	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
>  		sb_end_intwrite(tp->t_mountp->m_super);
>  	xfs_trans_free_dqinfo(tp);
> @@ -126,7 +126,7 @@ xfs_trans_dup(
>  
>  	xfs_trans_dup_dqinfo(tp, ntp);
>  
> -	atomic_inc(&tp->t_mountp->m_active_trans);
> +	percpu_counter_inc(&tp->t_mountp->m_active_trans);
>  	return ntp;
>  }
>  
> @@ -275,7 +275,7 @@ xfs_trans_alloc(
>  	 */
>  	WARN_ON(resp->tr_logres > 0 &&
>  		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
> -	atomic_inc(&mp->m_active_trans);
> +	percpu_counter_inc(&mp->m_active_trans);
>  
>  	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
>  	tp->t_flags = flags;
> -- 
> 2.26.1.301.g55bc3eb7cb9
> 


Thread overview (25+ messages):
2020-05-12  9:28 [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
2020-05-12  9:28 ` [PATCH 1/5] xfs: separate read-only variables in struct xfs_mount Dave Chinner
2020-05-12 12:30   ` Brian Foster
2020-05-12 16:09     ` Darrick J. Wong
2020-05-12 21:43       ` Dave Chinner
2020-05-12 21:53     ` Dave Chinner
2020-05-12  9:28 ` [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu Dave Chinner
2020-05-12 12:31   ` Brian Foster [this message]
2020-05-12  9:28 ` [PATCH 3/5] [RFC] xfs: use percpu counters for CIL context counters Dave Chinner
2020-05-12 14:05   ` Brian Foster
2020-05-12 23:36     ` Dave Chinner
2020-05-13 12:09       ` Brian Foster
2020-05-13 21:52         ` Dave Chinner
2020-05-14  1:50           ` Dave Chinner
2020-05-14  2:49             ` Dave Chinner
2020-05-14 13:43           ` Brian Foster
2020-05-12  9:28 ` [PATCH 4/5] [RFC] xfs: per-cpu CIL lists Dave Chinner
2020-05-13 17:02   ` Brian Foster
2020-05-13 23:33     ` Dave Chinner
2020-05-14 13:44       ` Brian Foster
2020-05-14 22:46         ` Dave Chinner
2020-05-15 17:26           ` Brian Foster
2020-05-18  0:30             ` Dave Chinner
2020-05-12  9:28 ` [PATCH 5/5] [RFC] xfs: make CIL busy extent lists per-cpu Dave Chinner
2020-05-12 10:25 ` [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
