From: Brian Foster <bfoster@redhat.com>
To: Dave Chinner <david@fromorbit.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu
Date: Tue, 12 May 2020 08:31:36 -0400 [thread overview]
Message-ID: <20200512123136.GB37029@bfoster> (raw)
In-Reply-To: <20200512092811.1846252-3-david@fromorbit.com>
On Tue, May 12, 2020 at 07:28:08PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> It's a global atomic counter, and we are hitting it at a rate of
> half a million transactions a second, so it's bouncing the counter
> cacheline all over the place on large machines. Convert it to a
> per-cpu counter.
>
> And .... oh wow, that was unexpected!
>
> Concurrent create, 50 million inodes, identical 16p/16GB virtual
> machines on different physical hosts. Machine A has twice the CPU
> cores per socket of machine B:
>
> unpatched patched
> machine A: 3m45s 2m27s
> machine B: 4m13s 4m14s
>
> Create rates:
> unpatched patched
> machine A: 246k+/-15k 384k+/-10k
> machine B: 225k+/-13k 223k+/-11k
>
> Concurrent rm of same 50 million inodes:
>
> unpatched patched
> machine A: 8m30s 3m09s
> machine B: 5m02s 4m51s
>
> The transaction rate on the fast machine went from about 250k/sec to
> over 600k/sec, which indicates just how much of a bottleneck this
> atomic counter was.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
Looks fairly straightforward. We're increasing the size of xfs_mount,
but it's already over a 4k page and there's only one per-mount:
Reviewed-by: Brian Foster <bfoster@redhat.com>
> fs/xfs/xfs_mount.h | 2 +-
> fs/xfs/xfs_super.c | 12 +++++++++---
> fs/xfs/xfs_trans.c | 6 +++---
> 3 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 712b3e2583316..af3d8b71e9591 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -84,6 +84,7 @@ typedef struct xfs_mount {
> * extents or anything related to the rt device.
> */
> struct percpu_counter m_delalloc_blks;
> + struct percpu_counter m_active_trans; /* in progress xact counter */
>
> struct xfs_buf *m_sb_bp; /* buffer for superblock */
> char *m_rtname; /* realtime device name */
> @@ -164,7 +165,6 @@ typedef struct xfs_mount {
> uint64_t m_resblks; /* total reserved blocks */
> uint64_t m_resblks_avail;/* available reserved blocks */
> uint64_t m_resblks_save; /* reserved blks @ remount,ro */
> - atomic_t m_active_trans; /* number trans frozen */
> struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
> struct delayed_work m_reclaim_work; /* background inode reclaim */
> struct delayed_work m_eofblocks_work; /* background eof blocks
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index e80bd2c4c279e..bc4853525ce18 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -883,7 +883,7 @@ xfs_quiesce_attr(
> int error = 0;
>
> /* wait for all modifications to complete */
> - while (atomic_read(&mp->m_active_trans) > 0)
> + while (percpu_counter_sum(&mp->m_active_trans) > 0)
> delay(100);
>
> /* force the log to unpin objects from the now complete transactions */
> @@ -902,7 +902,7 @@ xfs_quiesce_attr(
> * Just warn here till VFS can correctly support
> * read-only remount without racing.
> */
> - WARN_ON(atomic_read(&mp->m_active_trans) != 0);
> + WARN_ON(percpu_counter_sum(&mp->m_active_trans) != 0);
>
> xfs_log_quiesce(mp);
> }
> @@ -1027,8 +1027,14 @@ xfs_init_percpu_counters(
> if (error)
> goto free_fdblocks;
>
> + error = percpu_counter_init(&mp->m_active_trans, 0, GFP_KERNEL);
> + if (error)
> + goto free_delalloc_blocks;
> +
> return 0;
>
> +free_delalloc_blocks:
> + percpu_counter_destroy(&mp->m_delalloc_blks);
> free_fdblocks:
> percpu_counter_destroy(&mp->m_fdblocks);
> free_ifree:
> @@ -1057,6 +1063,7 @@ xfs_destroy_percpu_counters(
> ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
> percpu_counter_sum(&mp->m_delalloc_blks) == 0);
> percpu_counter_destroy(&mp->m_delalloc_blks);
> + percpu_counter_destroy(&mp->m_active_trans);
> }
>
> static void
> @@ -1792,7 +1799,6 @@ static int xfs_init_fs_context(
> INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
> spin_lock_init(&mp->m_perag_lock);
> mutex_init(&mp->m_growlock);
> - atomic_set(&mp->m_active_trans, 0);
> INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
> INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
> INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
> diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
> index 28b983ff8b113..636df5017782e 100644
> --- a/fs/xfs/xfs_trans.c
> +++ b/fs/xfs/xfs_trans.c
> @@ -68,7 +68,7 @@ xfs_trans_free(
> xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
>
> trace_xfs_trans_free(tp, _RET_IP_);
> - atomic_dec(&tp->t_mountp->m_active_trans);
> + percpu_counter_dec(&tp->t_mountp->m_active_trans);
> if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
> sb_end_intwrite(tp->t_mountp->m_super);
> xfs_trans_free_dqinfo(tp);
> @@ -126,7 +126,7 @@ xfs_trans_dup(
>
> xfs_trans_dup_dqinfo(tp, ntp);
>
> - atomic_inc(&tp->t_mountp->m_active_trans);
> + percpu_counter_inc(&tp->t_mountp->m_active_trans);
> return ntp;
> }
>
> @@ -275,7 +275,7 @@ xfs_trans_alloc(
> */
> WARN_ON(resp->tr_logres > 0 &&
> mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
> - atomic_inc(&mp->m_active_trans);
> + percpu_counter_inc(&mp->m_active_trans);
>
> tp->t_magic = XFS_TRANS_HEADER_MAGIC;
> tp->t_flags = flags;
> --
> 2.26.1.301.g55bc3eb7cb9
>
next prev parent reply other threads:[~2020-05-12 12:31 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-12 9:28 [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
2020-05-12 9:28 ` [PATCH 1/5] xfs: separate read-only variables in struct xfs_mount Dave Chinner
2020-05-12 12:30 ` Brian Foster
2020-05-12 16:09 ` Darrick J. Wong
2020-05-12 21:43 ` Dave Chinner
2020-05-12 21:53 ` Dave Chinner
2020-05-12 9:28 ` [PATCH 2/5] xfs: convert m_active_trans counter to per-cpu Dave Chinner
2020-05-12 12:31 ` Brian Foster [this message]
2020-05-12 9:28 ` [PATCH 3/5] [RFC] xfs: use percpu counters for CIL context counters Dave Chinner
2020-05-12 14:05 ` Brian Foster
2020-05-12 23:36 ` Dave Chinner
2020-05-13 12:09 ` Brian Foster
2020-05-13 21:52 ` Dave Chinner
2020-05-14 1:50 ` Dave Chinner
2020-05-14 2:49 ` Dave Chinner
2020-05-14 13:43 ` Brian Foster
2020-05-12 9:28 ` [PATCH 4/5] [RFC] xfs: per-cpu CIL lists Dave Chinner
2020-05-13 17:02 ` Brian Foster
2020-05-13 23:33 ` Dave Chinner
2020-05-14 13:44 ` Brian Foster
2020-05-14 22:46 ` Dave Chinner
2020-05-15 17:26 ` Brian Foster
2020-05-18 0:30 ` Dave Chinner
2020-05-12 9:28 ` [PATCH 5/5] [RFC] xfs: make CIL busy extent lists per-cpu Dave Chinner
2020-05-12 10:25 ` [PATCH 0/5 v2] xfs: fix a couple of performance issues Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200512123136.GB37029@bfoster \
--to=bfoster@redhat.com \
--cc=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.