All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Dave Chinner <david@fromorbit.com>
Cc: linux-xfs@vger.kernel.org
Subject: Re: [PATCH 29/39] xfs: introduce per-cpu CIL tracking structure
Date: Thu, 3 Jun 2021 09:49:37 -0700	[thread overview]
Message-ID: <20210603164937.GG26402@locust> (raw)
In-Reply-To: <20210603052240.171998-30-david@fromorbit.com>

On Thu, Jun 03, 2021 at 03:22:30PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> The CIL push lock is highly contended on larger machines, becoming a
> hard bottleneck that about 700,000 transaction commits/s on >16p
> machines. To address this, start moving the CIL tracking
> infrastructure to utilise per-CPU structures.
> 
> We need to track the space used, the amount of log reservation space
> reserved to write the CIL, the log items in the CIL and the busy
> extents that need to be completed by the CIL commit.  This requires
> a couple of per-cpu counters, an unordered per-cpu list and a
> globally ordered per-cpu list.
> 
> Create a per-cpu structure to hold these and all the management
> interfaces needed, as well as the hooks to handle hotplug CPUs.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_log_cil.c       | 107 +++++++++++++++++++++++++++++++++++++
>  fs/xfs/xfs_log_priv.h      |  13 +++++
>  include/linux/cpuhotplug.h |   1 +
>  3 files changed, 121 insertions(+)
> 
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index 738dc4248113..791ed1058fb4 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -1369,6 +1369,106 @@ xfs_log_item_in_current_chkpt(
>  	return lip->li_seq == cil->xc_ctx->sequence;
>  }
>  
> +#ifdef CONFIG_HOTPLUG_CPU
> +static LIST_HEAD(xlog_cil_pcp_list);
> +static DEFINE_SPINLOCK(xlog_cil_pcp_lock);
> +static bool xlog_cil_pcp_init;
> +
> +/*
> + * Move dead percpu state to the relevant CIL context structures.
> + *
> + * We have to lock the CIL context here to ensure that nothing is modifying
> + * the percpu state, either addition or removal. Both of these are done under
> + * the CIL context lock, so grabbing that exclusively here will ensure we can
> + * safely drain the cilpcp for the CPU that is dying.
> + */
> +static int
> +xlog_cil_pcp_dead(
> +	unsigned int		cpu)
> +{
> +	struct xfs_cil		*cil, *n;
> +
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_for_each_entry_safe(cil, n, &xlog_cil_pcp_list, xc_pcp_list) {
> +		spin_unlock(&xlog_cil_pcp_lock);
> +		down_write(&cil->xc_ctx_lock);
> +		/* move stuff on dead CPU to context */
> +		up_write(&cil->xc_ctx_lock);
> +		spin_lock(&xlog_cil_pcp_lock);
> +	}
> +	spin_unlock(&xlog_cil_pcp_lock);
> +	return 0;
> +}
> +
> +static int
> +xlog_cil_pcp_hpadd(
> +	struct xfs_cil		*cil)
> +{
> +	if (!xlog_cil_pcp_init) {
> +		int	ret;
> +
> +		ret = cpuhp_setup_state_nocalls(CPUHP_XFS_CIL_DEAD,
> +						"xfs/cil_pcp:dead", NULL,
> +						xlog_cil_pcp_dead);
> +		if (ret < 0) {
> +			xfs_warn(cil->xc_log->l_mp,
> +	"Failed to initialise CIL hotplug, error %d. XFS is non-functional.",
> +				ret);
> +			ASSERT(0);
> +			return -ENOMEM;
> +		}
> +		xlog_cil_pcp_init = true;
> +	}
> +
> +	INIT_LIST_HEAD(&cil->xc_pcp_list);
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_add(&cil->xc_pcp_list, &xlog_cil_pcp_list);
> +	spin_unlock(&xlog_cil_pcp_lock);
> +	return 0;
> +}
> +
> +static void
> +xlog_cil_pcp_hpremove(
> +	struct xfs_cil		*cil)
> +{
> +	spin_lock(&xlog_cil_pcp_lock);
> +	list_del(&cil->xc_pcp_list);
> +	spin_unlock(&xlog_cil_pcp_lock);
> +}
> +
> +#else /* !CONFIG_HOTPLUG_CPU */
> +static inline void xlog_cil_pcp_hpadd(struct xfs_cil *cil) {}

The signature doesn't match the one above, which I think is why the
kbuild robot complained.

Now that I've gotten my head wrapped around the percpu cil tracking
structures, I think the set looks reasonable.  I have some logistical
questions about this very long series, but I'll ask them in a reply to
the cover letter.

With the !HOTPLUG function signature fixed,
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> +static inline void xlog_cil_pcp_hpremove(struct xfs_cil *cil) {}
> +#endif
> +
> +static void __percpu *
> +xlog_cil_pcp_alloc(
> +	struct xfs_cil		*cil)
> +{
> +	void __percpu		*pcp;
> +
> +	pcp = alloc_percpu(struct xlog_cil_pcp);
> +	if (!pcp)
> +		return NULL;
> +
> +	if (xlog_cil_pcp_hpadd(cil) < 0) {
> +		free_percpu(pcp);
> +		return NULL;
> +	}
> +	return pcp;
> +}
> +
> +static void
> +xlog_cil_pcp_free(
> +	struct xfs_cil		*cil,
> +	void __percpu		*pcp)
> +{
> +	if (!pcp)
> +		return;
> +	xlog_cil_pcp_hpremove(cil);
> +	free_percpu(pcp);
> +}
> +
>  /*
>   * Perform initial CIL structure initialisation.
>   */
> @@ -1383,6 +1483,12 @@ xlog_cil_init(
>  	if (!cil)
>  		return -ENOMEM;
>  
> +	cil->xc_pcp = xlog_cil_pcp_alloc(cil);
> +	if (!cil->xc_pcp) {
> +		kmem_free(cil);
> +		return -ENOMEM;
> +	}
> +
>  	INIT_LIST_HEAD(&cil->xc_cil);
>  	INIT_LIST_HEAD(&cil->xc_committing);
>  	spin_lock_init(&cil->xc_cil_lock);
> @@ -1413,6 +1519,7 @@ xlog_cil_destroy(
>  
>  	ASSERT(list_empty(&cil->xc_cil));
>  	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
> +	xlog_cil_pcp_free(cil, cil->xc_pcp);
>  	kmem_free(cil);
>  }
>  
> diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
> index 85a85ab569fe..d76deffa4690 100644
> --- a/fs/xfs/xfs_log_priv.h
> +++ b/fs/xfs/xfs_log_priv.h
> @@ -227,6 +227,14 @@ struct xfs_cil_ctx {
>  	struct work_struct	push_work;
>  };
>  
> +/*
> + * Per-cpu CIL tracking items
> + */
> +struct xlog_cil_pcp {
> +	struct list_head	busy_extents;
> +	struct list_head	log_items;
> +};
> +
>  /*
>   * Committed Item List structure
>   *
> @@ -260,6 +268,11 @@ struct xfs_cil {
>  	wait_queue_head_t	xc_commit_wait;
>  	xfs_csn_t		xc_current_sequence;
>  	wait_queue_head_t	xc_push_wait;	/* background push throttle */
> +
> +	void __percpu		*xc_pcp;	/* percpu CIL structures */
> +#ifdef CONFIG_HOTPLUG_CPU
> +	struct list_head	xc_pcp_list;
> +#endif
>  } ____cacheline_aligned_in_smp;
>  
>  /* xc_flags bit values */
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 4a62b3980642..3d3ccde9e9c8 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -52,6 +52,7 @@ enum cpuhp_state {
>  	CPUHP_FS_BUFF_DEAD,
>  	CPUHP_PRINTK_DEAD,
>  	CPUHP_MM_MEMCQ_DEAD,
> +	CPUHP_XFS_CIL_DEAD,
>  	CPUHP_PERCPU_CNT_DEAD,
>  	CPUHP_RADIX_DEAD,
>  	CPUHP_PAGE_ALLOC_DEAD,
> -- 
> 2.31.1
> 

  parent reply	other threads:[~2021-06-03 16:49 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-03  5:22 [PATCH 00/39 v5] xfs: CIL and log optimisations Dave Chinner
2021-06-03  5:22 ` [PATCH 01/39] xfs: log stripe roundoff is a property of the log Dave Chinner
2021-06-03  5:22 ` [PATCH 02/39] xfs: separate CIL commit record IO Dave Chinner
2021-06-03  5:22 ` [PATCH 03/39] xfs: remove xfs_blkdev_issue_flush Dave Chinner
2021-06-03  5:22 ` [PATCH 04/39] xfs: async blkdev cache flush Dave Chinner
2021-06-03  5:22 ` [PATCH 05/39] xfs: CIL checkpoint flushes caches unconditionally Dave Chinner
2021-06-03  5:22 ` [PATCH 06/39] xfs: remove need_start_rec parameter from xlog_write() Dave Chinner
2021-06-03 22:29   ` Allison Henderson
2021-06-03  5:22 ` [PATCH 07/39] xfs: journal IO cache flush reductions Dave Chinner
2021-06-03 22:29   ` Allison Henderson
2021-06-04  5:15   ` [xfs] 80d287de7b: stress-ng.dir.ops_per_sec 80.1% improvement kernel test robot
2021-06-04  5:15     ` kernel test robot
2021-06-03  5:22 ` [PATCH 08/39] xfs: Fix CIL throttle hang when CIL space used going backwards Dave Chinner
2021-06-03 22:29   ` Allison Henderson
2021-06-03  5:22 ` [PATCH 09/39] xfs: xfs_log_force_lsn isn't passed a LSN Dave Chinner
2021-06-03 22:29   ` Allison Henderson
2021-06-03  5:22 ` [PATCH 10/39] xfs: AIL needs asynchronous CIL forcing Dave Chinner
2021-06-03 22:29   ` Allison Henderson
2021-06-03  5:22 ` [PATCH 11/39] xfs: CIL work is serialised, not pipelined Dave Chinner
2021-06-03  5:22 ` [PATCH 12/39] xfs: factor out the CIL transaction header building Dave Chinner
2021-06-03  5:22 ` [PATCH 13/39] xfs: only CIL pushes require a start record Dave Chinner
2021-06-03  5:22 ` [PATCH 14/39] xfs: embed the xlog_op_header in the unmount record Dave Chinner
2021-06-03  5:22 ` [PATCH 15/39] xfs: embed the xlog_op_header in the commit record Dave Chinner
2021-06-03  5:22 ` [PATCH 16/39] xfs: log tickets don't need log client id Dave Chinner
2021-06-03  5:22 ` [PATCH 17/39] xfs: move log iovec alignment to preparation function Dave Chinner
2021-06-03  5:22 ` [PATCH 18/39] xfs: reserve space and initialise xlog_op_header in item formatting Dave Chinner
2021-06-03  5:22 ` [PATCH 19/39] xfs: log ticket region debug is largely useless Dave Chinner
2021-06-03  5:22 ` [PATCH 20/39] xfs: pass lv chain length into xlog_write() Dave Chinner
2021-06-03  5:22 ` [PATCH 21/39] xfs: introduce xlog_write_single() Dave Chinner
2021-06-03  5:22 ` [PATCH 22/39] xfs:_introduce xlog_write_partial() Dave Chinner
2021-06-03  5:22 ` [PATCH 23/39] xfs: xlog_write() no longer needs contwr state Dave Chinner
2021-06-03  5:22 ` [PATCH 24/39] xfs: xlog_write() doesn't need optype anymore Dave Chinner
2021-06-03  5:22 ` [PATCH 25/39] xfs: CIL context doesn't need to count iovecs Dave Chinner
2021-06-03  5:22 ` [PATCH 26/39] xfs: use the CIL space used counter for emptiness checks Dave Chinner
2021-06-03  5:22 ` [PATCH 27/39] xfs: lift init CIL reservation out of xc_cil_lock Dave Chinner
2021-06-03  5:22 ` [PATCH 28/39] xfs: rework per-iclog header CIL reservation Dave Chinner
2021-06-03  5:22 ` [PATCH 29/39] xfs: introduce per-cpu CIL tracking structure Dave Chinner
2021-06-03  9:18   ` kernel test robot
2021-06-03  9:18     ` kernel test robot
2021-06-03 10:08   ` kernel test robot
2021-06-03 10:08     ` kernel test robot
2021-06-03 16:49   ` Darrick J. Wong [this message]
2021-06-03  5:22 ` [PATCH 30/39] xfs: implement percpu cil space used calculation Dave Chinner
2021-06-03 16:44   ` Darrick J. Wong
2021-06-03  5:22 ` [PATCH 31/39] xfs: track CIL ticket reservation in percpu structure Dave Chinner
2021-06-03 16:43   ` Darrick J. Wong
2021-06-03  5:22 ` [PATCH 32/39] xfs: convert CIL busy extents to per-cpu Dave Chinner
2021-06-03  5:22 ` [PATCH 33/39] xfs: Add order IDs to log items in CIL Dave Chinner
2021-06-03  5:22 ` [PATCH 34/39] xfs: convert CIL to unordered per cpu lists Dave Chinner
2021-06-03  5:22 ` [PATCH 35/39] xfs: convert log vector chain to use list heads Dave Chinner
2021-06-03 10:16   ` kernel test robot
2021-06-03 10:16     ` kernel test robot
2021-06-03  5:22 ` [PATCH 36/39] xfs: move CIL ordering to the logvec chain Dave Chinner
2021-06-03  5:22 ` [PATCH 37/39] xfs: avoid cil push lock if possible Dave Chinner
2021-06-03  5:22 ` [PATCH 38/39] xfs: xlog_sync() manually adjusts grant head space Dave Chinner
2021-06-03  5:22 ` [PATCH 39/39] xfs: expanding delayed logging design with background material Dave Chinner
2021-06-03 17:05 ` [PATCH 00/39 v5] xfs: CIL and log optimisations Darrick J. Wong
2021-06-03 22:43   ` Dave Chinner
  -- strict thread matches above, loose matches on Subject: below --
2021-05-19 12:12 [PATCH 00/39 v4] " Dave Chinner
2021-05-19 12:13 ` [PATCH 29/39] xfs: introduce per-cpu CIL tracking structure Dave Chinner
2021-05-27 18:31   ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210603164937.GG26402@locust \
    --to=djwong@kernel.org \
    --cc=david@fromorbit.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.