All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Huang, Ying" <ying.huang@intel.com>
To: Miaohe Lin <linmiaohe@huawei.com>
Cc: <akpm@linux-foundation.org>, <dennis@kernel.org>,
	<tim.c.chen@linux.intel.com>, <hughd@google.com>,
	<hannes@cmpxchg.org>, <mhocko@suse.com>, <iamjoonsoo.kim@lge.com>,
	<alexs@kernel.org>, <david@redhat.com>, <minchan@kernel.org>,
	<richard.weiyang@gmail.com>, <linux-kernel@vger.kernel.org>,
	<linux-mm@kvack.org>
Subject: Re: [PATCH v2 1/5] mm/swapfile: add percpu_ref support for swap
Date: Mon, 19 Apr 2021 15:09:49 +0800	[thread overview]
Message-ID: <87czuq4uo2.fsf@yhuang6-desk1.ccr.corp.intel.com> (raw)
In-Reply-To: <753f414f-34a1-b16a-f826-7deb2dcd4af6@huawei.com> (Miaohe Lin's message of "Mon, 19 Apr 2021 14:46:07 +0800")

Miaohe Lin <linmiaohe@huawei.com> writes:

> On 2021/4/19 10:48, Huang, Ying wrote:
>> Miaohe Lin <linmiaohe@huawei.com> writes:
>> 
>>> We will use percpu-refcount to serialize against concurrent swapoff. This
>>> patch adds the percpu_ref support for swap.
>>>
>>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>>> ---
>>>  include/linux/swap.h |  3 +++
>>>  mm/swapfile.c        | 33 +++++++++++++++++++++++++++++----
>>>  2 files changed, 32 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>>> index 144727041e78..8be36eb58b7a 100644
>>> --- a/include/linux/swap.h
>>> +++ b/include/linux/swap.h
>>> @@ -240,6 +240,7 @@ struct swap_cluster_list {
>>>   * The in-memory structure used to track swap areas.
>>>   */
>>>  struct swap_info_struct {
>>> +	struct percpu_ref users;	/* serialization against concurrent swapoff */
>> 
>> The comments aren't general enough.  We use this to check whether the
>> swap device has been fully initialized, etc. May be something as below?
>> 
>> /* indicate and keep swap device valid */
>
> Looks good.
>
>> 
>>>  	unsigned long	flags;		/* SWP_USED etc: see above */
>>>  	signed short	prio;		/* swap priority of this type */
>>>  	struct plist_node list;		/* entry in swap_active_head */
>>> @@ -260,6 +261,8 @@ struct swap_info_struct {
>>>  	struct block_device *bdev;	/* swap device or bdev of swap file */
>>>  	struct file *swap_file;		/* seldom referenced */
>>>  	unsigned int old_block_size;	/* seldom referenced */
>>> +	bool ref_initialized;		/* seldom referenced */
>>> +	struct completion comp;		/* seldom referenced */
>>>  #ifdef CONFIG_FRONTSWAP
>>>  	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
>>>  	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
>>> diff --git a/mm/swapfile.c b/mm/swapfile.c
>>> index 149e77454e3c..66515a3a2824 100644
>>> --- a/mm/swapfile.c
>>> +++ b/mm/swapfile.c
>>> @@ -39,6 +39,7 @@
>>>  #include <linux/export.h>
>>>  #include <linux/swap_slots.h>
>>>  #include <linux/sort.h>
>>> +#include <linux/completion.h>
>>>  
>>>  #include <asm/tlbflush.h>
>>>  #include <linux/swapops.h>
>>> @@ -511,6 +512,14 @@ static void swap_discard_work(struct work_struct *work)
>>>  	spin_unlock(&si->lock);
>>>  }
>>>  
>>> +static void swap_users_ref_free(struct percpu_ref *ref)
>>> +{
>>> +	struct swap_info_struct *si;
>>> +
>>> +	si = container_of(ref, struct swap_info_struct, users);
>>> +	complete(&si->comp);
>>> +}
>>> +
>>>  static void alloc_cluster(struct swap_info_struct *si, unsigned long idx)
>>>  {
>>>  	struct swap_cluster_info *ci = si->cluster_info;
>>> @@ -2500,7 +2509,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
>>>  	 * Guarantee swap_map, cluster_info, etc. fields are valid
>>>  	 * between get/put_swap_device() if SWP_VALID bit is set
>>>  	 */
>>> -	synchronize_rcu();
>> 
>> You cannot remove this without changing get/put_swap_device().  It's
>> better to squash at least PATCH 1-2.
>
> Will squash PATCH 1-2. Thanks.
>
>> 
>>> +	percpu_ref_resurrect(&p->users);
>>>  	spin_lock(&swap_lock);
>>>  	spin_lock(&p->lock);
>>>  	_enable_swap_info(p);
>>> @@ -2621,11 +2630,18 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
>>>  	p->flags &= ~SWP_VALID;		/* mark swap device as invalid */
>>>  	spin_unlock(&p->lock);
>>>  	spin_unlock(&swap_lock);
>>> +
>>> +	percpu_ref_kill(&p->users);
>>>  	/*
>>> -	 * wait for swap operations protected by get/put_swap_device()
>>> -	 * to complete
>>> +	 * We need synchronize_rcu() here to protect the accessing
>>> +	 * to the swap cache data structure.
>>>  	 */
>>>  	synchronize_rcu();
>>> +	/*
>>> +	 * Wait for swap operations protected by get/put_swap_device()
>>> +	 * to complete.
>>> +	 */
>> 
>> I think the comments (after some revision) can be moved before
>> percpu_ref_kill().  The synchronize_rcu() comments can be merged.
>> 
>
> Ok.
>
>>> +	wait_for_completion(&p->comp);
>>>  
>>>  	flush_work(&p->discard_work);
>>>  
>>> @@ -3132,7 +3148,7 @@ static bool swap_discardable(struct swap_info_struct *si)
>>>  SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>>>  {
>>>  	struct swap_info_struct *p;
>>> -	struct filename *name;
>>> +	struct filename *name = NULL;
>>>  	struct file *swap_file = NULL;
>>>  	struct address_space *mapping;
>>>  	int prio;
>>> @@ -3163,6 +3179,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
>>>  
>>>  	INIT_WORK(&p->discard_work, swap_discard_work);
>>>  
>>> +	if (!p->ref_initialized) {
>> 
>> I don't think it's necessary to add another flag p->ref_initialized.  We
>> can distinguish newly allocated and reused swap_info_struct in alloc_swap_info().
>> 
>
> If newly allocated swap_info_struct failed to init percpu_ref, it will be considered as
> a reused one in alloc_swap_info() _but_ the field users of swap_info_struct is actually
> uninitialized. Does this make sense for you?

We can call percpu_ref_init() just after kvzalloc() in alloc_swap_info().

Best Regards,
Huang, Ying

> Many Thanks for quick review.
>
>> Best Regards,
>> Huang, Ying
>> 
>>> +		error = percpu_ref_init(&p->users, swap_users_ref_free,
>>> +					PERCPU_REF_INIT_DEAD, GFP_KERNEL);
>>> +		if (unlikely(error))
>>> +			goto bad_swap;
>>> +		init_completion(&p->comp);
>>> +		p->ref_initialized = true;
>>> +	}
>>> +
>>>  	name = getname(specialfile);
>>>  	if (IS_ERR(name)) {
>>>  		error = PTR_ERR(name);
>> .
>> 

  reply	other threads:[~2021-04-19  7:09 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-17  9:40 [PATCH v2 0/5] close various race windows for swap Miaohe Lin
2021-04-17  9:40 ` [PATCH v2 1/5] mm/swapfile: add percpu_ref support " Miaohe Lin
2021-04-19  2:48   ` Huang, Ying
2021-04-19  2:48     ` Huang, Ying
2021-04-19  6:46     ` Miaohe Lin
2021-04-19  7:09       ` Huang, Ying [this message]
2021-04-19  7:09         ` Huang, Ying
2021-04-19  7:35         ` Miaohe Lin
2021-04-19  7:52           ` Huang, Ying
2021-04-19  7:52             ` Huang, Ying
2021-04-19  8:20             ` Miaohe Lin
2021-04-17  9:40 ` [PATCH v2 2/5] mm/swapfile: use percpu_ref to serialize against concurrent swapoff Miaohe Lin
2021-04-19  2:54   ` Huang, Ying
2021-04-19  2:54     ` Huang, Ying
2021-04-19  6:57     ` Miaohe Lin
2021-04-17  9:40 ` [PATCH v2 3/5] swap: fix do_swap_page() race with swapoff Miaohe Lin
2021-04-19  2:23   ` Huang, Ying
2021-04-19  2:23     ` Huang, Ying
2021-04-19  6:54     ` Miaohe Lin
2021-04-17  9:40 ` [PATCH v2 4/5] mm/swap: remove confusing checking for non_swap_entry() in swap_ra_info() Miaohe Lin
2021-04-19  1:53   ` Huang, Ying
2021-04-19  1:53     ` Huang, Ying
2021-04-19  6:46     ` Miaohe Lin
2021-04-17  9:40 ` [PATCH v2 5/5] mm/shmem: fix shmem_swapin() race with swapoff Miaohe Lin
2021-04-19  2:15   ` Huang, Ying
2021-04-19  2:15     ` Huang, Ying
2021-04-19  6:49     ` Miaohe Lin
2021-04-19  7:04       ` Huang, Ying
2021-04-19  7:04         ` Huang, Ying
2021-04-19  7:14         ` Miaohe Lin
2021-04-19  7:41           ` Huang, Ying
2021-04-19  7:41             ` Huang, Ying
2021-04-19  8:18             ` Miaohe Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87czuq4uo2.fsf@yhuang6-desk1.ccr.corp.intel.com \
    --to=ying.huang@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexs@kernel.org \
    --cc=david@redhat.com \
    --cc=dennis@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=minchan@kernel.org \
    --cc=richard.weiyang@gmail.com \
    --cc=tim.c.chen@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.