nvdimm.lists.linux.dev archive mirror
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<nvdimm@lists.linux.dev>, <linux-mm@kvack.org>,
	<linux-fsdevel@vger.kernel.org>, <dan.j.williams@intel.com>,
	<david@fromorbit.com>, <hch@infradead.org>, <jane.chu@oracle.com>
Subject: Re: [PATCH v7 6/8] mm: Introduce mf_dax_kill_procs() for fsdax case
Date: Wed, 20 Oct 2021 13:47:50 +0800	[thread overview]
Message-ID: <25f86782-ff1f-db4d-d5da-fd1e5bee45f6@fujitsu.com> (raw)
In-Reply-To: <20211014193241.GK24307@magnolia>



On 2021/10/15 3:32, Darrick J. Wong wrote:
> On Fri, Sep 24, 2021 at 09:09:57PM +0800, Shiyang Ruan wrote:
>> This function is called at the end of the RMAP routine, i.e. the
>> filesystem recovery function, to collect and kill processes that are
>> using a shared page of a DAX file.  It differs from
>> mf_generic_kill_procs() in that it accepts the file's mapping and
>> offset instead of a struct page, because in fsdax mode different
>> files' mappings and offsets may share the same page.  Hence it is
>> called when filesystem RMAP results are found.
>>
>> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
>> ---
>>   fs/dax.c            | 10 ------
>>   include/linux/dax.h |  9 +++++
>>   include/linux/mm.h  |  2 ++
>>   mm/memory-failure.c | 83 ++++++++++++++++++++++++++++++++++++++++-----
>>   4 files changed, 86 insertions(+), 18 deletions(-)
>>
>> diff --git a/fs/dax.c b/fs/dax.c
>> index 509b65e60478..2536c105ec7f 100644
>> --- a/fs/dax.c
>> +++ b/fs/dax.c
>> @@ -852,16 +852,6 @@ static void *dax_insert_entry(struct xa_state *xas,
>>   	return entry;
>>   }
>>   
>> -static inline
>> -unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
>> -{
>> -	unsigned long address;
>> -
>> -	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
>> -	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
>> -	return address;
>> -}
>> -
>>   /* Walk all mappings of a given index of a file and writeprotect them */
>>   static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
>>   		unsigned long pfn)
>> diff --git a/include/linux/dax.h b/include/linux/dax.h
>> index 65411bee4312..3d90becbd160 100644
>> --- a/include/linux/dax.h
>> +++ b/include/linux/dax.h
>> @@ -258,6 +258,15 @@ static inline bool dax_mapping(struct address_space *mapping)
>>   {
>>   	return mapping->host && IS_DAX(mapping->host);
>>   }
>> +static inline unsigned long pgoff_address(pgoff_t pgoff,
>> +		struct vm_area_struct *vma)
>> +{
>> +	unsigned long address;
>> +
>> +	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
>> +	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
>> +	return address;
>> +}
>>   
>>   #ifdef CONFIG_DEV_DAX_HMEM_DEVICES
>>   void hmem_register_device(int target_nid, struct resource *r);
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 73a52aba448f..d06af0051e53 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -3114,6 +3114,8 @@ enum mf_flags {
>>   	MF_MUST_KILL = 1 << 2,
>>   	MF_SOFT_OFFLINE = 1 << 3,
>>   };
>> +extern int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
>> +			     size_t size, int flags);
>>   extern int memory_failure(unsigned long pfn, int flags);
>>   extern void memory_failure_queue(unsigned long pfn, int flags);
>>   extern void memory_failure_queue_kick(int cpu);
>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index 85eab206b68f..a9d0d487d205 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -302,10 +302,9 @@ void shake_page(struct page *p)
>>   }
>>   EXPORT_SYMBOL_GPL(shake_page);
>>   
>> -static unsigned long dev_pagemap_mapping_shift(struct page *page,
>> +static unsigned long dev_pagemap_mapping_shift(unsigned long address,
>>   		struct vm_area_struct *vma)
>>   {
>> -	unsigned long address = vma_address(page, vma);
>>   	pgd_t *pgd;
>>   	p4d_t *p4d;
>>   	pud_t *pud;
>> @@ -345,7 +344,7 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
>>    * Schedule a process for later kill.
>>    * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
>>    */
>> -static void add_to_kill(struct task_struct *tsk, struct page *p,
>> +static void add_to_kill(struct task_struct *tsk, struct page *p, pgoff_t pgoff,
> 
> Hm, so I guess you're passing the page and the pgoff now because
> page->index is meaningless for shared dax pages?  Ok.

Yes, it is for that case.
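
To spell it out with a toy illustration (my own sketch, not from the 
patch; the helper name fsdax_addr_in_vma() is made up): with reflink, 
two files can map the same pmem pfn at different file offsets, so the 
pfn's struct page cannot carry one meaningful ->mapping/->index pair.  
The filesystem RMAP walk therefore hands us (mapping, pgoff) pairs, and 
add_to_kill() recomputes the user address from that pgoff:

	/*
	 * fileA: pgoff 0x10 --\
	 *                      +--> same pmem pfn
	 * fileB: pgoff 0x80 --/
	 */
	static unsigned long fsdax_addr_in_vma(pgoff_t pgoff,
					       struct vm_area_struct *vma)
	{
		/* same arithmetic as pgoff_address() above */
		return vma->vm_start +
		       ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	}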

> 
>>   		       struct vm_area_struct *vma,
>>   		       struct list_head *to_kill)
>>   {
>> @@ -358,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
>>   	}
>>   
>>   	tk->addr = page_address_in_vma(p, vma);
>> -	if (is_zone_device_page(p))
>> -		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
>> -	else
>> +	if (is_zone_device_page(p)) {
>> +		/*
>> +		 * Since page->mapping is no longer used for fsdax, we should
>> +		 * calculate the address in an fsdax-specific way.
>> +		 */
>> +		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
>> +			tk->addr = pgoff_address(pgoff, vma);
>> +		tk->size_shift = dev_pagemap_mapping_shift(tk->addr, vma);
>> +	} else
>>   		tk->size_shift = page_shift(compound_head(p));
>>   
>>   	/*
>> @@ -508,7 +513,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
>>   			if (!page_mapped_in_vma(page, vma))
>>   				continue;
>>   			if (vma->vm_mm == t->mm)
>> -				add_to_kill(t, page, vma, to_kill);
>> +				add_to_kill(t, page, 0, vma, to_kill);
>>   		}
>>   	}
>>   	read_unlock(&tasklist_lock);
>> @@ -544,7 +549,32 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
>>   			 * to be informed of all such data corruptions.
>>   			 */
>>   			if (vma->vm_mm == t->mm)
>> -				add_to_kill(t, page, vma, to_kill);
>> +				add_to_kill(t, page, 0, vma, to_kill);
>> +		}
>> +	}
>> +	read_unlock(&tasklist_lock);
>> +	i_mmap_unlock_read(mapping);
>> +}
>> +
>> +/*
>> + * Collect processes when the error hit a fsdax page.
>> + */
>> +static void collect_procs_fsdax(struct page *page, struct address_space *mapping,
>> +		pgoff_t pgoff, struct list_head *to_kill)
>> +{
>> +	struct vm_area_struct *vma;
>> +	struct task_struct *tsk;
>> +
>> +	i_mmap_lock_read(mapping);
>> +	read_lock(&tasklist_lock);
>> +	for_each_process(tsk) {
>> +		struct task_struct *t = task_early_kill(tsk, true);
>> +
>> +		if (!t)
>> +			continue;
>> +		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
>> +			if (vma->vm_mm == t->mm)
>> +				add_to_kill(t, page, pgoff, vma, to_kill);
>>   		}
>>   	}
>>   	read_unlock(&tasklist_lock);
>> @@ -1503,6 +1533,43 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags,
>>   	return 0;
>>   }
>>   
>> +/**
>> + * mf_dax_kill_procs - Collect and kill processes who are using this file range
>> + * @mapping:	the file in use
>> + * @index:	start offset of the range
>> + * @size:	length of the range
> 
> It feels odd that one argument is in units of pgoff_t but the other is
> in bytes.

The @index is page-aligned, but @size may not be.  I will explain this 
in more detail in the comments.
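
For example (just a sketch of how I read the loop below, assuming 
PAGE_SIZE == 4096): with @index == 2 and @size == 4608, end == 0x3200, 
so pages 2 and 3 are both processed even though only the first 512 
bytes of page 3 fall inside the range:

	size_t end = ((size_t)index << PAGE_SHIFT) + size;

	for (; ((size_t)index << PAGE_SHIFT) < end; index++) {
		/* lock and handle the dax entry at page offset @index */
	}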

> 
>> + * @flags:	memory failure flags
>> + */
>> +int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
>> +		size_t size, int flags)
>> +{
>> +	LIST_HEAD(to_kill);
>> +	dax_entry_t cookie;
>> +	struct page *page;
>> +	size_t end = (index << PAGE_SHIFT) + size;
>> +
>> +	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
> 
> Hm.  What flags will we be passing to the xfs_dax_notify_failure_fn?
> Does XFS itself have to care about what's in the flags values, or is it
> really just a magic cookie to be passed from the mm layer into the fs
> and back to mf_dax_kill_procs?
> 

It is just passed through from the mm layer to mf_dax_kill_procs(). 
Nothing inside this RMAP process cares about or changes it.  As you 
mentioned in the next patch, I think it should be named with an "mf_" 
prefix to make it easier to understand.
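
To illustrate the pass-through (a hypothetical snippet, not the actual 
XFS code from patch 7; the callback name is made up): once the rmap 
walk resolves the poisoned range into a (mapping, index, size) triple, 
the filesystem simply forwards the flags it was given:

	static int example_notify_failure_cb(struct address_space *mapping,
					     pgoff_t index, size_t size,
					     int mf_flags)
	{
		/* mf_flags is opaque here; only the mm layer interprets it */
		return mf_dax_kill_procs(mapping, index, size, mf_flags);
	}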


--
Thanks,
Ruan.

> --D
> 
>> +
>> +	for (; (index << PAGE_SHIFT) < end; index++) {
>> +		page = NULL;
>> +		cookie = dax_lock_mapping_entry(mapping, index, &page);
>> +		if (!cookie)
>> +			return -EBUSY;
>> +		if (!page)
>> +			goto unlock;
>> +
>> +		SetPageHWPoison(page);
>> +
>> +		collect_procs_fsdax(page, mapping, index, &to_kill);
>> +		unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
>> +				index, flags);
>> +unlock:
>> +		dax_unlock_mapping_entry(mapping, index, cookie);
>> +	}
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
>> +
>>   static int memory_failure_hugetlb(unsigned long pfn, int flags)
>>   {
>>   	struct page *p = pfn_to_page(pfn);
>> -- 
>> 2.33.0
>>
>>
>>



Thread overview: 26+ messages
2021-09-24 13:09 [PATCH v7 0/8] fsdax: introduce fs query to support reflink Shiyang Ruan
2021-09-24 13:09 ` [PATCH v7 1/8] dax: Use rwsem for dax_{read,write}_lock() Shiyang Ruan
2021-10-14 17:48   ` Darrick J. Wong
2021-10-20  5:19     ` Shiyang Ruan
2021-10-15  6:30   ` Christoph Hellwig
2021-09-24 13:09 ` [PATCH v7 2/8] dax: Introduce holder for dax_device Shiyang Ruan
2021-10-14 18:00   ` Darrick J. Wong
2021-10-20  6:58     ` Shiyang Ruan
2021-09-24 13:09 ` [PATCH v7 3/8] mm: factor helpers for memory_failure_dev_pagemap Shiyang Ruan
2021-10-14 18:02   ` Darrick J. Wong
2021-10-15  6:33   ` Christoph Hellwig
2021-09-24 13:09 ` [PATCH v7 4/8] pagemap,pmem: Introduce ->memory_failure() Shiyang Ruan
2021-10-14 18:05   ` Darrick J. Wong
2021-10-20  5:25     ` Shiyang Ruan
2021-10-15  6:36   ` Christoph Hellwig
2021-09-24 13:09 ` [PATCH v7 5/8] fsdax: Introduce dax_lock_mapping_entry() Shiyang Ruan
2021-10-14 18:17   ` Darrick J. Wong
2021-09-24 13:09 ` [PATCH v7 6/8] mm: Introduce mf_dax_kill_procs() for fsdax case Shiyang Ruan
2021-10-14 19:32   ` Darrick J. Wong
2021-10-20  5:47     ` Shiyang Ruan [this message]
2021-09-24 13:09 ` [PATCH v7 7/8] xfs: Implement ->notify_failure() for XFS Shiyang Ruan
2021-10-14 19:21   ` Darrick J. Wong
2021-10-15  6:41   ` Christoph Hellwig
2021-09-24 13:09 ` [PATCH v7 8/8] fsdax: add exception for reflinked files Shiyang Ruan
2021-10-14 19:24   ` Darrick J. Wong
2021-10-15  6:38     ` Christoph Hellwig
