All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<nvdimm@lists.linux.dev>, <linux-mm@kvack.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: <djwong@kernel.org>, <dan.j.williams@intel.com>,
	<david@fromorbit.com>, <hch@infradead.org>,
	<akpm@linux-foundation.org>, <jane.chu@oracle.com>,
	<rgoldwyn@suse.de>, <viro@zeniv.linux.org.uk>,
	<willy@infradead.org>, <naoya.horiguchi@nec.com>,
	<linmiaohe@huawei.com>, Christoph Hellwig <hch@lst.de>
Subject: [PATCH v2 05/14] mm: Introduce mf_dax_kill_procs() for fsdax case
Date: Fri, 3 Jun 2022 13:37:29 +0800	[thread overview]
Message-ID: <20220603053738.1218681-6-ruansy.fnst@fujitsu.com> (raw)
In-Reply-To: <20220603053738.1218681-1-ruansy.fnst@fujitsu.com>

This new function is a variant of mf_generic_kill_procs that accepts a
file, offset pair instead of a struct to support multiple files sharing
a DAX mapping.  It is intended to be called by the file systems as part
of the memory_failure handler after the file system performed a reverse
mapping from the storage address to the file and file offset.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
---
 include/linux/mm.h  |  2 +
 mm/memory-failure.c | 96 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 88 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8a96197b9afd..623c2ee8330a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3250,6 +3250,8 @@ enum mf_flags {
 	MF_UNPOISON = 1 << 4,
 	MF_NO_RETRY = 1 << 5,
 };
+int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
+		      unsigned long count, int mf_flags);
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
 extern void memory_failure_queue_kick(int cpu);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a9d93c30a1e4..5d015e1387bd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -301,10 +301,9 @@ void shake_page(struct page *p)
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
-static unsigned long dev_pagemap_mapping_shift(struct page *page,
-		struct vm_area_struct *vma)
+static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
+		unsigned long address)
 {
-	unsigned long address = vma_address(page, vma);
 	unsigned long ret = 0;
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -344,10 +343,14 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
 /*
  * Schedule a process for later kill.
  * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
+ *
+ * Notice: @fsdax_pgoff is used only when @p is a fsdax page.
+ *   In other cases, such as anonymous and file-backend page, the address to be
+ *   killed can be caculated by @p itself.
  */
 static void add_to_kill(struct task_struct *tsk, struct page *p,
-		       struct vm_area_struct *vma,
-		       struct list_head *to_kill)
+			pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
+			struct list_head *to_kill)
 {
 	struct to_kill *tk;
 
@@ -358,9 +361,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	}
 
 	tk->addr = page_address_in_vma(p, vma);
-	if (is_zone_device_page(p))
-		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
-	else
+	if (is_zone_device_page(p)) {
+		/*
+		 * Since page->mapping is not used for fsdax, we need
+		 * calculate the address based on the vma.
+		 */
+		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
+			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
+		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
+	} else
 		tk->size_shift = page_shift(compound_head(p));
 
 	/*
@@ -509,7 +518,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 			if (!page_mapped_in_vma(page, vma))
 				continue;
 			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, vma, to_kill);
+				add_to_kill(t, page, 0, vma, to_kill);
 		}
 	}
 	read_unlock(&tasklist_lock);
@@ -545,13 +554,41 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 			 * to be informed of all such data corruptions.
 			 */
 			if (vma->vm_mm == t->mm)
-				add_to_kill(t, page, vma, to_kill);
+				add_to_kill(t, page, 0, vma, to_kill);
 		}
 	}
 	read_unlock(&tasklist_lock);
 	i_mmap_unlock_read(mapping);
 }
 
+#ifdef CONFIG_FS_DAX
+/*
+ * Collect processes when the error hit a fsdax page.
+ */
+static void collect_procs_fsdax(struct page *page,
+		struct address_space *mapping, pgoff_t pgoff,
+		struct list_head *to_kill)
+{
+	struct vm_area_struct *vma;
+	struct task_struct *tsk;
+
+	i_mmap_lock_read(mapping);
+	read_lock(&tasklist_lock);
+	for_each_process(tsk) {
+		struct task_struct *t = task_early_kill(tsk, true);
+
+		if (!t)
+			continue;
+		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
+			if (vma->vm_mm == t->mm)
+				add_to_kill(t, page, pgoff, vma, to_kill);
+		}
+	}
+	read_unlock(&tasklist_lock);
+	i_mmap_unlock_read(mapping);
+}
+#endif /* CONFIG_FS_DAX */
+
 /*
  * Collect the processes who have the corrupted page mapped to kill.
  */
@@ -1591,6 +1628,45 @@ static int mf_generic_kill_procs(unsigned long long pfn, int flags,
 	return rc;
 }
 
+#ifdef CONFIG_FS_DAX
+/**
+ * mf_dax_kill_procs - Collect and kill processes who are using this file range
+ * @mapping:	address_space of the file in use
+ * @index:	start pgoff of the range within the file
+ * @count:	length of the range, in unit of PAGE_SIZE
+ * @mf_flags:	memory failure flags
+ */
+int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
+		unsigned long count, int mf_flags)
+{
+	LIST_HEAD(to_kill);
+	dax_entry_t cookie;
+	struct page *page;
+	size_t end = index + count;
+
+	mf_flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
+
+	for (; index < end; index++) {
+		page = NULL;
+		cookie = dax_lock_mapping_entry(mapping, index, &page);
+		if (!cookie)
+			return -EBUSY;
+		if (!page)
+			goto unlock;
+
+		SetPageHWPoison(page);
+
+		collect_procs_fsdax(page, mapping, index, &to_kill);
+		unmap_and_kill(&to_kill, page_to_pfn(page), mapping,
+				index, mf_flags);
+unlock:
+		dax_unlock_mapping_entry(mapping, index, cookie);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
+#endif /* CONFIG_FS_DAX */
+
 /*
  * Called from hugetlb code with hugetlb_lock held.
  *
-- 
2.36.1




  parent reply	other threads:[~2022-06-03  5:37 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-03  5:37 [PATCHSETS v2] v14 fsdax-rmap + v11 fsdax-reflink Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 01/14] dax: Introduce holder for dax_device Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 02/14] mm: factor helpers for memory_failure_dev_pagemap Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 03/14] pagemap,pmem: Introduce ->memory_failure() Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 04/14] fsdax: Introduce dax_lock_mapping_entry() Shiyang Ruan
2022-06-03  5:37 ` Shiyang Ruan [this message]
2022-08-24 21:52   ` [PATCH v2 05/14] mm: Introduce mf_dax_kill_procs() for fsdax case Dan Williams
2022-08-24 23:42     ` HORIGUCHI NAOYA(堀口 直也)
2022-08-25  4:33       ` Dan Williams
2022-08-25  5:05         ` Dan Williams
2022-08-25 19:28           ` Dan Williams
2022-06-03  5:37 ` [PATCH v2 06/14] xfs: Implement ->notify_failure() for XFS Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 07/14] fsdax: set a CoW flag when associate reflink mappings Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 08/14] fsdax: Output address in dax_iomap_pfn() and rename it Shiyang Ruan
2022-06-07 14:38   ` [PATCH v2.1 " Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 09/14] fsdax: Introduce dax_iomap_cow_copy() Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 10/14] fsdax: Replace mmap entry in case of CoW Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 11/14] fsdax: Add dax_iomap_cow_copy() for dax zero Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 12/14] fsdax: Dedup file range to use a compare function Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 13/14] xfs: support CoW in fsdax mode Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 14/14] xfs: Add dax dedupe support Shiyang Ruan
2022-06-17  2:31 ` [PATCHSETS v2] v14 fsdax-rmap + v11 fsdax-reflink Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220603053738.1218681-6-ruansy.fnst@fujitsu.com \
    --to=ruansy.fnst@fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=hch@lst.de \
    --cc=jane.chu@oracle.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=naoya.horiguchi@nec.com \
    --cc=nvdimm@lists.linux.dev \
    --cc=rgoldwyn@suse.de \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.