All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<linux-nvdimm@lists.01.org>, <linux-mm@kvack.org>
Cc: linux-fsdevel@vger.kernel.org, linux-raid@vger.kernel.org,
	darrick.wong@oracle.com, david@fromorbit.com, hch@lst.de,
	song@kernel.org, rgoldwyn@suse.de, qi.fuli@fujitsu.com,
	y-goto@fujitsu.com
Subject: [RFC PATCH v3 5/9] mm, pmem: Implement ->memory_failure() in pmem driver
Date: Tue, 15 Dec 2020 20:14:10 +0800	[thread overview]
Message-ID: <20201215121414.253660-6-ruansy.fnst@cn.fujitsu.com> (raw)
In-Reply-To: <20201215121414.253660-1-ruansy.fnst@cn.fujitsu.com>

Call the ->memory_failure() handler implemented by the pmem driver so
that the filesystem is ultimately notified and can handle the corrupted
data.  The old code that collected and killed the affected processes is
moved into mf_dax_mapping_kill_procs(), which will be called by the
filesystem.

Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
---
 drivers/nvdimm/pmem.c | 24 +++++++++++++++++
 mm/memory-failure.c   | 62 +++++++------------------------------------
 2 files changed, 34 insertions(+), 52 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 875076b0ea6c..4a114937c43b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -363,9 +363,33 @@ static void pmem_release_disk(void *__pmem)
 	put_disk(pmem->disk);
 }
 
+static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap,
+		unsigned long pfn, int flags)
+{
+	struct pmem_device *pdev;
+	struct gendisk *disk;
+	loff_t disk_offset;
+	int rc = 0;
+	unsigned long size = page_size(pfn_to_page(pfn));
+
+	pdev = container_of(pgmap, struct pmem_device, pgmap);
+	disk = pdev->disk;
+	if (!disk)
+		return -ENXIO;
+
+	disk_offset = PFN_PHYS(pfn) - pdev->phys_addr - pdev->data_offset;
+	if (disk->fops->corrupted_range) {
+		rc = disk->fops->corrupted_range(disk, NULL, disk_offset, size, &flags);
+		if (rc == -ENODEV)
+			rc = -ENXIO;
+	}
+	return rc;
+}
+
 static const struct dev_pagemap_ops fsdax_pagemap_ops = {
 	.kill			= pmem_pagemap_kill,
 	.cleanup		= pmem_pagemap_cleanup,
+	.memory_failure		= pmem_pagemap_memory_failure,
 };
 
 static int pmem_attach_disk(struct device *dev,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 03a4f4c1b803..10b39b14b4d7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1278,38 +1278,19 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		struct dev_pagemap *pgmap)
 {
 	struct page *page = pfn_to_page(pfn);
-	struct address_space *mapping = page->mapping;
-	pgoff_t index = page->index;
-	const bool unmap_success = true;
-	unsigned long size = 0, dummy_pfn;
-	struct to_kill *tk;
-	LIST_HEAD(to_kill);
-	int rc = -EBUSY;
-	loff_t start;
-	dax_entry_t cookie;
-
-	/*
-	 * Prevent the inode from being freed while we are interrogating
-	 * the address_space, typically this would be handled by
-	 * lock_page(), but dax pages do not use the page lock. This
-	 * also prevents changes to the mapping of this pfn until
-	 * poison signaling is complete.
-	 */
-	cookie = dax_lock(mapping, index, &dummy_pfn);
-	if (!cookie)
-		goto out;
-
-	if (hwpoison_filter(page)) {
-		rc = 0;
-		goto unlock;
-	}
+	int rc;
 
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
 		/*
 		 * TODO: Handle HMM pages which may need coordination
 		 * with device-side memory.
 		 */
-		goto unlock;
+		goto out;
+	}
+
+	if (hwpoison_filter(page)) {
+		rc = 0;
+		goto out;
 	}
 
 	/*
@@ -1318,33 +1299,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	 */
 	SetPageHWPoison(page);
 
-	/*
-	 * Unlike System-RAM there is no possibility to swap in a
-	 * different physical page at a given virtual address, so all
-	 * userspace consumption of ZONE_DEVICE memory necessitates
-	 * SIGBUS (i.e. MF_MUST_KILL)
-	 */
-	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
-	collect_procs_file(page, mapping, index, &to_kill,
-			   flags & MF_ACTION_REQUIRED);
+	/* call driver to handle the memory failure */
+	if (pgmap->ops->memory_failure)
+		rc = pgmap->ops->memory_failure(pgmap, pfn, flags);
 
-	list_for_each_entry(tk, &to_kill, nd)
-		if (tk->size_shift)
-			size = max(size, 1UL << tk->size_shift);
-	if (size) {
-		/*
-		 * Unmap the largest mapping to avoid breaking up
-		 * device-dax mappings which are constant size. The
-		 * actual size of the mapping being torn down is
-		 * communicated in siginfo, see kill_proc()
-		 */
-		start = (index << PAGE_SHIFT) & ~(size - 1);
-		unmap_mapping_range(mapping, start, start + size, 0);
-	}
-	kill_procs(&to_kill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
-	rc = 0;
-unlock:
-	dax_unlock(mapping, index, cookie);
 out:
 	/* drop pgmap ref acquired in caller */
 	put_dev_pagemap(pgmap);
-- 
2.29.2


_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

WARNING: multiple messages have this Message-ID (diff)
From: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<linux-nvdimm@lists.01.org>, <linux-mm@kvack.org>
Cc: <linux-fsdevel@vger.kernel.org>, <linux-raid@vger.kernel.org>,
	<darrick.wong@oracle.com>, <dan.j.williams@intel.com>,
	<david@fromorbit.com>, <hch@lst.de>, <song@kernel.org>,
	<rgoldwyn@suse.de>, <qi.fuli@fujitsu.com>, <y-goto@fujitsu.com>
Subject: [RFC PATCH v3 5/9] mm, pmem: Implement ->memory_failure() in pmem driver
Date: Tue, 15 Dec 2020 20:14:10 +0800	[thread overview]
Message-ID: <20201215121414.253660-6-ruansy.fnst@cn.fujitsu.com> (raw)
In-Reply-To: <20201215121414.253660-1-ruansy.fnst@cn.fujitsu.com>

Call the ->memory_failure() handler implemented by the pmem driver so
that the filesystem is ultimately notified and can handle the corrupted
data.  The old code that collected and killed the affected processes is
moved into mf_dax_mapping_kill_procs(), which will be called by the
filesystem.

Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
---
 drivers/nvdimm/pmem.c | 24 +++++++++++++++++
 mm/memory-failure.c   | 62 +++++++------------------------------------
 2 files changed, 34 insertions(+), 52 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 875076b0ea6c..4a114937c43b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -363,9 +363,33 @@ static void pmem_release_disk(void *__pmem)
 	put_disk(pmem->disk);
 }
 
+static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap,
+		unsigned long pfn, int flags)
+{
+	struct pmem_device *pdev;
+	struct gendisk *disk;
+	loff_t disk_offset;
+	int rc = 0;
+	unsigned long size = page_size(pfn_to_page(pfn));
+
+	pdev = container_of(pgmap, struct pmem_device, pgmap);
+	disk = pdev->disk;
+	if (!disk)
+		return -ENXIO;
+
+	disk_offset = PFN_PHYS(pfn) - pdev->phys_addr - pdev->data_offset;
+	if (disk->fops->corrupted_range) {
+		rc = disk->fops->corrupted_range(disk, NULL, disk_offset, size, &flags);
+		if (rc == -ENODEV)
+			rc = -ENXIO;
+	}
+	return rc;
+}
+
 static const struct dev_pagemap_ops fsdax_pagemap_ops = {
 	.kill			= pmem_pagemap_kill,
 	.cleanup		= pmem_pagemap_cleanup,
+	.memory_failure		= pmem_pagemap_memory_failure,
 };
 
 static int pmem_attach_disk(struct device *dev,
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 03a4f4c1b803..10b39b14b4d7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1278,38 +1278,19 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		struct dev_pagemap *pgmap)
 {
 	struct page *page = pfn_to_page(pfn);
-	struct address_space *mapping = page->mapping;
-	pgoff_t index = page->index;
-	const bool unmap_success = true;
-	unsigned long size = 0, dummy_pfn;
-	struct to_kill *tk;
-	LIST_HEAD(to_kill);
-	int rc = -EBUSY;
-	loff_t start;
-	dax_entry_t cookie;
-
-	/*
-	 * Prevent the inode from being freed while we are interrogating
-	 * the address_space, typically this would be handled by
-	 * lock_page(), but dax pages do not use the page lock. This
-	 * also prevents changes to the mapping of this pfn until
-	 * poison signaling is complete.
-	 */
-	cookie = dax_lock(mapping, index, &dummy_pfn);
-	if (!cookie)
-		goto out;
-
-	if (hwpoison_filter(page)) {
-		rc = 0;
-		goto unlock;
-	}
+	int rc;
 
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
 		/*
 		 * TODO: Handle HMM pages which may need coordination
 		 * with device-side memory.
 		 */
-		goto unlock;
+		goto out;
+	}
+
+	if (hwpoison_filter(page)) {
+		rc = 0;
+		goto out;
 	}
 
 	/*
@@ -1318,33 +1299,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	 */
 	SetPageHWPoison(page);
 
-	/*
-	 * Unlike System-RAM there is no possibility to swap in a
-	 * different physical page at a given virtual address, so all
-	 * userspace consumption of ZONE_DEVICE memory necessitates
-	 * SIGBUS (i.e. MF_MUST_KILL)
-	 */
-	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
-	collect_procs_file(page, mapping, index, &to_kill,
-			   flags & MF_ACTION_REQUIRED);
+	/* call driver to handle the memory failure */
+	if (pgmap->ops->memory_failure)
+		rc = pgmap->ops->memory_failure(pgmap, pfn, flags);
 
-	list_for_each_entry(tk, &to_kill, nd)
-		if (tk->size_shift)
-			size = max(size, 1UL << tk->size_shift);
-	if (size) {
-		/*
-		 * Unmap the largest mapping to avoid breaking up
-		 * device-dax mappings which are constant size. The
-		 * actual size of the mapping being torn down is
-		 * communicated in siginfo, see kill_proc()
-		 */
-		start = (index << PAGE_SHIFT) & ~(size - 1);
-		unmap_mapping_range(mapping, start, start + size, 0);
-	}
-	kill_procs(&to_kill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
-	rc = 0;
-unlock:
-	dax_unlock(mapping, index, cookie);
 out:
 	/* drop pgmap ref acquired in caller */
 	put_dev_pagemap(pgmap);
-- 
2.29.2




  parent reply	other threads:[~2020-12-15 12:15 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-15 12:14 [RFC PATCH v3 0/9] fsdax: introduce fs query to support reflink Shiyang Ruan
2020-12-15 12:14 ` Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 1/9] pagemap: Introduce ->memory_failure() Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 2/9] blk: Introduce ->corrupted_range() for block device Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 3/9] fs: Introduce ->corrupted_range() for superblock Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 4/9] mm, fsdax: Refactor memory-failure handler for dax mapping Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-16 21:26   ` Dave Chinner
2020-12-16 21:26     ` Dave Chinner
2020-12-18  1:48     ` Ruan Shiyang
2020-12-18  1:48       ` Ruan Shiyang
2020-12-15 12:14 ` Shiyang Ruan [this message]
2020-12-15 12:14   ` [RFC PATCH v3 5/9] mm, pmem: Implement ->memory_failure() in pmem driver Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 6/9] pmem: Implement ->corrupted_range() for " Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 7/9] dm: Introduce ->rmap() to find bdev offset Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 12:14 ` [RFC PATCH v3 8/9] md: Implement ->corrupted_range() Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 20:51   ` Darrick J. Wong
2020-12-15 20:51     ` Darrick J. Wong
2020-12-15 23:28     ` Dave Chinner
2020-12-15 23:28       ` Dave Chinner
2020-12-18  2:11     ` Ruan Shiyang
2020-12-18  2:11       ` Ruan Shiyang
2021-01-04 23:34       ` Darrick J. Wong
2021-01-04 23:34         ` Darrick J. Wong
2021-01-08  9:52         ` Ruan Shiyang
2021-01-08  9:52           ` Ruan Shiyang
2021-01-08 19:05           ` Darrick J. Wong
2021-01-08 19:05             ` Darrick J. Wong
2020-12-16  5:43   ` Jane Chu
2020-12-16  5:43     ` Jane Chu
2020-12-18  1:50     ` Ruan Shiyang
2020-12-18  1:50       ` Ruan Shiyang
2020-12-15 12:14 ` [RFC PATCH v3 9/9] xfs: Implement ->corrupted_range() for XFS Shiyang Ruan
2020-12-15 12:14   ` Shiyang Ruan
2020-12-15 20:40   ` Darrick J. Wong
2020-12-15 20:40     ` Darrick J. Wong
2020-12-18  2:31     ` Ruan Shiyang
2020-12-18  2:31       ` Ruan Shiyang
2020-12-16 20:55 ` [RFC PATCH v3 0/9] fsdax: introduce fs query to support reflink Jane Chu
2020-12-16 20:55   ` Jane Chu
2020-12-18  2:44   ` Ruan Shiyang
2020-12-18  2:44     ` Ruan Shiyang
2020-12-18  3:49     ` Darrick J. Wong
2020-12-18  3:49       ` Darrick J. Wong
2020-12-18  9:13       ` Ruan Shiyang
2020-12-18  9:13         ` Ruan Shiyang
2021-01-08 18:14         ` Jane Chu
2021-01-08 18:14           ` Jane Chu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201215121414.253660-6-ruansy.fnst@cn.fujitsu.com \
    --to=ruansy.fnst@cn.fujitsu.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=qi.fuli@fujitsu.com \
    --cc=rgoldwyn@suse.de \
    --cc=song@kernel.org \
    --cc=y-goto@fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.