linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	"Darrick J. Wong" <djwong@kernel.org>,
	Jason Gunthorpe <jgg@nvidia.com>, Christoph Hellwig <hch@lst.de>,
	John Hubbard <jhubbard@nvidia.com>,
	linux-fsdevel@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-xfs@vger.kernel.org, linux-mm@kvack.org,
	linux-ext4@vger.kernel.org
Subject: [PATCH v2 10/18] fsdax: Manage pgmap references at entry insertion and deletion
Date: Thu, 15 Sep 2022 20:36:07 -0700	[thread overview]
Message-ID: <166329936739.2786261.14035402420254589047.stgit@dwillia2-xfh.jf.intel.com> (raw)
In-Reply-To: <166329930818.2786261.6086109734008025807.stgit@dwillia2-xfh.jf.intel.com>

The percpu_ref in 'struct dev_pagemap' is used to coordinate active
mappings of device-memory with the device-removal / unbind path. It
enables the semantic that initiating device-removal (or
device-driver-unbind) blocks new mapping and DMA attempts, and waits for
mapping revocation or inflight DMA to complete.

Expand the scope of the reference count to pin the DAX device active at
mapping time and not later at the first gup event. With a device
reference being held while any page on that device is mapped, the need
to manage pgmap reference counts in the gup code is eliminated. That
cleanup is saved for a follow-on change.

For now, teach dax_insert_entry() and dax_delete_mapping_entry() to take
and drop pgmap references respectively. dax_insert_entry() is called to
take the initial reference on the page, and dax_delete_mapping_entry()
is called once there are no outstanding references to the given
page(s).

Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c                 |   34 ++++++++++++++++++++++++++++------
 include/linux/memremap.h |   18 ++++++++++++++----
 mm/memremap.c            |   13 ++++++++-----
 3 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 5d9f30105db4..ee2568c8b135 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -376,14 +376,26 @@ static inline void dax_mapping_set_cow(struct page *page)
  * whether this entry is shared by multiple files.  If so, set the page->mapping
  * FS_DAX_MAPPING_COW, and use page->index as refcount.
  */
-static void dax_associate_entry(void *entry, struct address_space *mapping,
-				struct vm_fault *vmf, unsigned long flags)
+static vm_fault_t dax_associate_entry(void *entry,
+				      struct address_space *mapping,
+				      struct vm_fault *vmf, unsigned long flags)
 {
 	unsigned long size = dax_entry_size(entry), pfn, index;
+	struct dev_pagemap *pgmap;
 	int i = 0;
 
 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
-		return;
+		return 0;
+
+	if (!size)
+		return 0;
+
+	if (!(flags & DAX_COW)) {
+		pfn = dax_to_pfn(entry);
+		pgmap = get_dev_pagemap_many(pfn, NULL, PHYS_PFN(size));
+		if (!pgmap)
+			return VM_FAULT_SIGBUS;
+	}
 
 	index = linear_page_index(vmf->vma, ALIGN(vmf->address, size));
 	for_each_mapped_pfn(entry, pfn) {
@@ -398,19 +410,24 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
 			page_ref_inc(page);
 		}
 	}
+
+	return 0;
 }
 
 static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 		bool trunc)
 {
-	unsigned long pfn;
+	unsigned long size = dax_entry_size(entry), pfn;
+	struct page *page;
 
 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
 		return;
 
-	for_each_mapped_pfn(entry, pfn) {
-		struct page *page = pfn_to_page(pfn);
+	if (!size)
+		return;
 
+	for_each_mapped_pfn(entry, pfn) {
+		page = pfn_to_page(pfn);
 		if (dax_mapping_is_cow(page->mapping)) {
 			/* keep the CoW flag if this page is still shared */
 			if (page->index-- > 0)
@@ -423,6 +440,11 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 		page->mapping = NULL;
 		page->index = 0;
 	}
+
+	if (trunc && !dax_mapping_is_cow(page->mapping)) {
+		page = pfn_to_page(dax_to_pfn(entry));
+		put_dev_pagemap_many(page->pgmap, PHYS_PFN(size));
+	}
 }
 
 /*
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c3b4cc84877b..fd57407e7f3d 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -191,8 +191,13 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid);
 void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-		struct dev_pagemap *pgmap);
+struct dev_pagemap *get_dev_pagemap_many(unsigned long pfn,
+					 struct dev_pagemap *pgmap, int refs);
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+						  struct dev_pagemap *pgmap)
+{
+	return get_dev_pagemap_many(pfn, pgmap, 1);
+}
 bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
@@ -244,10 +249,15 @@ static inline unsigned long memremap_compat_align(void)
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
+static inline void put_dev_pagemap_many(struct dev_pagemap *pgmap, int refs)
 {
 	if (pgmap)
-		percpu_ref_put(&pgmap->ref);
+		percpu_ref_put_many(&pgmap->ref, refs);
+}
+
+static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
+{
+	put_dev_pagemap_many(pgmap, 1);
 }
 
 #endif /* _LINUX_MEMREMAP_H_ */
diff --git a/mm/memremap.c b/mm/memremap.c
index 95f6ffe9cb0f..83c5e6fafd84 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -430,15 +430,16 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
 }
 
 /**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * get_dev_pagemap_many() - take new live reference(s) on the dev_pagemap for @pfn
  * @pfn: page frame number to lookup page_map
  * @pgmap: optional known pgmap that already has a reference
+ * @refs: number of references to take
  *
  * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
  * is non-NULL but does not cover @pfn the reference to it will be released.
  */
-struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-		struct dev_pagemap *pgmap)
+struct dev_pagemap *get_dev_pagemap_many(unsigned long pfn,
+					 struct dev_pagemap *pgmap, int refs)
 {
 	resource_size_t phys = PFN_PHYS(pfn);
 
@@ -454,13 +455,15 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	/* fall back to slow path lookup */
 	rcu_read_lock();
 	pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
-	if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
+	if (pgmap && !percpu_ref_tryget_live_rcu(&pgmap->ref))
 		pgmap = NULL;
+	if (pgmap && refs > 1)
+		percpu_ref_get_many(&pgmap->ref, refs - 1);
 	rcu_read_unlock();
 
 	return pgmap;
 }
-EXPORT_SYMBOL_GPL(get_dev_pagemap);
+EXPORT_SYMBOL_GPL(get_dev_pagemap_many);
 
 void free_zone_device_page(struct page *page)
 {


  parent reply	other threads:[~2022-09-16  3:36 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-16  3:35 [PATCH v2 00/18] Fix the DAX-gup mistake Dan Williams
2022-09-16  3:35 ` [PATCH v2 01/18] fsdax: Wait on @page not @page->_refcount Dan Williams
2022-09-20 14:30   ` Jason Gunthorpe
2022-09-16  3:35 ` [PATCH v2 02/18] fsdax: Use dax_page_idle() to document DAX busy page checking Dan Williams
2022-09-20 14:31   ` Jason Gunthorpe
2022-09-16  3:35 ` [PATCH v2 03/18] fsdax: Include unmapped inodes for page-idle detection Dan Williams
2022-09-16  3:35 ` [PATCH v2 04/18] ext4: Add ext4_break_layouts() to the inode eviction path Dan Williams
2022-09-16  3:35 ` [PATCH v2 05/18] xfs: Add xfs_break_layouts() " Dan Williams
2022-09-18 22:57   ` Dave Chinner
2022-09-19 16:11     ` Dan Williams
2022-09-19 21:29       ` Dave Chinner
2022-09-20 16:44         ` Dan Williams
2022-09-21 22:14           ` Dave Chinner
2022-09-21 22:28             ` Jason Gunthorpe
2022-09-23  0:18               ` Dave Chinner
2022-09-23  0:41                 ` Dan Williams
2022-09-23  2:10                   ` Dave Chinner
2022-09-23  9:38                     ` Jan Kara
2022-09-23 23:06                       ` Dan Williams
2022-09-25 23:54                       ` Dave Chinner
2022-09-26 14:10                         ` Jan Kara
2022-09-29 23:33                           ` Dan Williams
2022-09-30 13:41                             ` Jan Kara
2022-09-30 17:56                               ` Dan Williams
2022-09-30 18:06                                 ` Jason Gunthorpe
2022-09-30 18:46                                   ` Dan Williams
2022-10-03  7:55                                   ` Jan Kara
2022-09-23 12:39                     ` Jason Gunthorpe
2022-09-26  0:34                       ` Dave Chinner
2022-09-26 13:04                         ` Jason Gunthorpe
2022-09-22  0:02             ` Dan Williams
2022-09-22  0:10               ` Jason Gunthorpe
2022-09-16  3:35 ` [PATCH v2 06/18] fsdax: Rework dax_layout_busy_page() to dax_zap_mappings() Dan Williams
2022-09-16  3:35 ` [PATCH v2 07/18] fsdax: Update dax_insert_entry() calling convention to return an error Dan Williams
2022-09-16  3:35 ` [PATCH v2 08/18] fsdax: Cleanup dax_associate_entry() Dan Williams
2022-09-16  3:36 ` [PATCH v2 09/18] fsdax: Rework dax_insert_entry() calling convention Dan Williams
2022-09-16  3:36 ` Dan Williams [this message]
2022-09-21 14:03   ` [PATCH v2 10/18] fsdax: Manage pgmap references at entry insertion and deletion Jason Gunthorpe
2022-09-21 15:18     ` Dan Williams
2022-09-21 21:38       ` Dan Williams
2022-09-21 22:07         ` Jason Gunthorpe
2022-09-22  0:14           ` Dan Williams
2022-09-22  0:25             ` Jason Gunthorpe
2022-09-22  2:17               ` Dan Williams
2022-09-22 17:55                 ` Jason Gunthorpe
2022-09-22 21:54                   ` Dan Williams
2022-09-23  1:36                     ` Dave Chinner
2022-09-23  2:01                       ` Dan Williams
2022-09-23 13:24                     ` Jason Gunthorpe
2022-09-23 16:29                       ` Dan Williams
2022-09-23 17:42                         ` Jason Gunthorpe
2022-09-23 19:03                           ` Dan Williams
2022-09-23 19:23                             ` Jason Gunthorpe
2022-09-27  6:07                             ` Alistair Popple
2022-09-27 12:56                               ` Jason Gunthorpe
2022-09-16  3:36 ` [PATCH v2 11/18] devdax: Minor warning fixups Dan Williams
2022-09-16  3:36 ` [PATCH v2 12/18] devdax: Move address_space helpers to the DAX core Dan Williams
2022-09-27  6:20   ` Alistair Popple
2022-09-29 22:38     ` Dan Williams
2022-09-16  3:36 ` [PATCH v2 13/18] dax: Prep mapping helpers for compound pages Dan Williams
2022-09-21 14:06   ` Jason Gunthorpe
2022-09-21 15:19     ` Dan Williams
2022-09-16  3:36 ` [PATCH v2 14/18] devdax: add PUD support to the DAX mapping infrastructure Dan Williams
2022-09-16  3:36 ` [PATCH v2 15/18] devdax: Use dax_insert_entry() + dax_delete_mapping_entry() Dan Williams
2022-09-21 14:10   ` Jason Gunthorpe
2022-09-21 15:48     ` Dan Williams
2022-09-21 22:23       ` Jason Gunthorpe
2022-09-22  0:15         ` Dan Williams
2022-09-16  3:36 ` [PATCH v2 16/18] mm/memremap_pages: Support initializing pages to a zero reference count Dan Williams
2022-09-21 15:24   ` Jason Gunthorpe
2022-09-21 23:45     ` Dan Williams
2022-09-22  0:03       ` Alistair Popple
2022-09-22  0:04       ` Jason Gunthorpe
2022-09-22  0:34         ` Dan Williams
2022-09-22  1:36           ` Alistair Popple
2022-09-22  2:34             ` Dan Williams
2022-09-26  6:17               ` Alistair Popple
2022-09-22  0:13       ` John Hubbard
2022-09-16  3:36 ` [PATCH v2 17/18] fsdax: Delete put_devmap_managed_page_refs() Dan Williams
2022-09-16  3:36 ` [PATCH v2 18/18] mm/gup: Drop DAX pgmap accounting Dan Williams
2022-09-20 14:29 ` [PATCH v2 00/18] Fix the DAX-gup mistake Jason Gunthorpe
2022-09-20 16:50   ` Dan Williams
2022-11-09  0:20 ` Andrew Morton
2022-11-09 11:38   ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=166329936739.2786261.14035402420254589047.stgit@dwillia2-xfh.jf.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=jgg@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=nvdimm@lists.linux.dev \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).