All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: "Michal Hocko" <mhocko@suse.com>,
	jack@suse.cz, snitzer@redhat.com, david@fromorbit.com,
	"Heiko Carstens" <heiko.carstens@de.ibm.com>,
	linux-kernel@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,
	"Jérôme Glisse" <jglisse@redhat.com>,
	"Martin Schwidefsky" <schwidefsky@de.ibm.com>,
	"Christoph Hellwig" <hch@lst.de>,
	"Thomas Meyer" <thomas@m3y3r.de>
Subject: [PATCH v8 13/18] mm, dev_pagemap: introduce CONFIG_DEV_PAGEMAP_OPS
Date: Fri, 30 Mar 2018 21:03:19 -0700	[thread overview]
Message-ID: <152246899919.36038.9145086226090928099.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <152246892890.36038.18436540150980653229.stgit@dwillia2-desk3.amr.corp.intel.com>

The HMM sub-system extended dev_pagemap to arrange a callback when a
dev_pagemap managed page is freed. Since a dev_pagemap page is free /
idle when its reference count is 1 it requires an additional branch to
check the page-type at put_page() time. Given put_page() is a hot-path
we do not want to incur that check if HMM is not in use, so a static
branch is used to avoid that overhead when not necessary.

Now, the FS_DAX implementation wants to reuse this mechanism for
receiving dev_pagemap ->page_free() callbacks. Rework the HMM-specific
static-key into a generic mechanism that either HMM or FS_DAX code paths
can enable.

For ARCH=um builds, and any other arch that lacks ZONE_DEVICE support,
care must be taken to compile out the DEV_PAGEMAP_OPS infrastructure.
However, we still need to support FS_DAX in the FS_DAX_LIMITED case
implemented by the s390/dcssblk driver.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Reported-by: Thomas Meyer <thomas@m3y3r.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Jérôme Glisse" <jglisse@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c      |    4 ++-
 fs/Kconfig               |    1 +
 include/linux/dax.h      |   35 ++++++++++++++++-------
 include/linux/memremap.h |   17 -----------
 include/linux/mm.h       |   71 ++++++++++++++++++++++++++++++++++------------
 kernel/memremap.c        |   30 +++++++++++++++++--
 mm/Kconfig               |    5 +++
 mm/hmm.c                 |   13 +-------
 mm/swap.c                |    3 +-
 9 files changed, 116 insertions(+), 63 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 7d260f118a39..3bafaddd02f1 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -164,7 +164,7 @@ struct dax_device {
 	const struct dax_operations *ops;
 };
 
-#if IS_ENABLED(CONFIG_FS_DAX)
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
 static void generic_dax_pagefree(struct page *page, void *data)
 {
 	/* TODO: wakeup page-idle waiters */
@@ -190,6 +190,7 @@ struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner)
 		return NULL;
 	}
 
+	dev_pagemap_get_ops();
 	pgmap->type = MEMORY_DEVICE_FS_DAX;
 	pgmap->page_free = generic_dax_pagefree;
 	pgmap->data = owner;
@@ -228,6 +229,7 @@ void __fs_dax_release(struct dax_device *dax_dev, void *owner)
 	pgmap->type = MEMORY_DEVICE_HOST;
 	pgmap->page_free = NULL;
 	pgmap->data = NULL;
+	dev_pagemap_put_ops();
 	mutex_unlock(&devmap_lock);
 }
 EXPORT_SYMBOL_GPL(__fs_dax_release);
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a86d965..1e050e012eb9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
 	bool "Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
+	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
 	select FS_IOMAP
 	select DAX
 	help
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a88ff009e2a1..a36b74aa96e8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -4,6 +4,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/genhd.h>
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
@@ -87,12 +88,8 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
 	return dax_get_by_host(host);
 }
 
-struct dax_device *fs_dax_claim_bdev(struct block_device *bdev, void *owner);
-void fs_dax_release(struct dax_device *dax_dev, void *owner);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
-struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner);
-void __fs_dax_release(struct dax_device *dax_dev, void *owner);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -104,26 +101,42 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
 	return NULL;
 }
 
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
+struct dax_device *fs_dax_claim_bdev(struct block_device *bdev, void *owner);
+struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner);
+void __fs_dax_release(struct dax_device *dax_dev, void *owner);
+void fs_dax_release(struct dax_device *dax_dev, void *owner);
+#else
+#ifdef CONFIG_BLOCK
 static inline struct dax_device *fs_dax_claim_bdev(struct block_device *bdev,
 		void *owner)
 {
-	return NULL;
+	return fs_dax_get_by_host(bdev->bd_disk->disk_name);
 }
-
-static inline void fs_dax_release(struct dax_device *dax_dev, void *owner)
+#else
+static inline struct dax_device *fs_dax_claim_bdev(struct block_device *bdev,
+		void *owner)
 {
+	return NULL;
 }
+#endif
 
-static inline int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc)
+static inline void fs_dax_release(struct dax_device *dax_dev, void *owner)
 {
-	return -EOPNOTSUPP;
+	put_dax(dax_dev);
 }
 
 static inline struct dax_device *fs_dax_claim(struct dax_device *dax_dev,
 		void *owner)
 {
-	return NULL;
+	return dax_dev;
 }
 
 static inline void __fs_dax_release(struct dax_device *dax_dev, void *owner)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 02d6d042ee7f..8cc619fe347b 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
 
@@ -137,8 +136,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
 		struct dev_pagemap *pgmap)
@@ -169,20 +166,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-static inline bool is_device_private_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad06d42adb1a..be9969e3cf09 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -812,27 +812,65 @@ static inline bool is_zone_device_page(const struct page *page)
 }
 #endif
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page);
-DECLARE_STATIC_KEY_FALSE(device_private_key);
-#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
-static inline bool is_device_private_page(const struct page *page);
-static inline bool is_device_public_page(const struct page *page);
-#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-static inline void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void dev_pagemap_get_ops(void);
+void dev_pagemap_put_ops(void);
+void __put_devmap_managed_page(struct page *page);
+DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	if (!static_branch_unlikely(&devmap_managed_key))
+		return false;
+	if (!is_zone_device_page(page))
+		return false;
+	switch (page->pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_PUBLIC:
+	case MEMORY_DEVICE_FS_DAX:
+		__put_devmap_managed_page(page);
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
 {
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
-#define IS_HMM_ENABLED 0
+
+static inline bool is_device_public_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
+
+#else /* CONFIG_DEV_PAGEMAP_OPS */
+static inline void dev_pagemap_get_ops(void)
+{
+}
+
+static inline void dev_pagemap_put_ops(void)
+{
+}
+
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool is_device_private_page(const struct page *page)
 {
 	return false;
 }
+
 static inline bool is_device_public_page(const struct page *page)
 {
 	return false;
 }
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static inline void get_page(struct page *page)
 {
@@ -850,16 +888,13 @@ static inline void put_page(struct page *page)
 	page = compound_head(page);
 
 	/*
-	 * For private device pages we need to catch refcount transition from
-	 * 2 to 1, when refcount reach one it means the private device page is
-	 * free and we need to inform the device driver through callback. See
+	 * For devmap managed pages we need to catch refcount transition from
+	 * 2 to 1, when refcount reach one it means the page is free and we
+	 * need to inform the device driver through callback. See
 	 * include/linux/memremap.h and HMM for details.
 	 */
-	if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
-	    unlikely(is_device_public_page(page)))) {
-		put_zone_device_private_or_public_page(page);
+	if (put_devmap_managed_page(page))
 		return;
-	}
 
 	if (put_page_testzero(page))
 		__put_page(page);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 52a2742f527f..07a6a405cf3d 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -302,8 +302,30 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	return pgmap;
 }
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL_GPL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+void dev_pagemap_get_ops(void)
+{
+	if (atomic_inc_return(&devmap_enable) == 1)
+		static_branch_enable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
+
+void dev_pagemap_put_ops(void)
+{
+	if (atomic_dec_and_test(&devmap_enable))
+		static_branch_disable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
+
+void __put_devmap_managed_page(struct page *page)
 {
 	int count = page_ref_dec_return(page);
 
@@ -323,5 +345,5 @@ void put_zone_device_private_or_public_page(struct page *page)
 	} else if (!count)
 		__put_page(page);
 }
-EXPORT_SYMBOL(put_zone_device_private_or_public_page);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+EXPORT_SYMBOL_GPL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/mm/Kconfig b/mm/Kconfig
index c782e8fb7235..dc32828984a3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -700,6 +700,9 @@ config ARCH_HAS_HMM
 config MIGRATE_VMA_HELPER
 	bool
 
+config DEV_PAGEMAP_OPS
+	bool
+
 config HMM
 	bool
 	select MIGRATE_VMA_HELPER
@@ -720,6 +723,7 @@ config DEVICE_PRIVATE
 	bool "Unaddressable device memory (GPU memory, ...)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 
 	help
 	  Allows creation of struct pages to represent unaddressable device
@@ -730,6 +734,7 @@ config DEVICE_PUBLIC
 	bool "Addressable device memory (like GPU memory)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 
 	help
 	  Allows creation of struct pages to represent addressable device
diff --git a/mm/hmm.c b/mm/hmm.c
index 320545b98ff5..4aa554e76d06 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -35,15 +35,6 @@
 
 #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
- */
-DEFINE_STATIC_KEY_FALSE(device_private_key);
-EXPORT_SYMBOL(device_private_key);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
-
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
@@ -996,7 +987,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	resource_size_t addr;
 	int ret;
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
@@ -1090,7 +1081,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
 		return ERR_PTR(-EINVAL);
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
diff --git a/mm/swap.c b/mm/swap.c
index 0f17330dd0e5..eed846cfc8b8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -29,6 +29,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/memremap.h>
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>
@@ -744,7 +745,7 @@ void release_pages(struct page **pages, int nr)
 						       flags);
 				locked_pgdat = NULL;
 			}
-			put_zone_device_private_or_public_page(page);
+			put_devmap_managed_page(page);
 			continue;
 		}
 

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: "Martin Schwidefsky" <schwidefsky@de.ibm.com>,
	"Heiko Carstens" <heiko.carstens@de.ibm.com>,
	"Michal Hocko" <mhocko@suse.com>,
	"Thomas Meyer" <thomas@m3y3r.de>,
	"Christoph Hellwig" <hch@lst.de>,
	"Jérôme Glisse" <jglisse@redhat.com>, "Jan Kara" <jack@suse.cz>,
	david@fromorbit.com, linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-kernel@vger.kernel.org,
	jack@suse.cz, snitzer@redhat.com
Subject: [PATCH v8 13/18] mm, dev_pagemap: introduce CONFIG_DEV_PAGEMAP_OPS
Date: Fri, 30 Mar 2018 21:03:19 -0700	[thread overview]
Message-ID: <152246899919.36038.9145086226090928099.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <152246892890.36038.18436540150980653229.stgit@dwillia2-desk3.amr.corp.intel.com>

The HMM sub-system extended dev_pagemap to arrange a callback when a
dev_pagemap managed page is freed. Since a dev_pagemap page is free /
idle when its reference count is 1 it requires an additional branch to
check the page-type at put_page() time. Given put_page() is a hot-path
we do not want to incur that check if HMM is not in use, so a static
branch is used to avoid that overhead when not necessary.

Now, the FS_DAX implementation wants to reuse this mechanism for
receiving dev_pagemap ->page_free() callbacks. Rework the HMM-specific
static-key into a generic mechanism that either HMM or FS_DAX code paths
can enable.

For ARCH=um builds, and any other arch that lacks ZONE_DEVICE support,
care must be taken to compile out the DEV_PAGEMAP_OPS infrastructure.
However, we still need to support FS_DAX in the FS_DAX_LIMITED case
implemented by the s390/dcssblk driver.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Reported-by: Thomas Meyer <thomas@m3y3r.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Jérôme Glisse" <jglisse@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c      |    4 ++-
 fs/Kconfig               |    1 +
 include/linux/dax.h      |   35 ++++++++++++++++-------
 include/linux/memremap.h |   17 -----------
 include/linux/mm.h       |   71 ++++++++++++++++++++++++++++++++++------------
 kernel/memremap.c        |   30 +++++++++++++++++--
 mm/Kconfig               |    5 +++
 mm/hmm.c                 |   13 +-------
 mm/swap.c                |    3 +-
 9 files changed, 116 insertions(+), 63 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 7d260f118a39..3bafaddd02f1 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -164,7 +164,7 @@ struct dax_device {
 	const struct dax_operations *ops;
 };
 
-#if IS_ENABLED(CONFIG_FS_DAX)
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
 static void generic_dax_pagefree(struct page *page, void *data)
 {
 	/* TODO: wakeup page-idle waiters */
@@ -190,6 +190,7 @@ struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner)
 		return NULL;
 	}
 
+	dev_pagemap_get_ops();
 	pgmap->type = MEMORY_DEVICE_FS_DAX;
 	pgmap->page_free = generic_dax_pagefree;
 	pgmap->data = owner;
@@ -228,6 +229,7 @@ void __fs_dax_release(struct dax_device *dax_dev, void *owner)
 	pgmap->type = MEMORY_DEVICE_HOST;
 	pgmap->page_free = NULL;
 	pgmap->data = NULL;
+	dev_pagemap_put_ops();
 	mutex_unlock(&devmap_lock);
 }
 EXPORT_SYMBOL_GPL(__fs_dax_release);
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a86d965..1e050e012eb9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
 	bool "Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
+	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
 	select FS_IOMAP
 	select DAX
 	help
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a88ff009e2a1..a36b74aa96e8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -4,6 +4,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/genhd.h>
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
@@ -87,12 +88,8 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
 	return dax_get_by_host(host);
 }
 
-struct dax_device *fs_dax_claim_bdev(struct block_device *bdev, void *owner);
-void fs_dax_release(struct dax_device *dax_dev, void *owner);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
-struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner);
-void __fs_dax_release(struct dax_device *dax_dev, void *owner);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -104,26 +101,42 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
 	return NULL;
 }
 
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
+struct dax_device *fs_dax_claim_bdev(struct block_device *bdev, void *owner);
+struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner);
+void __fs_dax_release(struct dax_device *dax_dev, void *owner);
+void fs_dax_release(struct dax_device *dax_dev, void *owner);
+#else
+#ifdef CONFIG_BLOCK
 static inline struct dax_device *fs_dax_claim_bdev(struct block_device *bdev,
 		void *owner)
 {
-	return NULL;
+	return fs_dax_get_by_host(bdev->bd_disk->disk_name);
 }
-
-static inline void fs_dax_release(struct dax_device *dax_dev, void *owner)
+#else
+static inline struct dax_device *fs_dax_claim_bdev(struct block_device *bdev,
+		void *owner)
 {
+	return NULL;
 }
+#endif
 
-static inline int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc)
+static inline void fs_dax_release(struct dax_device *dax_dev, void *owner)
 {
-	return -EOPNOTSUPP;
+	put_dax(dax_dev);
 }
 
 static inline struct dax_device *fs_dax_claim(struct dax_device *dax_dev,
 		void *owner)
 {
-	return NULL;
+	return dax_dev;
 }
 
 static inline void __fs_dax_release(struct dax_device *dax_dev, void *owner)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 02d6d042ee7f..8cc619fe347b 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
 
@@ -137,8 +136,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
 		struct dev_pagemap *pgmap)
@@ -169,20 +166,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-static inline bool is_device_private_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad06d42adb1a..be9969e3cf09 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -812,27 +812,65 @@ static inline bool is_zone_device_page(const struct page *page)
 }
 #endif
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page);
-DECLARE_STATIC_KEY_FALSE(device_private_key);
-#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
-static inline bool is_device_private_page(const struct page *page);
-static inline bool is_device_public_page(const struct page *page);
-#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-static inline void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void dev_pagemap_get_ops(void);
+void dev_pagemap_put_ops(void);
+void __put_devmap_managed_page(struct page *page);
+DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	if (!static_branch_unlikely(&devmap_managed_key))
+		return false;
+	if (!is_zone_device_page(page))
+		return false;
+	switch (page->pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_PUBLIC:
+	case MEMORY_DEVICE_FS_DAX:
+		__put_devmap_managed_page(page);
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
 {
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
-#define IS_HMM_ENABLED 0
+
+static inline bool is_device_public_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
+
+#else /* CONFIG_DEV_PAGEMAP_OPS */
+static inline void dev_pagemap_get_ops(void)
+{
+}
+
+static inline void dev_pagemap_put_ops(void)
+{
+}
+
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool is_device_private_page(const struct page *page)
 {
 	return false;
 }
+
 static inline bool is_device_public_page(const struct page *page)
 {
 	return false;
 }
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static inline void get_page(struct page *page)
 {
@@ -850,16 +888,13 @@ static inline void put_page(struct page *page)
 	page = compound_head(page);
 
 	/*
-	 * For private device pages we need to catch refcount transition from
-	 * 2 to 1, when refcount reach one it means the private device page is
-	 * free and we need to inform the device driver through callback. See
+	 * For devmap managed pages we need to catch refcount transition from
+	 * 2 to 1, when refcount reach one it means the page is free and we
+	 * need to inform the device driver through callback. See
 	 * include/linux/memremap.h and HMM for details.
 	 */
-	if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
-	    unlikely(is_device_public_page(page)))) {
-		put_zone_device_private_or_public_page(page);
+	if (put_devmap_managed_page(page))
 		return;
-	}
 
 	if (put_page_testzero(page))
 		__put_page(page);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 52a2742f527f..07a6a405cf3d 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -302,8 +302,30 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	return pgmap;
 }
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL_GPL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+void dev_pagemap_get_ops(void)
+{
+	if (atomic_inc_return(&devmap_enable) == 1)
+		static_branch_enable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
+
+void dev_pagemap_put_ops(void)
+{
+	if (atomic_dec_and_test(&devmap_enable))
+		static_branch_disable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
+
+void __put_devmap_managed_page(struct page *page)
 {
 	int count = page_ref_dec_return(page);
 
@@ -323,5 +345,5 @@ void put_zone_device_private_or_public_page(struct page *page)
 	} else if (!count)
 		__put_page(page);
 }
-EXPORT_SYMBOL(put_zone_device_private_or_public_page);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+EXPORT_SYMBOL_GPL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/mm/Kconfig b/mm/Kconfig
index c782e8fb7235..dc32828984a3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -700,6 +700,9 @@ config ARCH_HAS_HMM
 config MIGRATE_VMA_HELPER
 	bool
 
+config DEV_PAGEMAP_OPS
+	bool
+
 config HMM
 	bool
 	select MIGRATE_VMA_HELPER
@@ -720,6 +723,7 @@ config DEVICE_PRIVATE
 	bool "Unaddressable device memory (GPU memory, ...)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 
 	help
 	  Allows creation of struct pages to represent unaddressable device
@@ -730,6 +734,7 @@ config DEVICE_PUBLIC
 	bool "Addressable device memory (like GPU memory)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 
 	help
 	  Allows creation of struct pages to represent addressable device
diff --git a/mm/hmm.c b/mm/hmm.c
index 320545b98ff5..4aa554e76d06 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -35,15 +35,6 @@
 
 #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
- */
-DEFINE_STATIC_KEY_FALSE(device_private_key);
-EXPORT_SYMBOL(device_private_key);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
-
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
@@ -996,7 +987,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	resource_size_t addr;
 	int ret;
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
@@ -1090,7 +1081,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
 		return ERR_PTR(-EINVAL);
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
diff --git a/mm/swap.c b/mm/swap.c
index 0f17330dd0e5..eed846cfc8b8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -29,6 +29,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/memremap.h>
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>
@@ -744,7 +745,7 @@ void release_pages(struct page **pages, int nr)
 						       flags);
 				locked_pgdat = NULL;
 			}
-			put_zone_device_private_or_public_page(page);
+			put_devmap_managed_page(page);
 			continue;
 		}
 

WARNING: multiple messages have this Message-ID (diff)
From: Dan Williams <dan.j.williams@intel.com>
To: linux-nvdimm@lists.01.org
Cc: "Martin Schwidefsky" <schwidefsky@de.ibm.com>,
	"Heiko Carstens" <heiko.carstens@de.ibm.com>,
	"Michal Hocko" <mhocko@suse.com>,
	"Thomas Meyer" <thomas@m3y3r.de>,
	"Christoph Hellwig" <hch@lst.de>,
	"Jérôme Glisse" <jglisse@redhat.com>, "Jan Kara" <jack@suse.cz>,
	david@fromorbit.com, linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-kernel@vger.kernel.orgjack
Subject: [PATCH v8 13/18] mm, dev_pagemap: introduce CONFIG_DEV_PAGEMAP_OPS
Date: Fri, 30 Mar 2018 21:03:19 -0700	[thread overview]
Message-ID: <152246899919.36038.9145086226090928099.stgit@dwillia2-desk3.amr.corp.intel.com> (raw)
In-Reply-To: <152246892890.36038.18436540150980653229.stgit@dwillia2-desk3.amr.corp.intel.com>

The HMM sub-system extended dev_pagemap to arrange a callback when a
dev_pagemap managed page is freed. Since a dev_pagemap page is free /
idle when its reference count is 1 it requires an additional branch to
check the page-type at put_page() time. Given put_page() is a hot-path
we do not want to incur that check if HMM is not in use, so a static
branch is used to avoid that overhead when not necessary.

Now, the FS_DAX implementation wants to reuse this mechanism for
receiving dev_pagemap ->page_free() callbacks. Rework the HMM-specific
static-key into a generic mechanism that either HMM or FS_DAX code paths
can enable.

For ARCH=um builds, and any other arch that lacks ZONE_DEVICE support,
care must be taken to compile out the DEV_PAGEMAP_OPS infrastructure.
However, we still need to support FS_DAX in the FS_DAX_LIMITED case
implemented by the s390/dcssblk driver.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Reported-by: Thomas Meyer <thomas@m3y3r.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: "Jérôme Glisse" <jglisse@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c      |    4 ++-
 fs/Kconfig               |    1 +
 include/linux/dax.h      |   35 ++++++++++++++++-------
 include/linux/memremap.h |   17 -----------
 include/linux/mm.h       |   71 ++++++++++++++++++++++++++++++++++------------
 kernel/memremap.c        |   30 +++++++++++++++++--
 mm/Kconfig               |    5 +++
 mm/hmm.c                 |   13 +-------
 mm/swap.c                |    3 +-
 9 files changed, 116 insertions(+), 63 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 7d260f118a39..3bafaddd02f1 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -164,7 +164,7 @@ struct dax_device {
 	const struct dax_operations *ops;
 };
 
-#if IS_ENABLED(CONFIG_FS_DAX)
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
 static void generic_dax_pagefree(struct page *page, void *data)
 {
 	/* TODO: wakeup page-idle waiters */
@@ -190,6 +190,7 @@ struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner)
 		return NULL;
 	}
 
+	dev_pagemap_get_ops();
 	pgmap->type = MEMORY_DEVICE_FS_DAX;
 	pgmap->page_free = generic_dax_pagefree;
 	pgmap->data = owner;
@@ -228,6 +229,7 @@ void __fs_dax_release(struct dax_device *dax_dev, void *owner)
 	pgmap->type = MEMORY_DEVICE_HOST;
 	pgmap->page_free = NULL;
 	pgmap->data = NULL;
+	dev_pagemap_put_ops();
 	mutex_unlock(&devmap_lock);
 }
 EXPORT_SYMBOL_GPL(__fs_dax_release);
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a86d965..1e050e012eb9 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
 	bool "Direct Access (DAX) support"
 	depends on MMU
 	depends on !(ARM || MIPS || SPARC)
+	select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
 	select FS_IOMAP
 	select DAX
 	help
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a88ff009e2a1..a36b74aa96e8 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -4,6 +4,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/genhd.h>
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
@@ -87,12 +88,8 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
 	return dax_get_by_host(host);
 }
 
-struct dax_device *fs_dax_claim_bdev(struct block_device *bdev, void *owner);
-void fs_dax_release(struct dax_device *dax_dev, void *owner);
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
-struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner);
-void __fs_dax_release(struct dax_device *dax_dev, void *owner);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -104,26 +101,42 @@ static inline struct dax_device *fs_dax_get_by_host(const char *host)
 	return NULL;
 }
 
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS)
+struct dax_device *fs_dax_claim_bdev(struct block_device *bdev, void *owner);
+struct dax_device *fs_dax_claim(struct dax_device *dax_dev, void *owner);
+void __fs_dax_release(struct dax_device *dax_dev, void *owner);
+void fs_dax_release(struct dax_device *dax_dev, void *owner);
+#else
+#ifdef CONFIG_BLOCK
 static inline struct dax_device *fs_dax_claim_bdev(struct block_device *bdev,
 		void *owner)
 {
-	return NULL;
+	return fs_dax_get_by_host(bdev->bd_disk->disk_name);
 }
-
-static inline void fs_dax_release(struct dax_device *dax_dev, void *owner)
+#else
+static inline struct dax_device *fs_dax_claim_bdev(struct block_device *bdev,
+		void *owner)
 {
+	return NULL;
 }
+#endif
 
-static inline int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc)
+static inline void fs_dax_release(struct dax_device *dax_dev, void *owner)
 {
-	return -EOPNOTSUPP;
+	put_dax(dax_dev);
 }
 
 static inline struct dax_device *fs_dax_claim(struct dax_device *dax_dev,
 		void *owner)
 {
-	return NULL;
+	return dax_dev;
 }
 
 static inline void __fs_dax_release(struct dax_device *dax_dev, void *owner)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 02d6d042ee7f..8cc619fe347b 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
-#include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
 
@@ -137,8 +136,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
 		struct dev_pagemap *pgmap)
@@ -169,20 +166,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-static inline bool is_device_private_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
-}
-
-static inline bool is_device_public_page(const struct page *page)
-{
-	return is_zone_device_page(page) &&
-		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
 	if (pgmap)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ad06d42adb1a..be9969e3cf09 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -812,27 +812,65 @@ static inline bool is_zone_device_page(const struct page *page)
 }
 #endif
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page);
-DECLARE_STATIC_KEY_FALSE(device_private_key);
-#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key)
-static inline bool is_device_private_page(const struct page *page);
-static inline bool is_device_public_page(const struct page *page);
-#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-static inline void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+void dev_pagemap_get_ops(void);
+void dev_pagemap_put_ops(void);
+void __put_devmap_managed_page(struct page *page);
+DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	if (!static_branch_unlikely(&devmap_managed_key))
+		return false;
+	if (!is_zone_device_page(page))
+		return false;
+	switch (page->pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_PUBLIC:
+	case MEMORY_DEVICE_FS_DAX:
+		__put_devmap_managed_page(page);
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static inline bool is_device_private_page(const struct page *page)
 {
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
 }
-#define IS_HMM_ENABLED 0
+
+static inline bool is_device_public_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
+}
+
+#else /* CONFIG_DEV_PAGEMAP_OPS */
+static inline void dev_pagemap_get_ops(void)
+{
+}
+
+static inline void dev_pagemap_put_ops(void)
+{
+}
+
+static inline bool put_devmap_managed_page(struct page *page)
+{
+	return false;
+}
+
 static inline bool is_device_private_page(const struct page *page)
 {
 	return false;
 }
+
 static inline bool is_device_public_page(const struct page *page)
 {
 	return false;
 }
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static inline void get_page(struct page *page)
 {
@@ -850,16 +888,13 @@ static inline void put_page(struct page *page)
 	page = compound_head(page);
 
 	/*
-	 * For private device pages we need to catch refcount transition from
-	 * 2 to 1, when refcount reach one it means the private device page is
-	 * free and we need to inform the device driver through callback. See
+	 * For devmap managed pages we need to catch refcount transition from
+	 * 2 to 1, when refcount reach one it means the page is free and we
+	 * need to inform the device driver through callback. See
 	 * include/linux/memremap.h and HMM for details.
 	 */
-	if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) ||
-	    unlikely(is_device_public_page(page)))) {
-		put_zone_device_private_or_public_page(page);
+	if (put_devmap_managed_page(page))
 		return;
-	}
 
 	if (put_page_testzero(page))
 		__put_page(page);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 52a2742f527f..07a6a405cf3d 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -302,8 +302,30 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	return pgmap;
 }
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-void put_zone_device_private_or_public_page(struct page *page)
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL_GPL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+void dev_pagemap_get_ops(void)
+{
+	if (atomic_inc_return(&devmap_enable) == 1)
+		static_branch_enable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
+
+void dev_pagemap_put_ops(void)
+{
+	if (atomic_dec_and_test(&devmap_enable))
+		static_branch_disable(&devmap_managed_key);
+}
+EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
+
+void __put_devmap_managed_page(struct page *page)
 {
 	int count = page_ref_dec_return(page);
 
@@ -323,5 +345,5 @@ void put_zone_device_private_or_public_page(struct page *page)
 	} else if (!count)
 		__put_page(page);
 }
-EXPORT_SYMBOL(put_zone_device_private_or_public_page);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
+EXPORT_SYMBOL_GPL(__put_devmap_managed_page);
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
diff --git a/mm/Kconfig b/mm/Kconfig
index c782e8fb7235..dc32828984a3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -700,6 +700,9 @@ config ARCH_HAS_HMM
 config MIGRATE_VMA_HELPER
 	bool
 
+config DEV_PAGEMAP_OPS
+	bool
+
 config HMM
 	bool
 	select MIGRATE_VMA_HELPER
@@ -720,6 +723,7 @@ config DEVICE_PRIVATE
 	bool "Unaddressable device memory (GPU memory, ...)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 
 	help
 	  Allows creation of struct pages to represent unaddressable device
@@ -730,6 +734,7 @@ config DEVICE_PUBLIC
 	bool "Addressable device memory (like GPU memory)"
 	depends on ARCH_HAS_HMM
 	select HMM
+	select DEV_PAGEMAP_OPS
 
 	help
 	  Allows creation of struct pages to represent addressable device
diff --git a/mm/hmm.c b/mm/hmm.c
index 320545b98ff5..4aa554e76d06 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -35,15 +35,6 @@
 
 #define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
-/*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
- */
-DEFINE_STATIC_KEY_FALSE(device_private_key);
-EXPORT_SYMBOL(device_private_key);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-
-
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
@@ -996,7 +987,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	resource_size_t addr;
 	int ret;
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
@@ -1090,7 +1081,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
 		return ERR_PTR(-EINVAL);
 
-	static_branch_enable(&device_private_key);
+	dev_pagemap_get_ops();
 
 	devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
 				   GFP_KERNEL, dev_to_node(device));
diff --git a/mm/swap.c b/mm/swap.c
index 0f17330dd0e5..eed846cfc8b8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -29,6 +29,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
+#include <linux/memremap.h>
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>
@@ -744,7 +745,7 @@ void release_pages(struct page **pages, int nr)
 						       flags);
 				locked_pgdat = NULL;
 			}
-			put_zone_device_private_or_public_page(page);
+			put_devmap_managed_page(page);
 			continue;
 		}
 


  parent reply	other threads:[~2018-03-31  4:13 UTC|newest]

Thread overview: 111+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-31  4:02 [PATCH v8 00/18] dax: fix dma vs truncate/hole-punch Dan Williams
2018-03-31  4:02 ` Dan Williams
2018-03-31  4:02 ` Dan Williams
2018-03-31  4:02 ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 01/18] dax: store pfns in the radix Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 02/18] fs, dax: prepare for dax-specific address_space_operations Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 03/18] block, dax: remove dead code in blkdev_writepages() Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 04/18] xfs, dax: introduce xfs_dax_aops Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 05/18] ext4, dax: introduce ext4_dax_aops Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-04-03 11:50   ` Jan Kara
2018-04-03 11:50     ` Jan Kara
2018-04-03 11:50     ` Jan Kara
2018-03-31  4:02 ` [PATCH v8 06/18] ext2, dax: introduce ext2_dax_aops Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-04-03 11:51   ` Jan Kara
2018-04-03 11:51     ` Jan Kara
2018-03-31  4:02 ` [PATCH v8 07/18] fs, dax: use page->mapping to warn if truncate collides with a busy page Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 08/18] dax: introduce CONFIG_DAX_DRIVER Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02 ` [PATCH v8 09/18] dax, dm: allow device-mapper to operate without dax support Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:02   ` Dan Williams
2018-03-31  4:03 ` [PATCH v8 10/18] dax, dm: introduce ->fs_{claim, release}() dax_device infrastructure Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-04-03 18:24   ` Dan Williams
2018-04-03 18:24     ` Dan Williams
2018-04-03 19:39     ` Mike Snitzer
2018-04-03 19:39       ` Mike Snitzer
2018-04-03 19:47       ` Dan Williams
2018-04-03 19:47         ` Dan Williams
2018-04-03 20:36         ` [PATCH v9] " Dan Williams
2018-04-03 20:36           ` Dan Williams
2018-04-03 20:36           ` Dan Williams
2018-04-03 21:13           ` Mike Snitzer
2018-04-03 21:13             ` Mike Snitzer
2018-04-03 21:13             ` Mike Snitzer
2018-03-31  4:03 ` [PATCH v8 11/18] mm, dax: enable filesystems to trigger dev_pagemap ->page_free callbacks Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-04-04 21:23   ` [v8, " Andrei Vagin
2018-04-04 21:23     ` Andrei Vagin
2018-04-04 21:27     ` Dan Williams
2018-04-04 21:27       ` Dan Williams
2018-04-04 21:35       ` Dan Williams
2018-04-04 21:35         ` Dan Williams
2018-04-04 23:19         ` Stephen Rothwell
2018-04-04 21:40     ` Andrei Vagin
2018-04-04 21:40       ` Andrei Vagin
2018-03-31  4:03 ` [PATCH v8 12/18] memremap: split devm_memremap_pages() and memremap() infrastructure Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03 ` Dan Williams [this message]
2018-03-31  4:03   ` [PATCH v8 13/18] mm, dev_pagemap: introduce CONFIG_DEV_PAGEMAP_OPS Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03 ` [PATCH v8 14/18] memremap: mark devm_memremap_pages() EXPORT_SYMBOL_GPL Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03 ` [PATCH v8 15/18] mm, fs, dax: handle layout changes to pinned dax mappings Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-04-04  9:46   ` Jan Kara
2018-04-04  9:46     ` Jan Kara
2018-04-04 10:06     ` Jan Kara
2018-04-04 10:06       ` Jan Kara
2018-04-04 14:12     ` Dan Williams
2018-04-04 14:12       ` Dan Williams
2018-04-07 19:38     ` Dan Williams
2018-04-07 19:38       ` Dan Williams
2018-04-08  3:11       ` Paul E. McKenney
2018-04-08  3:11         ` Paul E. McKenney
2018-04-09 16:39         ` Jan Kara
2018-04-09 16:39           ` Jan Kara
2018-04-09 18:14           ` Paul E. McKenney
2018-04-09 18:14             ` Paul E. McKenney
2018-04-09 16:49       ` Jan Kara
2018-04-09 16:49         ` Jan Kara
2018-04-09 16:51         ` Dan Williams
2018-04-09 16:51           ` Dan Williams
2018-04-13 22:03           ` Dan Williams
2018-04-13 22:03             ` Dan Williams
2018-04-13 22:48             ` Paul E. McKenney
2018-04-13 22:48               ` Paul E. McKenney
2018-04-19 10:44             ` Jan Kara
2018-04-19 10:44               ` Jan Kara
2018-04-20  3:00               ` Dan Williams
2018-04-20  3:00                 ` Dan Williams
2018-03-31  4:03 ` [PATCH v8 16/18] xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03 ` [PATCH v8 17/18] xfs: prepare xfs_break_layouts() for another layout type Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03 ` [PATCH v8 18/18] xfs, dax: introduce xfs_break_dax_layouts() Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-03-31  4:03   ` Dan Williams
2018-04-04  9:55   ` Jan Kara
2018-04-04  9:55     ` Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=152246899919.36038.9145086226090928099.stgit@dwillia2-desk3.amr.corp.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=heiko.carstens@de.ibm.com \
    --cc=jack@suse.cz \
    --cc=jglisse@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=schwidefsky@de.ibm.com \
    --cc=snitzer@redhat.com \
    --cc=thomas@m3y3r.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.